{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "global_step": 6294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.936507936507936e-06, "loss": 1.0932, "theoretical_loss": 14.920781838632275, "tokens_seen": 262144 }, { "epoch": 0.0, "learning_rate": 1.5873015873015872e-05, "loss": 1.0937, "theoretical_loss": 12.718594708127029, "tokens_seen": 524288 }, { "epoch": 0.0, "learning_rate": 2.380952380952381e-05, "loss": 1.0519, "theoretical_loss": 11.615184291350435, "tokens_seen": 786432 }, { "epoch": 0.0, "learning_rate": 3.1746031746031745e-05, "loss": 1.0059, "theoretical_loss": 10.904893169100655, "tokens_seen": 1048576 }, { "epoch": 0.0, "learning_rate": 3.968253968253968e-05, "loss": 0.9761, "theoretical_loss": 10.392029026407034, "tokens_seen": 1310720 }, { "epoch": 0.0, "learning_rate": 4.761904761904762e-05, "loss": 0.9521, "theoretical_loss": 9.996134261483984, "tokens_seen": 1572864 }, { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 0.9364, "theoretical_loss": 9.67682184172525, "tokens_seen": 1835008 }, { "epoch": 0.0, "learning_rate": 6.349206349206349e-05, "loss": 0.9245, "theoretical_loss": 9.41114487355416, "tokens_seen": 2097152 }, { "epoch": 0.0, "learning_rate": 7.142857142857142e-05, "loss": 0.9103, "theoretical_loss": 9.184905895151996, "tokens_seen": 2359296 }, { "epoch": 0.0, "learning_rate": 7.936507936507937e-05, "loss": 0.8933, "theoretical_loss": 8.988754572553061, "tokens_seen": 2621440 }, { "epoch": 0.0, "learning_rate": 8.73015873015873e-05, "loss": 0.8721, "theoretical_loss": 8.816230875422118, "tokens_seen": 2883584 }, { "epoch": 0.0, "learning_rate": 9.523809523809524e-05, "loss": 0.8552, "theoretical_loss": 8.66269920037918, "tokens_seen": 3145728 }, { "epoch": 0.0, "learning_rate": 0.00010317460317460317, "loss": 0.8283, "theoretical_loss": 8.524729102289708, "tokens_seen": 3407872 }, { "epoch": 0.0, "learning_rate": 0.0001111111111111111, "loss": 0.8016, "theoretical_loss": 8.399716359763914, "tokens_seen": 3670016 }, { "epoch": 0.0, "learning_rate": 0.00011904761904761905, "loss": 0.7709, "theoretical_loss": 8.285641004895568, "tokens_seen": 3932160 }, { "epoch": 0.0, "learning_rate": 0.00012698412698412698, "loss": 0.7402, "theoretical_loss": 8.180907195283321, "tokens_seen": 4194304 }, { "epoch": 0.0, "learning_rate": 0.0001349206349206349, "loss": 0.7102, "theoretical_loss": 8.084233979345122, "tokens_seen": 4456448 }, { "epoch": 0.0, "learning_rate": 0.00014285714285714284, "loss": 0.6751, "theoretical_loss": 7.9945788049155055, "tokens_seen": 4718592 }, { "epoch": 0.0, "learning_rate": 0.0001507936507936508, "loss": 0.6503, "theoretical_loss": 7.911082722632908, "tokens_seen": 4980736 }, { "epoch": 0.0, "learning_rate": 0.00015873015873015873, "loss": 0.6184, "theoretical_loss": 7.83303033759787, "tokens_seen": 5242880 }, { "epoch": 0.0, "learning_rate": 0.00016666666666666666, "loss": 0.5919, "theoretical_loss": 7.759820016443023, "tokens_seen": 5505024 }, { "epoch": 0.0, "learning_rate": 0.0001746031746031746, "loss": 0.5615, "theoretical_loss": 7.690941370375033, "tokens_seen": 5767168 }, { "epoch": 0.0, "learning_rate": 0.00018253968253968252, "loss": 0.5314, "theoretical_loss": 7.6259579939239845, "tokens_seen": 6029312 }, { "epoch": 0.0, "learning_rate": 0.00019047619047619048, "loss": 0.5049, "theoretical_loss": 7.564494061943624, "tokens_seen": 6291456 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.19619600474834442, "objective/train/docs_used": 9704, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 7.333767890930176, "objective/train/original_loss": 7.333766937255859, "objective/train/theoretical_loss": 7.5062238006917354, "objective/train/tokens_used": 27013600, "objective/train/value_avg": -0.204345703125, "objective/train/value_loss": 0.03900672122836113, "objective/train/value_max": -0.19873046875, "objective/train/value_min": -0.251953125, "objective/train/value_reward_corr": -0.012004978426084556, "objective/train/value_std": 0.01258087158203125, "objective/train/weight_avg": 1.2170684337615967, "objective/train/weighted_lm_loss": 8.908797264099121, "objective/train/weights_max": 1.2865357398986816, "objective/train/weights_min": 0.4672624170780182, "theoretical_loss": 7.5062238006917354, "tokens_seen": 6553600 }, { "epoch": 0.0, "learning_rate": 0.0001984126984126984, "loss": 0.4798, "theoretical_loss": 7.5062238006917354, "tokens_seen": 6553600 }, { "epoch": 0.0, "learning_rate": 0.00020634920634920634, "loss": 0.4565, "theoretical_loss": 7.45086312850561, "tokens_seen": 6815744 }, { "epoch": 0.0, "learning_rate": 0.00021428571428571427, "loss": 0.4398, "theoretical_loss": 7.398162954262078, "tokens_seen": 7077888 }, { "epoch": 0.0, "learning_rate": 0.0002222222222222222, "loss": 0.4258, "theoretical_loss": 7.347903756717382, "tokens_seen": 7340032 }, { "epoch": 0.0, "learning_rate": 0.00023015873015873016, "loss": 0.4009, "theoretical_loss": 7.299891163694537, "tokens_seen": 7602176 }, { "epoch": 0.0, "learning_rate": 0.0002380952380952381, "loss": 0.391, "theoretical_loss": 7.253952319156202, "tokens_seen": 7864320 }, { "epoch": 0.0, "learning_rate": 0.000246031746031746, "loss": 0.3753, "theoretical_loss": 7.2099328765932205, "tokens_seen": 8126464 }, { "epoch": 0.0, "learning_rate": 0.00025396825396825396, "loss": 0.366, "theoretical_loss": 7.167694494355343, "tokens_seen": 8388608 }, { "epoch": 0.0, "learning_rate": 0.0002619047619047619, "loss": 0.3573, "theoretical_loss": 7.127112736305475, "tokens_seen": 8650752 }, { "epoch": 0.0, "learning_rate": 0.0002698412698412698, "loss": 0.3476, "theoretical_loss": 7.0880753020982725, "tokens_seen": 8912896 }, { "epoch": 0.0, "learning_rate": 0.0002777777777777778, "loss": 0.3461, "theoretical_loss": 7.050480527300383, "tokens_seen": 9175040 }, { "epoch": 0.0, "learning_rate": 0.0002857142857142857, "loss": 0.3395, "theoretical_loss": 7.014236105786485, "tokens_seen": 9437184 }, { "epoch": 0.0, "learning_rate": 0.0002936507936507937, "loss": 0.3386, "theoretical_loss": 6.979257996300014, "tokens_seen": 9699328 }, { "epoch": 0.0, "learning_rate": 0.0003015873015873016, "loss": 0.3357, "theoretical_loss": 6.945469482441503, "tokens_seen": 9961472 }, { "epoch": 0.0, "learning_rate": 0.00030952380952380956, "loss": 0.3318, "theoretical_loss": 6.912800361140576, "tokens_seen": 10223616 }, { "epoch": 0.0, "learning_rate": 0.00031746031746031746, "loss": 0.33, "theoretical_loss": 6.881186239250335, "tokens_seen": 10485760 }, { "epoch": 0.0, "learning_rate": 0.0003253968253968254, "loss": 0.3287, "theoretical_loss": 6.8505679215514235, "tokens_seen": 10747904 }, { "epoch": 0.0, "learning_rate": 0.0003333333333333333, "loss": 0.3244, "theoretical_loss": 6.8208908763759295, "tokens_seen": 11010048 }, { "epoch": 0.0, "learning_rate": 0.0003412698412698413, "loss": 0.3241, "theoretical_loss": 6.79210476741633, "tokens_seen": 11272192 }, { "epoch": 0.0, "learning_rate": 0.0003492063492063492, "loss": 0.3206, "theoretical_loss": 6.76416304219278, "tokens_seen": 11534336 }, { "epoch": 0.0, "learning_rate": 0.00035714285714285714, "loss": 0.3173, "theoretical_loss": 6.737022569206117, "tokens_seen": 11796480 }, { "epoch": 0.0, "learning_rate": 0.00036507936507936505, "loss": 0.3156, "theoretical_loss": 6.710643317075979, "tokens_seen": 12058624 }, { "epoch": 0.0, "learning_rate": 0.000373015873015873, "loss": 0.3142, "theoretical_loss": 6.684988070009584, "tokens_seen": 12320768 }, { "epoch": 0.0, "learning_rate": 0.00038095238095238096, "loss": 0.3193, "theoretical_loss": 6.660022174811009, "tokens_seen": 12582912 }, { "epoch": 0.0, "learning_rate": 0.0003888888888888889, "loss": 0.3096, "theoretical_loss": 6.6357133153579175, "tokens_seen": 12845056 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.028042539954185486, "objective/train/docs_used": 12170, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 6.02435827255249, "objective/train/original_loss": 6.02435827255249, "objective/train/theoretical_loss": 6.612031311070119, "objective/train/tokens_used": 33567200, "objective/train/value_avg": -0.034576416015625, "objective/train/value_loss": 0.0012018069392070174, "objective/train/value_max": -0.01337432861328125, "objective/train/value_min": -0.052825927734375, "objective/train/value_reward_corr": -0.01474424872782136, "objective/train/value_std": 0.005672454833984375, "objective/train/weight_avg": 1.0286412239074707, "objective/train/weighted_lm_loss": 6.1940083503723145, "objective/train/weights_max": 1.054246187210083, "objective/train/weights_min": 0.3763794004917145, "theoretical_loss": 6.612031311070119, "tokens_seen": 13107200 }, { "epoch": 0.0, "learning_rate": 0.0003968253968253968, "loss": 0.309, "theoretical_loss": 6.612031311070119, "tokens_seen": 13107200 }, { "epoch": 0.0, "learning_rate": 0.0004047619047619048, "loss": 0.3093, "theoretical_loss": 6.588947936394168, "tokens_seen": 13369344 }, { "epoch": 0.0, "learning_rate": 0.0004126984126984127, "loss": 0.303, "theoretical_loss": 6.566436758747731, "tokens_seen": 13631488 }, { "epoch": 0.0, "learning_rate": 0.00042063492063492065, "loss": 0.3026, "theoretical_loss": 6.544472992721121, "tokens_seen": 13893632 }, { "epoch": 0.0, "learning_rate": 0.00042857142857142855, "loss": 0.3027, "theoretical_loss": 6.523033368632323, "tokens_seen": 14155776 }, { "epoch": 0.0, "learning_rate": 0.0004365079365079365, "loss": 0.3037, "theoretical_loss": 6.502096013785574, "tokens_seen": 14417920 }, { "epoch": 0.0, "learning_rate": 0.0004444444444444444, "loss": 0.2987, "theoretical_loss": 6.481640344999435, "tokens_seen": 14680064 }, { "epoch": 0.0, "learning_rate": 0.00045238095238095237, "loss": 0.3013, "theoretical_loss": 6.461646971154669, "tokens_seen": 14942208 }, { "epoch": 0.0, "learning_rate": 0.00046031746031746033, "loss": 0.3009, "theoretical_loss": 6.442097604670096, "tokens_seen": 15204352 }, { "epoch": 0.0, "learning_rate": 0.0004682539682539683, "loss": 0.291, "theoretical_loss": 6.422974980950157, "tokens_seen": 15466496 }, { "epoch": 0.0, "learning_rate": 0.0004761904761904762, "loss": 0.295, "theoretical_loss": 6.404262784964672, "tokens_seen": 15728640 }, { "epoch": 0.0, "learning_rate": 0.00048412698412698415, "loss": 0.2957, "theoretical_loss": 6.3859455842220765, "tokens_seen": 15990784 }, { "epoch": 0.0, "learning_rate": 0.000492063492063492, "loss": 0.2955, "theoretical_loss": 6.368008767484675, "tokens_seen": 16252928 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 0.2931, "theoretical_loss": 6.350438488650175, "tokens_seen": 16515072 }, { "epoch": 0.01, "learning_rate": 0.0005079365079365079, "loss": 0.2938, "theoretical_loss": 6.333221615289645, "tokens_seen": 16777216 }, { "epoch": 0.01, "learning_rate": 0.0005158730158730159, "loss": 0.2898, "theoretical_loss": 6.316345681389436, "tokens_seen": 17039360 }, { "epoch": 0.01, "learning_rate": 0.0005238095238095238, "loss": 0.2847, "theoretical_loss": 6.2997988438948465, "tokens_seen": 17301504 }, { "epoch": 0.01, "learning_rate": 0.0005317460317460317, "loss": 0.2833, "theoretical_loss": 6.283569842697203, "tokens_seen": 17563648 }, { "epoch": 0.01, "learning_rate": 0.0005396825396825396, "loss": 0.2873, "theoretical_loss": 6.26764796374462, "tokens_seen": 17825792 }, { "epoch": 0.01, "learning_rate": 0.0005476190476190477, "loss": 0.2833, "theoretical_loss": 6.25202300499066, "tokens_seen": 18087936 }, { "epoch": 0.01, "learning_rate": 0.0005555555555555556, "loss": 0.284, "theoretical_loss": 6.236685244924882, "tokens_seen": 18350080 }, { "epoch": 0.01, "learning_rate": 0.0005634920634920635, "loss": 0.2831, "theoretical_loss": 6.2216254134558024, "tokens_seen": 18612224 }, { "epoch": 0.01, "learning_rate": 0.0005714285714285714, "loss": 0.2838, "theoretical_loss": 6.206834664939976, "tokens_seen": 18874368 }, { "epoch": 0.01, "learning_rate": 0.0005793650793650794, "loss": 0.2854, "theoretical_loss": 6.192304553171669, "tokens_seen": 19136512 }, { "epoch": 0.01, "learning_rate": 0.0005873015873015874, "loss": 0.2811, "theoretical_loss": 6.178027008165916, "tokens_seen": 19398656 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.023865671828389168, "objective/train/docs_used": 14656, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 5.437821865081787, "objective/train/original_loss": 5.437821388244629, "objective/train/theoretical_loss": 6.163994314584031, "objective/train/tokens_used": 40120800, "objective/train/value_avg": -0.032073974609375, "objective/train/value_loss": 0.0011683752527460456, "objective/train/value_max": -0.00933837890625, "objective/train/value_min": -0.06573486328125, "objective/train/value_reward_corr": 0.06998308804235678, "objective/train/value_std": 0.01036834716796875, "objective/train/weight_avg": 1.0244362354278564, "objective/train/weighted_lm_loss": 5.583455562591553, "objective/train/weights_max": 1.0648194551467896, "objective/train/weights_min": 0.37916356325149536, "theoretical_loss": 6.163994314584031, "tokens_seen": 19660800 }, { "epoch": 0.01, "learning_rate": 0.0005952380952380953, "loss": 0.282, "theoretical_loss": 6.163994314584031, "tokens_seen": 19660800 }, { "epoch": 0.01, "learning_rate": 0.0006031746031746032, "loss": 0.2786, "theoretical_loss": 6.150199091665225, "tokens_seen": 19922944 }, { "epoch": 0.01, "learning_rate": 0.0006111111111111112, "loss": 0.2766, "theoretical_loss": 6.136634274540901, "tokens_seen": 20185088 }, { "epoch": 0.01, "learning_rate": 0.0006190476190476191, "loss": 0.279, "theoretical_loss": 6.123293096819758, "tokens_seen": 20447232 }, { "epoch": 0.01, "learning_rate": 0.000626984126984127, "loss": 0.2761, "theoretical_loss": 6.1101690743422505, "tokens_seen": 20709376 }, { "epoch": 0.01, "learning_rate": 0.0006349206349206349, "loss": 0.2751, "theoretical_loss": 6.097255990012153, "tokens_seen": 20971520 }, { "epoch": 0.01, "learning_rate": 0.0006428571428571429, "loss": 0.2728, "theoretical_loss": 6.084547879621354, "tokens_seen": 21233664 }, { "epoch": 0.01, "learning_rate": 0.0006507936507936508, "loss": 0.2741, "theoretical_loss": 6.072039018591484, "tokens_seen": 21495808 }, { "epoch": 0.01, "learning_rate": 0.0006587301587301587, "loss": 0.2744, "theoretical_loss": 6.059723909562683, "tokens_seen": 21757952 }, { "epoch": 0.01, "learning_rate": 0.0006666666666666666, "loss": 0.2699, "theoretical_loss": 6.047597270765904, "tokens_seen": 22020096 }, { "epoch": 0.01, "learning_rate": 0.0006746031746031747, "loss": 0.271, "theoretical_loss": 6.035654025120612, "tokens_seen": 22282240 }, { "epoch": 0.01, "learning_rate": 0.0006825396825396826, "loss": 0.2683, "theoretical_loss": 6.023889290004692, "tokens_seen": 22544384 }, { "epoch": 0.01, "learning_rate": 0.0006904761904761905, "loss": 0.2678, "theoretical_loss": 6.012298367647816, "tokens_seen": 22806528 }, { "epoch": 0.01, "learning_rate": 0.0006984126984126984, "loss": 0.2651, "theoretical_loss": 6.000876736103618, "tokens_seen": 23068672 }, { "epoch": 0.01, "learning_rate": 0.0007063492063492064, "loss": 0.2683, "theoretical_loss": 5.989620040759641, "tokens_seen": 23330816 }, { "epoch": 0.01, "learning_rate": 0.0007142857142857143, "loss": 0.2624, "theoretical_loss": 5.978524086347409, "tokens_seen": 23592960 }, { "epoch": 0.01, "learning_rate": 0.0007222222222222222, "loss": 0.2659, "theoretical_loss": 5.967584829417934, "tokens_seen": 23855104 }, { "epoch": 0.01, "learning_rate": 0.0007301587301587301, "loss": 0.2616, "theoretical_loss": 5.956798371250791, "tokens_seen": 24117248 }, { "epoch": 0.01, "learning_rate": 0.0007380952380952381, "loss": 0.2583, "theoretical_loss": 5.9461609511673625, "tokens_seen": 24379392 }, { "epoch": 0.01, "learning_rate": 0.000746031746031746, "loss": 0.2625, "theoretical_loss": 5.935668940221127, "tokens_seen": 24641536 }, { "epoch": 0.01, "learning_rate": 0.000753968253968254, "loss": 0.261, "theoretical_loss": 5.92531883523999, "tokens_seen": 24903680 }, { "epoch": 0.01, "learning_rate": 0.0007619047619047619, "loss": 0.26, "theoretical_loss": 5.915107253197538, "tokens_seen": 25165824 }, { "epoch": 0.01, "learning_rate": 0.0007698412698412699, "loss": 0.2623, "theoretical_loss": 5.905030925891829, "tokens_seen": 25427968 }, { "epoch": 0.01, "learning_rate": 0.0007777777777777778, "loss": 0.2632, "theoretical_loss": 5.895086694911951, "tokens_seen": 25690112 }, { "epoch": 0.01, "learning_rate": 0.0007857142857142857, "loss": 0.2608, "theoretical_loss": 5.88527150687402, "tokens_seen": 25952256 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.016880027949810028, "objective/train/docs_used": 17033, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 5.177478313446045, "objective/train/original_loss": 5.177477836608887, "objective/train/theoretical_loss": 5.8755824089096285, "objective/train/tokens_used": 46674400, "objective/train/value_avg": -0.0229949951171875, "objective/train/value_loss": 0.000880128238350153, "objective/train/value_max": -0.00821685791015625, "objective/train/value_min": -0.06536865234375, "objective/train/value_reward_corr": 0.07602733877190453, "objective/train/value_std": 0.00830841064453125, "objective/train/weight_avg": 1.017284870147705, "objective/train/weighted_lm_loss": 5.27687406539917, "objective/train/weights_max": 1.0675524473190308, "objective/train/weights_min": 0.3755991756916046, "theoretical_loss": 5.8755824089096285, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.0007936507936507937, "loss": 0.2611, "theoretical_loss": 5.8755824089096285, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.0008015873015873017, "loss": 0.2562, "theoretical_loss": 5.866016544391016, "tokens_seen": 26476544 }, { "epoch": 0.01, "learning_rate": 0.0008095238095238096, "loss": 0.253, "theoretical_loss": 5.856571148878293, "tokens_seen": 26738688 }, { "epoch": 0.01, "learning_rate": 0.0008174603174603175, "loss": 0.2532, "theoretical_loss": 5.847243546275179, "tokens_seen": 27000832 }, { "epoch": 0.01, "learning_rate": 0.0008253968253968254, "loss": 0.2524, "theoretical_loss": 5.838031145180573, "tokens_seen": 27262976 }, { "epoch": 0.01, "learning_rate": 0.0008333333333333334, "loss": 0.257, "theoretical_loss": 5.82893143542425, "tokens_seen": 27525120 }, { "epoch": 0.01, "learning_rate": 0.0008412698412698413, "loss": 0.253, "theoretical_loss": 5.81994198477569, "tokens_seen": 27787264 }, { "epoch": 0.01, "learning_rate": 0.0008492063492063492, "loss": 0.2507, "theoretical_loss": 5.811060435815881, "tokens_seen": 28049408 }, { "epoch": 0.01, "learning_rate": 0.0008571428571428571, "loss": 0.2532, "theoretical_loss": 5.802284502962563, "tokens_seen": 28311552 }, { "epoch": 0.01, "learning_rate": 0.0008650793650793651, "loss": 0.2512, "theoretical_loss": 5.793611969640068, "tokens_seen": 28573696 }, { "epoch": 0.01, "learning_rate": 0.000873015873015873, "loss": 0.2509, "theoretical_loss": 5.785040685585437, "tokens_seen": 28835840 }, { "epoch": 0.01, "learning_rate": 0.0008809523809523809, "loss": 0.2497, "theoretical_loss": 5.7765685642831155, "tokens_seen": 29097984 }, { "epoch": 0.01, "learning_rate": 0.0008888888888888888, "loss": 0.2538, "theoretical_loss": 5.768193580520972, "tokens_seen": 29360128 }, { "epoch": 0.01, "learning_rate": 0.0008968253968253968, "loss": 0.2495, "theoretical_loss": 5.759913768060882, "tokens_seen": 29622272 }, { "epoch": 0.01, "learning_rate": 0.0009047619047619047, "loss": 0.2494, "theoretical_loss": 5.7517272174175496, "tokens_seen": 29884416 }, { "epoch": 0.01, "learning_rate": 0.0009126984126984126, "loss": 0.2498, "theoretical_loss": 5.743632073739626, "tokens_seen": 30146560 }, { "epoch": 0.01, "learning_rate": 0.0009206349206349207, "loss": 0.2478, "theoretical_loss": 5.735626534787584, "tokens_seen": 30408704 }, { "epoch": 0.01, "learning_rate": 0.0009285714285714287, "loss": 0.2429, "theoretical_loss": 5.727708849003127, "tokens_seen": 30670848 }, { "epoch": 0.01, "learning_rate": 0.0009365079365079366, "loss": 0.2443, "theoretical_loss": 5.719877313665254, "tokens_seen": 30932992 }, { "epoch": 0.01, "learning_rate": 0.0009444444444444445, "loss": 0.2456, "theoretical_loss": 5.712130273128388, "tokens_seen": 31195136 }, { "epoch": 0.01, "learning_rate": 0.0009523809523809524, "loss": 0.2472, "theoretical_loss": 5.704466117138258, "tokens_seen": 31457280 }, { "epoch": 0.01, "learning_rate": 0.0009603174603174604, "loss": 0.2447, "theoretical_loss": 5.696883279221504, "tokens_seen": 31719424 }, { "epoch": 0.01, "learning_rate": 0.0009682539682539683, "loss": 0.2421, "theoretical_loss": 5.689380235145171, "tokens_seen": 31981568 }, { "epoch": 0.01, "learning_rate": 0.0009761904761904762, "loss": 0.2447, "theoretical_loss": 5.6819555014425305, "tokens_seen": 32243712 }, { "epoch": 0.01, "learning_rate": 0.000984126984126984, "loss": 0.2412, "theoretical_loss": 5.674607634001871, "tokens_seen": 32505856 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.012133671902120113, "objective/train/docs_used": 19239, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.836267948150635, "objective/train/original_loss": 4.836267471313477, "objective/train/theoretical_loss": 5.667335226715059, "objective/train/tokens_used": 53228000, "objective/train/value_avg": -0.0186767578125, "objective/train/value_loss": 0.0005454737693071365, "objective/train/value_max": -0.00572967529296875, "objective/train/value_min": -0.0872802734375, "objective/train/value_reward_corr": 0.07709606794972773, "objective/train/value_std": 0.0084075927734375, "objective/train/weight_avg": 1.0123944282531738, "objective/train/weighted_lm_loss": 4.902460098266602, "objective/train/weights_max": 1.077897310256958, "objective/train/weights_min": 0.3729293644428253, "theoretical_loss": 5.667335226715059, "tokens_seen": 32768000 }, { "epoch": 0.01, "learning_rate": 0.000992063492063492, "loss": 0.2475, "theoretical_loss": 5.667335226715059, "tokens_seen": 32768000 }, { "epoch": 0.01, "learning_rate": 0.001, "loss": 0.243, "theoretical_loss": 5.6601369101828904, "tokens_seen": 33030144 }, { "epoch": 0.01, "learning_rate": 0.0009999197560584176, "loss": 0.2423, "theoretical_loss": 5.6530113504744435, "tokens_seen": 33292288 }, { "epoch": 0.01, "learning_rate": 0.0009998395121168352, "loss": 0.2419, "theoretical_loss": 5.645957247937725, "tokens_seen": 33554432 }, { "epoch": 0.01, "learning_rate": 0.0009997592681752529, "loss": 0.2402, "theoretical_loss": 5.638973336059157, "tokens_seen": 33816576 }, { "epoch": 0.01, "learning_rate": 0.0009996790242336705, "loss": 0.235, "theoretical_loss": 5.632058380369512, "tokens_seen": 34078720 }, { "epoch": 0.01, "learning_rate": 0.0009995987802920879, "loss": 0.2355, "theoretical_loss": 5.625211177394046, "tokens_seen": 34340864 }, { "epoch": 0.01, "learning_rate": 0.0009995185363505057, "loss": 0.2349, "theoretical_loss": 5.618430553644782, "tokens_seen": 34603008 }, { "epoch": 0.01, "learning_rate": 0.000999438292408923, "loss": 0.2367, "theoretical_loss": 5.611715364652864, "tokens_seen": 34865152 }, { "epoch": 0.01, "learning_rate": 0.0009993580484673407, "loss": 0.2314, "theoretical_loss": 5.605064494039176, "tokens_seen": 35127296 }, { "epoch": 0.01, "learning_rate": 0.0009992778045257583, "loss": 0.2299, "theoretical_loss": 5.598476852621397, "tokens_seen": 35389440 }, { "epoch": 0.01, "learning_rate": 0.000999197560584176, "loss": 0.2361, "theoretical_loss": 5.591951377555809, "tokens_seen": 35651584 }, { "epoch": 0.01, "learning_rate": 0.0009991173166425935, "loss": 0.2362, "theoretical_loss": 5.585487031512276, "tokens_seen": 35913728 }, { "epoch": 0.01, "learning_rate": 0.0009990370727010112, "loss": 0.2358, "theoretical_loss": 5.579082801880871, "tokens_seen": 36175872 }, { "epoch": 0.01, "learning_rate": 0.0009989568287594288, "loss": 0.2332, "theoretical_loss": 5.572737700008718, "tokens_seen": 36438016 }, { "epoch": 0.01, "learning_rate": 0.0009988765848178462, "loss": 0.2354, "theoretical_loss": 5.56645076046569, "tokens_seen": 36700160 }, { "epoch": 0.01, "learning_rate": 0.0009987963408762638, "loss": 0.2326, "theoretical_loss": 5.5602210403376775, "tokens_seen": 36962304 }, { "epoch": 0.01, "learning_rate": 0.0009987160969346814, "loss": 0.2372, "theoretical_loss": 5.554047618546193, "tokens_seen": 37224448 }, { "epoch": 0.01, "learning_rate": 0.000998635852993099, "loss": 0.2413, "theoretical_loss": 5.547929595193182, "tokens_seen": 37486592 }, { "epoch": 0.01, "learning_rate": 0.0009985556090515166, "loss": 0.2345, "theoretical_loss": 5.5418660909298945, "tokens_seen": 37748736 }, { "epoch": 0.01, "learning_rate": 0.0009984753651099342, "loss": 0.2308, "theoretical_loss": 5.535856246348814, "tokens_seen": 38010880 }, { "epoch": 0.01, "learning_rate": 0.0009983951211683518, "loss": 0.2326, "theoretical_loss": 5.529899221397624, "tokens_seen": 38273024 }, { "epoch": 0.01, "learning_rate": 0.0009983148772267695, "loss": 0.2299, "theoretical_loss": 5.523994194814273, "tokens_seen": 38535168 }, { "epoch": 0.01, "learning_rate": 0.000998234633285187, "loss": 0.2315, "theoretical_loss": 5.518140363582252, "tokens_seen": 38797312 }, { "epoch": 0.01, "learning_rate": 0.0009981543893436047, "loss": 0.2336, "theoretical_loss": 5.512336942405216, "tokens_seen": 39059456 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.007620680145919323, "objective/train/docs_used": 21760, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.368919372558594, "objective/train/original_loss": 4.368919372558594, "objective/train/theoretical_loss": 5.506583163200142, "objective/train/tokens_used": 59781600, "objective/train/value_avg": -0.0172576904296875, "objective/train/value_loss": 0.0010155023774132133, "objective/train/value_max": -0.00572967529296875, "objective/train/value_min": -0.12042236328125, "objective/train/value_reward_corr": 0.19761336452653674, "objective/train/value_std": 0.0086517333984375, "objective/train/weight_avg": 1.0080623626708984, "objective/train/weighted_lm_loss": 4.408056735992432, "objective/train/weights_max": 1.1209726333618164, "objective/train/weights_min": 0.3737069368362427, "theoretical_loss": 5.506583163200142, "tokens_seen": 39321600 }, { "epoch": 0.01, "learning_rate": 0.000998074145402022, "loss": 0.2265, "theoretical_loss": 5.506583163200142, "tokens_seen": 39321600 }, { "epoch": 0.01, "learning_rate": 0.0009979939014604397, "loss": 0.2311, "theoretical_loss": 5.5008782746082625, "tokens_seen": 39583744 }, { "epoch": 0.01, "learning_rate": 0.0009979136575188573, "loss": 0.2287, "theoretical_loss": 5.495221541523011, "tokens_seen": 39845888 }, { "epoch": 0.01, "learning_rate": 0.000997833413577275, "loss": 0.2302, "theoretical_loss": 5.489612244634316, "tokens_seen": 40108032 }, { "epoch": 0.01, "learning_rate": 0.0009977531696356925, "loss": 0.2287, "theoretical_loss": 5.48404967998854, "tokens_seen": 40370176 }, { "epoch": 0.01, "learning_rate": 0.0009976729256941101, "loss": 0.2282, "theoretical_loss": 5.478533158563456, "tokens_seen": 40632320 }, { "epoch": 0.01, "learning_rate": 0.0009975926817525277, "loss": 0.2321, "theoretical_loss": 5.473062005857637, "tokens_seen": 40894464 }, { "epoch": 0.01, "learning_rate": 0.0009975124378109451, "loss": 0.2303, "theoretical_loss": 5.467635561493681, "tokens_seen": 41156608 }, { "epoch": 0.01, "learning_rate": 0.000997432193869363, "loss": 0.2289, "theoretical_loss": 5.462253178834744, "tokens_seen": 41418752 }, { "epoch": 0.01, "learning_rate": 0.0009973519499277804, "loss": 0.225, "theoretical_loss": 5.456914224613812, "tokens_seen": 41680896 }, { "epoch": 0.01, "learning_rate": 0.0009972717059861982, "loss": 0.228, "theoretical_loss": 5.451618078575256, "tokens_seen": 41943040 }, { "epoch": 0.01, "learning_rate": 0.0009971914620446156, "loss": 0.2249, "theoretical_loss": 5.446364133128155, "tokens_seen": 42205184 }, { "epoch": 0.01, "learning_rate": 0.0009971112181030332, "loss": 0.224, "theoretical_loss": 5.44115179301095, "tokens_seen": 42467328 }, { "epoch": 0.01, "learning_rate": 0.0009970309741614508, "loss": 0.2268, "theoretical_loss": 5.435980474966981, "tokens_seen": 42729472 }, { "epoch": 0.01, "learning_rate": 0.0009969507302198684, "loss": 0.223, "theoretical_loss": 5.430849607430501, "tokens_seen": 42991616 }, { "epoch": 0.01, "learning_rate": 0.000996870486278286, "loss": 0.2224, "theoretical_loss": 5.425758630222747, "tokens_seen": 43253760 }, { "epoch": 0.01, "learning_rate": 0.0009967902423367037, "loss": 0.227, "theoretical_loss": 5.42070699425771, "tokens_seen": 43515904 }, { "epoch": 0.01, "learning_rate": 0.0009967099983951213, "loss": 0.2264, "theoretical_loss": 5.415694161257225, "tokens_seen": 43778048 }, { "epoch": 0.01, "learning_rate": 0.0009966297544535387, "loss": 0.2205, "theoretical_loss": 5.410719603475034, "tokens_seen": 44040192 }, { "epoch": 0.01, "learning_rate": 0.0009965495105119565, "loss": 0.2239, "theoretical_loss": 5.405782803429483, "tokens_seen": 44302336 }, { "epoch": 0.01, "learning_rate": 0.0009964692665703739, "loss": 0.2202, "theoretical_loss": 5.400883253644551, "tokens_seen": 44564480 }, { "epoch": 0.01, "learning_rate": 0.0009963890226287915, "loss": 0.224, "theoretical_loss": 5.396020456398885, "tokens_seen": 44826624 }, { "epoch": 0.01, "learning_rate": 0.0009963087786872091, "loss": 0.217, "theoretical_loss": 5.391193923482547, "tokens_seen": 45088768 }, { "epoch": 0.01, "learning_rate": 0.0009962285347456267, "loss": 0.2206, "theoretical_loss": 5.386403175961223, "tokens_seen": 45350912 }, { "epoch": 0.01, "learning_rate": 0.0009961482908040443, "loss": 0.2205, "theoretical_loss": 5.381647743947578, "tokens_seen": 45613056 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.008024879731237888, "objective/train/docs_used": 24173, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.302799701690674, "objective/train/original_loss": 4.302799701690674, "objective/train/theoretical_loss": 5.37692716637954, "objective/train/tokens_used": 66335200, "objective/train/value_avg": -0.017547607421875, "objective/train/value_loss": 0.0007267643231898546, "objective/train/value_max": -0.0055999755859375, "objective/train/value_min": -0.1319580078125, "objective/train/value_reward_corr": 0.2775875654342026, "objective/train/value_std": 0.0094757080078125, "objective/train/weight_avg": 1.0083508491516113, "objective/train/weighted_lm_loss": 4.344386577606201, "objective/train/weights_max": 1.0802388191223145, "objective/train/weights_min": 0.23186293244361877, "theoretical_loss": 5.37692716637954, "tokens_seen": 45875200 }, { "epoch": 0.01, "learning_rate": 0.000996068046862462, "loss": 0.223, "theoretical_loss": 5.37692716637954, "tokens_seen": 45875200 }, { "epoch": 0.01, "learning_rate": 0.0009959878029208796, "loss": 0.2202, "theoretical_loss": 5.372240990805237, "tokens_seen": 46137344 }, { "epoch": 0.01, "learning_rate": 0.0009959075589792972, "loss": 0.2182, "theoretical_loss": 5.367588773174377, "tokens_seen": 46399488 }, { "epoch": 0.01, "learning_rate": 0.0009958273150377146, "loss": 0.2236, "theoretical_loss": 5.36297007763582, "tokens_seen": 46661632 }, { "epoch": 0.01, "learning_rate": 0.0009957470710961322, "loss": 0.2225, "theoretical_loss": 5.358384476341126, "tokens_seen": 46923776 }, { "epoch": 0.01, "learning_rate": 0.0009956668271545498, "loss": 0.2186, "theoretical_loss": 5.353831549253895, "tokens_seen": 47185920 }, { "epoch": 0.01, "learning_rate": 0.0009955865832129674, "loss": 0.2205, "theoretical_loss": 5.349310883964664, "tokens_seen": 47448064 }, { "epoch": 0.01, "learning_rate": 0.000995506339271385, "loss": 0.2193, "theoretical_loss": 5.344822075511196, "tokens_seen": 47710208 }, { "epoch": 0.01, "learning_rate": 0.0009954260953298026, "loss": 0.2176, "theoretical_loss": 5.340364726203955, "tokens_seen": 47972352 }, { "epoch": 0.01, "learning_rate": 0.0009953458513882203, "loss": 0.2151, "theoretical_loss": 5.3359384454566055, "tokens_seen": 48234496 }, { "epoch": 0.01, "learning_rate": 0.0009952656074466376, "loss": 0.2138, "theoretical_loss": 5.331542849621357, "tokens_seen": 48496640 }, { "epoch": 0.01, "learning_rate": 0.0009951853635050555, "loss": 0.2168, "theoretical_loss": 5.327177561828993, "tokens_seen": 48758784 }, { "epoch": 0.01, "learning_rate": 0.0009951051195634729, "loss": 0.2189, "theoretical_loss": 5.32284221183342, "tokens_seen": 49020928 }, { "epoch": 0.01, "learning_rate": 0.0009950248756218905, "loss": 0.2206, "theoretical_loss": 5.318536435860599, "tokens_seen": 49283072 }, { "epoch": 0.02, "learning_rate": 0.000994944631680308, "loss": 0.2147, "theoretical_loss": 5.314259876461705, "tokens_seen": 49545216 }, { "epoch": 0.02, "learning_rate": 0.0009948643877387257, "loss": 0.2176, "theoretical_loss": 5.310012182370359, "tokens_seen": 49807360 }, { "epoch": 0.02, "learning_rate": 0.0009947841437971433, "loss": 0.214, "theoretical_loss": 5.305793008363841, "tokens_seen": 50069504 }, { "epoch": 0.02, "learning_rate": 0.000994703899855561, "loss": 0.2142, "theoretical_loss": 5.301602015128104, "tokens_seen": 50331648 }, { "epoch": 0.02, "learning_rate": 0.0009946236559139785, "loss": 0.2118, "theoretical_loss": 5.297438869126498, "tokens_seen": 50593792 }, { "epoch": 0.02, "learning_rate": 0.0009945434119723962, "loss": 0.2195, "theoretical_loss": 5.293303242472074, "tokens_seen": 50855936 }, { "epoch": 0.02, "learning_rate": 0.0009944631680308138, "loss": 0.2132, "theoretical_loss": 5.289194812803347, "tokens_seen": 51118080 }, { "epoch": 0.02, "learning_rate": 0.0009943829240892312, "loss": 0.2113, "theoretical_loss": 5.285113263163414, "tokens_seen": 51380224 }, { "epoch": 0.02, "learning_rate": 0.000994302680147649, "loss": 0.2133, "theoretical_loss": 5.2810582818823235, "tokens_seen": 51642368 }, { "epoch": 0.02, "learning_rate": 0.0009942224362060664, "loss": 0.2144, "theoretical_loss": 5.27702956246258, "tokens_seen": 51904512 }, { "epoch": 0.02, "learning_rate": 0.000994142192264484, "loss": 0.2125, "theoretical_loss": 5.273026803467695, "tokens_seen": 52166656 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.007762981578707695, "objective/train/docs_used": 26541, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 4.0233917236328125, "objective/train/original_loss": 4.023391246795654, "objective/train/theoretical_loss": 5.269049708413682, "objective/train/tokens_used": 72888800, "objective/train/value_avg": -0.0139312744140625, "objective/train/value_loss": 0.0003585839585866779, "objective/train/value_max": -0.004230499267578125, "objective/train/value_min": -0.0765380859375, "objective/train/value_reward_corr": 0.23784686314537432, "objective/train/value_std": 0.006893157958984375, "objective/train/weight_avg": 1.0079320669174194, "objective/train/weighted_lm_loss": 4.059372901916504, "objective/train/weights_max": 1.0704232454299927, "objective/train/weights_min": 0.3713054358959198, "theoretical_loss": 5.269049708413682, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0009940619483229016, "loss": 0.2108, "theoretical_loss": 5.269049708413682, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0009939817043813192, "loss": 0.2138, "theoretical_loss": 5.265097985663418, "tokens_seen": 52690944 }, { "epoch": 0.02, "learning_rate": 0.0009939014604397368, "loss": 0.2121, "theoretical_loss": 5.261171348323755, "tokens_seen": 52953088 }, { "epoch": 0.02, "learning_rate": 0.0009938212164981545, "loss": 0.2093, "theoretical_loss": 5.257269514145324, "tokens_seen": 53215232 }, { "epoch": 0.02, "learning_rate": 0.000993740972556572, "loss": 0.2068, "theoretical_loss": 5.2533922054249365, "tokens_seen": 53477376 }, { "epoch": 0.02, "learning_rate": 0.0009936607286149895, "loss": 0.2108, "theoretical_loss": 5.2495391489104986, "tokens_seen": 53739520 }, { "epoch": 0.02, "learning_rate": 0.0009935804846734073, "loss": 0.2142, "theoretical_loss": 5.24571007570837, "tokens_seen": 54001664 }, { "epoch": 0.02, "learning_rate": 0.0009935002407318247, "loss": 0.2101, "theoretical_loss": 5.2419047211930865, "tokens_seen": 54263808 }, { "epoch": 0.02, "learning_rate": 0.0009934199967902423, "loss": 0.2067, "theoretical_loss": 5.238122824919387, "tokens_seen": 54525952 }, { "epoch": 0.02, "learning_rate": 0.00099333975284866, "loss": 0.2105, "theoretical_loss": 5.234364130536457, "tokens_seen": 54788096 }, { "epoch": 0.02, "learning_rate": 0.0009932595089070775, "loss": 0.2097, "theoretical_loss": 5.230628385704337, "tokens_seen": 55050240 }, { "epoch": 0.02, "learning_rate": 0.0009931792649654951, "loss": 0.208, "theoretical_loss": 5.2269153420124255, "tokens_seen": 55312384 }, { "epoch": 0.02, "learning_rate": 0.0009930990210239128, "loss": 0.2087, "theoretical_loss": 5.223224754900014, "tokens_seen": 55574528 }, { "epoch": 0.02, "learning_rate": 0.0009930187770823304, "loss": 0.2098, "theoretical_loss": 5.219556383578795, "tokens_seen": 55836672 }, { "epoch": 0.02, "learning_rate": 0.000992938533140748, "loss": 0.2064, "theoretical_loss": 5.215909990957291, "tokens_seen": 56098816 }, { "epoch": 0.02, "learning_rate": 0.0009928582891991654, "loss": 0.2073, "theoretical_loss": 5.212285343567135, "tokens_seen": 56360960 }, { "epoch": 0.02, "learning_rate": 0.000992778045257583, "loss": 0.2064, "theoretical_loss": 5.208682211491157, "tokens_seen": 56623104 }, { "epoch": 0.02, "learning_rate": 0.0009926978013160006, "loss": 0.2043, "theoretical_loss": 5.205100368293225, "tokens_seen": 56885248 }, { "epoch": 0.02, "learning_rate": 0.0009926175573744182, "loss": 0.2038, "theoretical_loss": 5.201539590949796, "tokens_seen": 57147392 }, { "epoch": 0.02, "learning_rate": 0.0009925373134328358, "loss": 0.2055, "theoretical_loss": 5.1979996597831, "tokens_seen": 57409536 }, { "epoch": 0.02, "learning_rate": 0.0009924570694912534, "loss": 0.2043, "theoretical_loss": 5.1944803583959525, "tokens_seen": 57671680 }, { "epoch": 0.02, "learning_rate": 0.000992376825549671, "loss": 0.2057, "theoretical_loss": 5.190981473608112, "tokens_seen": 57933824 }, { "epoch": 0.02, "learning_rate": 0.0009922965816080887, "loss": 0.2065, "theoretical_loss": 5.18750279539416, "tokens_seen": 58195968 }, { "epoch": 0.02, "learning_rate": 0.0009922163376665063, "loss": 0.2025, "theoretical_loss": 5.184044116822849, "tokens_seen": 58458112 }, { "epoch": 0.02, "learning_rate": 0.0009921360937249237, "loss": 0.2079, "theoretical_loss": 5.1806052339978965, "tokens_seen": 58720256 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.005310404114425182, "objective/train/docs_used": 29071, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.910093069076538, "objective/train/original_loss": 3.910093069076538, "objective/train/theoretical_loss": 5.1771859460001455, "objective/train/tokens_used": 79442400, "objective/train/value_avg": -0.01593017578125, "objective/train/value_loss": 0.0005049873143434525, "objective/train/value_max": -0.004070281982421875, "objective/train/value_min": -0.09368896484375, "objective/train/value_reward_corr": 0.3727184924290943, "objective/train/value_std": 0.00925445556640625, "objective/train/weight_avg": 1.005548357963562, "objective/train/weighted_lm_loss": 3.935572624206543, "objective/train/weights_max": 1.0910027027130127, "objective/train/weights_min": 0.37153780460357666, "theoretical_loss": 5.1771859460001455, "tokens_seen": 58982400 }, { "epoch": 0.02, "learning_rate": 0.0009920558497833415, "loss": 0.2002, "theoretical_loss": 5.1771859460001455, "tokens_seen": 58982400 }, { "epoch": 0.02, "learning_rate": 0.000991975605841759, "loss": 0.2064, "theoretical_loss": 5.1737860548311065, "tokens_seen": 59244544 }, { "epoch": 0.02, "learning_rate": 0.0009918953619001765, "loss": 0.204, "theoretical_loss": 5.170405365357794, "tokens_seen": 59506688 }, { "epoch": 0.02, "learning_rate": 0.0009918151179585941, "loss": 0.2007, "theoretical_loss": 5.167043685258852, "tokens_seen": 59768832 }, { "epoch": 0.02, "learning_rate": 0.0009917348740170117, "loss": 0.2029, "theoretical_loss": 5.163700824971922, "tokens_seen": 60030976 }, { "epoch": 0.02, "learning_rate": 0.0009916546300754293, "loss": 0.2062, "theoretical_loss": 5.160376597642223, "tokens_seen": 60293120 }, { "epoch": 0.02, "learning_rate": 0.000991574386133847, "loss": 0.208, "theoretical_loss": 5.157070819072301, "tokens_seen": 60555264 }, { "epoch": 0.02, "learning_rate": 0.0009914941421922646, "loss": 0.2057, "theoretical_loss": 5.153783307672935, "tokens_seen": 60817408 }, { "epoch": 0.02, "learning_rate": 0.000991413898250682, "loss": 0.2046, "theoretical_loss": 5.150513884415149, "tokens_seen": 61079552 }, { "epoch": 0.02, "learning_rate": 0.0009913336543090998, "loss": 0.2011, "theoretical_loss": 5.14726237278331, "tokens_seen": 61341696 }, { "epoch": 0.02, "learning_rate": 0.0009912534103675172, "loss": 0.2057, "theoretical_loss": 5.144028598729285, "tokens_seen": 61603840 }, { "epoch": 0.02, "learning_rate": 0.0009911731664259348, "loss": 0.2008, "theoretical_loss": 5.140812390627624, "tokens_seen": 61865984 }, { "epoch": 0.02, "learning_rate": 0.0009910929224843524, "loss": 0.2033, "theoretical_loss": 5.137613579231737, "tokens_seen": 62128128 }, { "epoch": 0.02, "learning_rate": 0.00099101267854277, "loss": 0.2025, "theoretical_loss": 5.134431997631053, "tokens_seen": 62390272 }, { "epoch": 0.02, "learning_rate": 0.0009909324346011876, "loss": 0.1987, "theoretical_loss": 5.1312674812091235, "tokens_seen": 62652416 }, { "epoch": 0.02, "learning_rate": 0.0009908521906596053, "loss": 0.1978, "theoretical_loss": 5.128119867602646, "tokens_seen": 62914560 }, { "epoch": 0.02, "learning_rate": 0.0009907719467180229, "loss": 0.2007, "theoretical_loss": 5.124988996661393, "tokens_seen": 63176704 }, { "epoch": 0.02, "learning_rate": 0.0009906917027764405, "loss": 0.1967, "theoretical_loss": 5.121874710409012, "tokens_seen": 63438848 }, { "epoch": 0.02, "learning_rate": 0.000990611458834858, "loss": 0.1992, "theoretical_loss": 5.118776853004677, "tokens_seen": 63700992 }, { "epoch": 0.02, "learning_rate": 0.0009905312148932755, "loss": 0.1973, "theoretical_loss": 5.115695270705579, "tokens_seen": 63963136 }, { "epoch": 0.02, "learning_rate": 0.000990450970951693, "loss": 0.1952, "theoretical_loss": 5.112629811830217, "tokens_seen": 64225280 }, { "epoch": 0.02, "learning_rate": 0.0009903707270101107, "loss": 0.1983, "theoretical_loss": 5.10958032672248, "tokens_seen": 64487424 }, { "epoch": 0.02, "learning_rate": 0.0009902904830685283, "loss": 0.1941, "theoretical_loss": 5.106546667716508, "tokens_seen": 64749568 }, { "epoch": 0.02, "learning_rate": 0.000990210239126946, "loss": 0.2008, "theoretical_loss": 5.103528689102281, "tokens_seen": 65011712 }, { "epoch": 0.02, "learning_rate": 0.0009901299951853636, "loss": 0.1951, "theoretical_loss": 5.100526247091967, "tokens_seen": 65273856 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.008375543169677258, "objective/train/docs_used": 31526, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.9859790802001953, "objective/train/original_loss": 3.9859793186187744, "objective/train/theoretical_loss": 5.097539199786951, "objective/train/tokens_used": 85996000, "objective/train/value_avg": -0.0173187255859375, "objective/train/value_loss": 0.0005732561694458127, "objective/train/value_max": -0.004886627197265625, "objective/train/value_min": -0.1669921875, "objective/train/value_reward_corr": 0.3654351277123077, "objective/train/value_std": 0.0103912353515625, "objective/train/weight_avg": 1.0086482763290405, "objective/train/weighted_lm_loss": 4.023629665374756, "objective/train/weights_max": 1.1241106986999512, "objective/train/weights_min": 0.49856844544410706, "theoretical_loss": 5.097539199786951, "tokens_seen": 65536000 }, { "epoch": 0.02, "learning_rate": 0.0009900497512437812, "loss": 0.1982, "theoretical_loss": 5.097539199786951, "tokens_seen": 65536000 }, { "epoch": 0.02, "learning_rate": 0.0009899695073021988, "loss": 0.1987, "theoretical_loss": 5.094567407145588, "tokens_seen": 65798144 }, { "epoch": 0.02, "learning_rate": 0.0009898892633606162, "loss": 0.1958, "theoretical_loss": 5.09161073095161, "tokens_seen": 66060288 }, { "epoch": 0.02, "learning_rate": 0.000989809019419034, "loss": 0.1928, "theoretical_loss": 5.0886690347832015, "tokens_seen": 66322432 }, { "epoch": 0.02, "learning_rate": 0.0009897287754774514, "loss": 0.1945, "theoretical_loss": 5.0857421839827275, "tokens_seen": 66584576 }, { "epoch": 0.02, "learning_rate": 0.000989648531535869, "loss": 0.1912, "theoretical_loss": 5.082830045627072, "tokens_seen": 66846720 }, { "epoch": 0.02, "learning_rate": 0.0009895682875942866, "loss": 0.1962, "theoretical_loss": 5.079932488498602, "tokens_seen": 67108864 }, { "epoch": 0.02, "learning_rate": 0.0009894880436527042, "loss": 0.1935, "theoretical_loss": 5.077049383056725, "tokens_seen": 67371008 }, { "epoch": 0.02, "learning_rate": 0.0009894077997111218, "loss": 0.1957, "theoretical_loss": 5.074180601410026, "tokens_seen": 67633152 }, { "epoch": 0.02, "learning_rate": 0.0009893275557695395, "loss": 0.1884, "theoretical_loss": 5.0713260172889845, "tokens_seen": 67895296 }, { "epoch": 0.02, "learning_rate": 0.000989247311827957, "loss": 0.1908, "theoretical_loss": 5.068485506019231, "tokens_seen": 68157440 }, { "epoch": 0.02, "learning_rate": 0.0009891670678863745, "loss": 0.1931, "theoretical_loss": 5.06565894449535, "tokens_seen": 68419584 }, { "epoch": 0.02, "learning_rate": 0.0009890868239447923, "loss": 0.1936, "theoretical_loss": 5.06284621115523, "tokens_seen": 68681728 }, { "epoch": 0.02, "learning_rate": 0.0009890065800032097, "loss": 0.198, "theoretical_loss": 5.060047185954893, "tokens_seen": 68943872 }, { "epoch": 0.02, "learning_rate": 0.0009889263360616273, "loss": 0.1926, "theoretical_loss": 5.057261750343864, "tokens_seen": 69206016 }, { "epoch": 0.02, "learning_rate": 0.000988846092120045, "loss": 0.1934, "theoretical_loss": 5.0544897872410095, "tokens_seen": 69468160 }, { "epoch": 0.02, "learning_rate": 0.0009887658481784625, "loss": 0.192, "theoretical_loss": 5.051731181010866, "tokens_seen": 69730304 }, { "epoch": 0.02, "learning_rate": 0.0009886856042368801, "loss": 0.1905, "theoretical_loss": 5.048985817440432, "tokens_seen": 69992448 }, { "epoch": 0.02, "learning_rate": 0.0009886053602952978, "loss": 0.1939, "theoretical_loss": 5.046253583716425, "tokens_seen": 70254592 }, { "epoch": 0.02, "learning_rate": 0.0009885251163537154, "loss": 0.1908, "theoretical_loss": 5.043534368402973, "tokens_seen": 70516736 }, { "epoch": 0.02, "learning_rate": 0.000988444872412133, "loss": 0.19, "theoretical_loss": 5.040828061419762, "tokens_seen": 70778880 }, { "epoch": 0.02, "learning_rate": 0.0009883646284705506, "loss": 0.1928, "theoretical_loss": 5.038134554020587, "tokens_seen": 71041024 }, { "epoch": 0.02, "learning_rate": 0.000988284384528968, "loss": 0.1893, "theoretical_loss": 5.03545373877234, "tokens_seen": 71303168 }, { "epoch": 0.02, "learning_rate": 0.0009882041405873858, "loss": 0.1902, "theoretical_loss": 5.032785509534391, "tokens_seen": 71565312 }, { "epoch": 0.02, "learning_rate": 0.0009881238966458032, "loss": 0.1882, "theoretical_loss": 5.030129761438376, "tokens_seen": 71827456 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.006676941178739071, "objective/train/docs_used": 33987, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.692817211151123, "objective/train/original_loss": 3.692817211151123, "objective/train/theoretical_loss": 5.0274863908683685, "objective/train/tokens_used": 92549600, "objective/train/value_avg": -0.01511383056640625, "objective/train/value_loss": 0.0004530175356194377, "objective/train/value_max": -0.00426483154296875, "objective/train/value_min": -0.100341796875, "objective/train/value_reward_corr": 0.2687911557831162, "objective/train/value_std": 0.00835418701171875, "objective/train/weight_avg": 1.0068917274475098, "objective/train/weighted_lm_loss": 3.7194132804870605, "objective/train/weights_max": 1.0913752317428589, "objective/train/weights_min": 0.37156614661216736, "theoretical_loss": 5.0274863908683685, "tokens_seen": 72089600 }, { "epoch": 0.02, "learning_rate": 0.0009880436527042208, "loss": 0.1875, "theoretical_loss": 5.0274863908683685, "tokens_seen": 72089600 }, { "epoch": 0.02, "learning_rate": 0.0009879634087626384, "loss": 0.1911, "theoretical_loss": 5.024855295441432, "tokens_seen": 72351744 }, { "epoch": 0.02, "learning_rate": 0.000987883164821056, "loss": 0.189, "theoretical_loss": 5.022236373988544, "tokens_seen": 72613888 }, { "epoch": 0.02, "learning_rate": 0.0009878029208794737, "loss": 0.1874, "theoretical_loss": 5.01962952653588, "tokens_seen": 72876032 }, { "epoch": 0.02, "learning_rate": 0.0009877226769378913, "loss": 0.1878, "theoretical_loss": 5.017034654286462, "tokens_seen": 73138176 }, { "epoch": 0.02, "learning_rate": 0.000987642432996309, "loss": 0.19, "theoretical_loss": 5.0144516596021385, "tokens_seen": 73400320 }, { "epoch": 0.02, "learning_rate": 0.0009875621890547263, "loss": 0.1827, "theoretical_loss": 5.011880445985916, "tokens_seen": 73662464 }, { "epoch": 0.02, "learning_rate": 0.000987481945113144, "loss": 0.1884, "theoretical_loss": 5.009320918064615, "tokens_seen": 73924608 }, { "epoch": 0.02, "learning_rate": 0.0009874017011715615, "loss": 0.1884, "theoretical_loss": 5.006772981571855, "tokens_seen": 74186752 }, { "epoch": 0.02, "learning_rate": 0.0009873214572299791, "loss": 0.1866, "theoretical_loss": 5.004236543331345, "tokens_seen": 74448896 }, { "epoch": 0.02, "learning_rate": 0.0009872412132883967, "loss": 0.1851, "theoretical_loss": 5.001711511240506, "tokens_seen": 74711040 }, { "epoch": 0.02, "learning_rate": 0.0009871609693468144, "loss": 0.1864, "theoretical_loss": 4.999197794254371, "tokens_seen": 74973184 }, { "epoch": 0.02, "learning_rate": 0.000987080725405232, "loss": 0.1857, "theoretical_loss": 4.9966953023697975, "tokens_seen": 75235328 }, { "epoch": 0.02, "learning_rate": 0.0009870004814636496, "loss": 0.1855, "theoretical_loss": 4.994203946609964, "tokens_seen": 75497472 }, { "epoch": 0.02, "learning_rate": 0.000986920237522067, "loss": 0.1828, "theoretical_loss": 4.991723639009154, "tokens_seen": 75759616 }, { "epoch": 0.02, "learning_rate": 0.0009868399935804848, "loss": 0.1839, "theoretical_loss": 4.989254292597813, "tokens_seen": 76021760 }, { "epoch": 0.02, "learning_rate": 0.0009867597496389022, "loss": 0.1871, "theoretical_loss": 4.986795821387878, "tokens_seen": 76283904 }, { "epoch": 0.02, "learning_rate": 0.0009866795056973198, "loss": 0.1852, "theoretical_loss": 4.984348140358374, "tokens_seen": 76546048 }, { "epoch": 0.02, "learning_rate": 0.0009865992617557374, "loss": 0.1871, "theoretical_loss": 4.981911165441273, "tokens_seen": 76808192 }, { "epoch": 0.02, "learning_rate": 0.000986519017814155, "loss": 0.1845, "theoretical_loss": 4.979484813507599, "tokens_seen": 77070336 }, { "epoch": 0.02, "learning_rate": 0.0009864387738725726, "loss": 0.1835, "theoretical_loss": 4.977069002353792, "tokens_seen": 77332480 }, { "epoch": 0.02, "learning_rate": 0.0009863585299309903, "loss": 0.1813, "theoretical_loss": 4.974663650688306, "tokens_seen": 77594624 }, { "epoch": 0.02, "learning_rate": 0.0009862782859894079, "loss": 0.1833, "theoretical_loss": 4.972268678118454, "tokens_seen": 77856768 }, { "epoch": 0.02, "learning_rate": 0.0009861980420478253, "loss": 0.1796, "theoretical_loss": 4.969884005137479, "tokens_seen": 78118912 }, { "epoch": 0.02, "learning_rate": 0.000986117798106243, "loss": 0.1841, "theoretical_loss": 4.967509553111862, "tokens_seen": 78381056 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.007857042364776134, "objective/train/docs_used": 36308, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.7754344940185547, "objective/train/original_loss": 3.775434970855713, "objective/train/theoretical_loss": 4.96514524426884, "objective/train/tokens_used": 99103200, "objective/train/value_avg": -0.01514434814453125, "objective/train/value_loss": 0.0004333880788180977, "objective/train/value_max": -0.003795623779296875, "objective/train/value_min": -0.1292724609375, "objective/train/value_reward_corr": 0.27335943213418523, "objective/train/value_std": 0.0078125, "objective/train/weight_avg": 1.0080602169036865, "objective/train/weighted_lm_loss": 3.8082289695739746, "objective/train/weights_max": 1.1106642484664917, "objective/train/weights_min": 0.3715236186981201, "theoretical_loss": 4.96514524426884, "tokens_seen": 78643200 }, { "epoch": 0.02, "learning_rate": 0.0009860375541646605, "loss": 0.1803, "theoretical_loss": 4.96514524426884, "tokens_seen": 78643200 }, { "epoch": 0.02, "learning_rate": 0.0009859573102230783, "loss": 0.1828, "theoretical_loss": 4.962791001684167, "tokens_seen": 78905344 }, { "epoch": 0.02, "learning_rate": 0.0009858770662814957, "loss": 0.1812, "theoretical_loss": 4.960446749270055, "tokens_seen": 79167488 }, { "epoch": 0.02, "learning_rate": 0.0009857968223399133, "loss": 0.1833, "theoretical_loss": 4.958112411763365, "tokens_seen": 79429632 }, { "epoch": 0.02, "learning_rate": 0.000985716578398331, "loss": 0.1819, "theoretical_loss": 4.955787914713962, "tokens_seen": 79691776 }, { "epoch": 0.02, "learning_rate": 0.0009856363344567486, "loss": 0.1828, "theoretical_loss": 4.953473184473312, "tokens_seen": 79953920 }, { "epoch": 0.02, "learning_rate": 0.0009855560905151662, "loss": 0.1815, "theoretical_loss": 4.951168148183246, "tokens_seen": 80216064 }, { "epoch": 0.02, "learning_rate": 0.0009854758465735838, "loss": 0.1808, "theoretical_loss": 4.948872733764926, "tokens_seen": 80478208 }, { "epoch": 0.02, "learning_rate": 0.0009853956026320014, "loss": 0.1792, "theoretical_loss": 4.946586869908014, "tokens_seen": 80740352 }, { "epoch": 0.02, "learning_rate": 0.0009853153586904188, "loss": 0.1821, "theoretical_loss": 4.944310486060004, "tokens_seen": 81002496 }, { "epoch": 0.02, "learning_rate": 0.0009852351147488366, "loss": 0.1818, "theoretical_loss": 4.942043512415751, "tokens_seen": 81264640 }, { "epoch": 0.02, "learning_rate": 0.000985154870807254, "loss": 0.1836, "theoretical_loss": 4.939785879907176, "tokens_seen": 81526784 }, { "epoch": 0.02, "learning_rate": 0.0009850746268656716, "loss": 0.1805, "theoretical_loss": 4.937537520193139, "tokens_seen": 81788928 }, { "epoch": 0.02, "learning_rate": 0.0009849943829240892, "loss": 0.181, "theoretical_loss": 4.93529836564949, "tokens_seen": 82051072 }, { "epoch": 0.02, "learning_rate": 0.0009849141389825069, "loss": 0.1797, "theoretical_loss": 4.933068349359283, "tokens_seen": 82313216 }, { "epoch": 0.03, "learning_rate": 0.0009848338950409245, "loss": 0.1773, "theoretical_loss": 4.93084740510316, "tokens_seen": 82575360 }, { "epoch": 0.03, "learning_rate": 0.000984753651099342, "loss": 0.1776, "theoretical_loss": 4.928635467349885, "tokens_seen": 82837504 }, { "epoch": 0.03, "learning_rate": 0.0009846734071577597, "loss": 0.1764, "theoretical_loss": 4.92643247124705, "tokens_seen": 83099648 }, { "epoch": 0.03, "learning_rate": 0.0009845931632161773, "loss": 0.1805, "theoretical_loss": 4.924238352611924, "tokens_seen": 83361792 }, { "epoch": 0.03, "learning_rate": 0.0009845129192745947, "loss": 0.1789, "theoretical_loss": 4.922053047922455, "tokens_seen": 83623936 }, { "epoch": 0.03, "learning_rate": 0.0009844326753330123, "loss": 0.1756, "theoretical_loss": 4.919876494308432, "tokens_seen": 83886080 }, { "epoch": 0.03, "learning_rate": 0.00098435243139143, "loss": 0.1804, "theoretical_loss": 4.917708629542775, "tokens_seen": 84148224 }, { "epoch": 0.03, "learning_rate": 0.0009842721874498475, "loss": 0.1778, "theoretical_loss": 4.915549392032985, "tokens_seen": 84410368 }, { "epoch": 0.03, "learning_rate": 0.0009841919435082652, "loss": 0.1807, "theoretical_loss": 4.913398720812719, "tokens_seen": 84672512 }, { "epoch": 0.03, "learning_rate": 0.0009841116995666828, "loss": 0.1762, "theoretical_loss": 4.9112565555335115, "tokens_seen": 84934656 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.008176760748028755, "objective/train/docs_used": 38678, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.4570322036743164, "objective/train/original_loss": 3.4570322036743164, "objective/train/theoretical_loss": 4.909122836456632, "objective/train/tokens_used": 105656800, "objective/train/value_avg": -0.0123748779296875, "objective/train/value_loss": 0.00042682766797952354, "objective/train/value_max": -0.00313568115234375, "objective/train/value_min": -0.06854248046875, "objective/train/value_reward_corr": 0.12430929764516488, "objective/train/value_std": 0.005199432373046875, "objective/train/weight_avg": 1.0083661079406738, "objective/train/weighted_lm_loss": 3.4894821643829346, "objective/train/weights_max": 1.0679434537887573, "objective/train/weights_min": 0.3725539743900299, "theoretical_loss": 4.909122836456632, "tokens_seen": 85196800 }, { "epoch": 0.03, "learning_rate": 0.0009840314556251004, "loss": 0.1783, "theoretical_loss": 4.909122836456632, "tokens_seen": 85196800 }, { "epoch": 0.03, "learning_rate": 0.0009839512116835178, "loss": 0.1773, "theoretical_loss": 4.906997504445066, "tokens_seen": 85458944 }, { "epoch": 0.03, "learning_rate": 0.0009838709677419356, "loss": 0.1792, "theoretical_loss": 4.904880500955633, "tokens_seen": 85721088 }, { "epoch": 0.03, "learning_rate": 0.000983790723800353, "loss": 0.1752, "theoretical_loss": 4.90277176803123, "tokens_seen": 85983232 }, { "epoch": 0.03, "learning_rate": 0.0009837104798587706, "loss": 0.1747, "theoretical_loss": 4.9006712482931984, "tokens_seen": 86245376 }, { "epoch": 0.03, "learning_rate": 0.0009836302359171882, "loss": 0.176, "theoretical_loss": 4.89857888493381, "tokens_seen": 86507520 }, { "epoch": 0.03, "learning_rate": 0.0009835499919756058, "loss": 0.1777, "theoretical_loss": 4.896494621708882, "tokens_seen": 86769664 }, { "epoch": 0.03, "learning_rate": 0.0009834697480340234, "loss": 0.1748, "theoretical_loss": 4.8944184029305, "tokens_seen": 87031808 }, { "epoch": 0.03, "learning_rate": 0.000983389504092441, "loss": 0.1762, "theoretical_loss": 4.892350173459863, "tokens_seen": 87293952 }, { "epoch": 0.03, "learning_rate": 0.0009833092601508587, "loss": 0.1736, "theoretical_loss": 4.890289878700239, "tokens_seen": 87556096 }, { "epoch": 0.03, "learning_rate": 0.0009832290162092763, "loss": 0.1736, "theoretical_loss": 4.888237464590028, "tokens_seen": 87818240 }, { "epoch": 0.03, "learning_rate": 0.000983148772267694, "loss": 0.1769, "theoretical_loss": 4.8861928775959464, "tokens_seen": 88080384 }, { "epoch": 0.03, "learning_rate": 0.0009830685283261113, "loss": 0.1752, "theoretical_loss": 4.884156064706302, "tokens_seen": 88342528 }, { "epoch": 0.03, "learning_rate": 0.0009829882843845291, "loss": 0.1744, "theoretical_loss": 4.882126973424384, "tokens_seen": 88604672 }, { "epoch": 0.03, "learning_rate": 0.0009829080404429465, "loss": 0.1731, "theoretical_loss": 4.880105551761961, "tokens_seen": 88866816 }, { "epoch": 0.03, "learning_rate": 0.0009828277965013641, "loss": 0.1783, "theoretical_loss": 4.87809174823286, "tokens_seen": 89128960 }, { "epoch": 0.03, "learning_rate": 0.0009827475525597817, "loss": 0.1719, "theoretical_loss": 4.876085511846673, "tokens_seen": 89391104 }, { "epoch": 0.03, "learning_rate": 0.0009826673086181994, "loss": 0.1769, "theoretical_loss": 4.874086792102535, "tokens_seen": 89653248 }, { "epoch": 0.03, "learning_rate": 0.000982587064676617, "loss": 0.1751, "theoretical_loss": 4.872095538983015, "tokens_seen": 89915392 }, { "epoch": 0.03, "learning_rate": 0.0009825068207350346, "loss": 0.1779, "theoretical_loss": 4.870111702948094, "tokens_seen": 90177536 }, { "epoch": 0.03, "learning_rate": 0.0009824265767934522, "loss": 0.1747, "theoretical_loss": 4.868135234929232, "tokens_seen": 90439680 }, { "epoch": 0.03, "learning_rate": 0.0009823463328518696, "loss": 0.1749, "theoretical_loss": 4.866166086323535, "tokens_seen": 90701824 }, { "epoch": 0.03, "learning_rate": 0.0009822660889102874, "loss": 0.1782, "theoretical_loss": 4.864204208988003, "tokens_seen": 90963968 }, { "epoch": 0.03, "learning_rate": 0.0009821858449687048, "loss": 0.1738, "theoretical_loss": 4.86224955523387, "tokens_seen": 91226112 }, { "epoch": 0.03, "learning_rate": 0.0009821056010271224, "loss": 0.1774, "theoretical_loss": 4.860302077821023, "tokens_seen": 91488256 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005717393010854721, "objective/train/docs_used": 40748, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.387242317199707, "objective/train/original_loss": 3.387242555618286, "objective/train/theoretical_loss": 4.858361729952518, "objective/train/tokens_used": 112210400, "objective/train/value_avg": -0.01448822021484375, "objective/train/value_loss": 0.0005706871743313968, "objective/train/value_max": -0.0030155181884765625, "objective/train/value_min": -0.09588623046875, "objective/train/value_reward_corr": 0.3036400333241696, "objective/train/value_std": 0.007572174072265625, "objective/train/weight_avg": 1.0059770345687866, "objective/train/weighted_lm_loss": 3.4078683853149414, "objective/train/weights_max": 1.0813897848129272, "objective/train/weights_min": 0.37298911809921265, "theoretical_loss": 4.858361729952518, "tokens_seen": 91750400 }, { "epoch": 0.03, "learning_rate": 0.00098202535708554, "loss": 0.1731, "theoretical_loss": 4.858361729952518, "tokens_seen": 91750400 }, { "epoch": 0.03, "learning_rate": 0.0009819451131439577, "loss": 0.1724, "theoretical_loss": 4.856428465269159, "tokens_seen": 92012544 }, { "epoch": 0.03, "learning_rate": 0.0009818648692023753, "loss": 0.1731, "theoretical_loss": 4.8545022378441836, "tokens_seen": 92274688 }, { "epoch": 0.03, "learning_rate": 0.0009817846252607929, "loss": 0.1745, "theoretical_loss": 4.852583002178001, "tokens_seen": 92536832 }, { "epoch": 0.03, "learning_rate": 0.0009817043813192105, "loss": 0.1718, "theoretical_loss": 4.8506707131930344, "tokens_seen": 92798976 }, { "epoch": 0.03, "learning_rate": 0.000981624137377628, "loss": 0.1729, "theoretical_loss": 4.848765326228618, "tokens_seen": 93061120 }, { "epoch": 0.03, "learning_rate": 0.0009815438934360455, "loss": 0.1688, "theoretical_loss": 4.846866797035984, "tokens_seen": 93323264 }, { "epoch": 0.03, "learning_rate": 0.0009814636494944631, "loss": 0.1717, "theoretical_loss": 4.844975081773322, "tokens_seen": 93585408 }, { "epoch": 0.03, "learning_rate": 0.0009813834055528807, "loss": 0.1714, "theoretical_loss": 4.843090137000904, "tokens_seen": 93847552 }, { "epoch": 0.03, "learning_rate": 0.0009813031616112983, "loss": 0.1716, "theoretical_loss": 4.841211919676287, "tokens_seen": 94109696 }, { "epoch": 0.03, "learning_rate": 0.000981222917669716, "loss": 0.1673, "theoretical_loss": 4.839340387149586, "tokens_seen": 94371840 }, { "epoch": 0.03, "learning_rate": 0.0009811426737281336, "loss": 0.1707, "theoretical_loss": 4.837475497158817, "tokens_seen": 94633984 }, { "epoch": 0.03, "learning_rate": 0.0009810624297865512, "loss": 0.1687, "theoretical_loss": 4.835617207825303, "tokens_seen": 94896128 }, { "epoch": 0.03, "learning_rate": 0.0009809821858449686, "loss": 0.1669, "theoretical_loss": 4.83376547764915, "tokens_seen": 95158272 }, { "epoch": 0.03, "learning_rate": 0.0009809019419033864, "loss": 0.1717, "theoretical_loss": 4.831920265504792, "tokens_seen": 95420416 }, { "epoch": 0.03, "learning_rate": 0.0009808216979618038, "loss": 0.1723, "theoretical_loss": 4.830081530636594, "tokens_seen": 95682560 }, { "epoch": 0.03, "learning_rate": 0.0009807414540202216, "loss": 0.1717, "theoretical_loss": 4.8282492326545245, "tokens_seen": 95944704 }, { "epoch": 0.03, "learning_rate": 0.000980661210078639, "loss": 0.1714, "theoretical_loss": 4.826423331529884, "tokens_seen": 96206848 }, { "epoch": 0.03, "learning_rate": 0.0009805809661370566, "loss": 0.1687, "theoretical_loss": 4.824603787591102, "tokens_seen": 96468992 }, { "epoch": 0.03, "learning_rate": 0.0009805007221954742, "loss": 0.1724, "theoretical_loss": 4.822790561519591, "tokens_seen": 96731136 }, { "epoch": 0.03, "learning_rate": 0.0009804204782538919, "loss": 0.1713, "theoretical_loss": 4.8209836143456535, "tokens_seen": 96993280 }, { "epoch": 0.03, "learning_rate": 0.0009803402343123095, "loss": 0.167, "theoretical_loss": 4.81918290744446, "tokens_seen": 97255424 }, { "epoch": 0.03, "learning_rate": 0.000980259990370727, "loss": 0.1705, "theoretical_loss": 4.817388402532074, "tokens_seen": 97517568 }, { "epoch": 0.03, "learning_rate": 0.0009801797464291447, "loss": 0.176, "theoretical_loss": 4.815600061661536, "tokens_seen": 97779712 }, { "epoch": 0.03, "learning_rate": 0.000980099502487562, "loss": 0.17, "theoretical_loss": 4.813817847219008, "tokens_seen": 98041856 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005961360409855843, "objective/train/docs_used": 43181, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.3599514961242676, "objective/train/original_loss": 3.3599514961242676, "objective/train/theoretical_loss": 4.812041721919962, "objective/train/tokens_used": 118764000, "objective/train/value_avg": -0.01464080810546875, "objective/train/value_loss": 0.0005292649148032069, "objective/train/value_max": -0.003040313720703125, "objective/train/value_min": -0.2239990234375, "objective/train/value_reward_corr": 0.26425426870008045, "objective/train/value_std": 0.0106048583984375, "objective/train/weight_avg": 1.0062075853347778, "objective/train/weighted_lm_loss": 3.379601001739502, "objective/train/weights_max": 1.1572779417037964, "objective/train/weights_min": 0.37088075280189514, "theoretical_loss": 4.812041721919962, "tokens_seen": 98304000 }, { "epoch": 0.03, "learning_rate": 0.00098001925854598, "loss": 0.165, "theoretical_loss": 4.812041721919962, "tokens_seen": 98304000 }, { "epoch": 0.03, "learning_rate": 0.0009799390146043973, "loss": 0.1663, "theoretical_loss": 4.810271648805427, "tokens_seen": 98566144 }, { "epoch": 0.03, "learning_rate": 0.000979858770662815, "loss": 0.17, "theoretical_loss": 4.8085075912383015, "tokens_seen": 98828288 }, { "epoch": 0.03, "learning_rate": 0.0009797785267212325, "loss": 0.1685, "theoretical_loss": 4.806749512899687, "tokens_seen": 99090432 }, { "epoch": 0.03, "learning_rate": 0.0009796982827796502, "loss": 0.1684, "theoretical_loss": 4.804997377785307, "tokens_seen": 99352576 }, { "epoch": 0.03, "learning_rate": 0.0009796180388380678, "loss": 0.1671, "theoretical_loss": 4.8032511502019535, "tokens_seen": 99614720 }, { "epoch": 0.03, "learning_rate": 0.0009795377948964854, "loss": 0.1671, "theoretical_loss": 4.801510794763988, "tokens_seen": 99876864 }, { "epoch": 0.03, "learning_rate": 0.000979457550954903, "loss": 0.1664, "theoretical_loss": 4.799776276389897, "tokens_seen": 100139008 }, { "epoch": 0.03, "learning_rate": 0.0009793773070133206, "loss": 0.1693, "theoretical_loss": 4.798047560298882, "tokens_seen": 100401152 }, { "epoch": 0.03, "learning_rate": 0.0009792970630717382, "loss": 0.1692, "theoretical_loss": 4.796324612007515, "tokens_seen": 100663296 }, { "epoch": 0.03, "learning_rate": 0.0009792168191301556, "loss": 0.1668, "theoretical_loss": 4.794607397326421, "tokens_seen": 100925440 }, { "epoch": 0.03, "learning_rate": 0.0009791365751885732, "loss": 0.1686, "theoretical_loss": 4.792895882357019, "tokens_seen": 101187584 }, { "epoch": 0.03, "learning_rate": 0.0009790563312469908, "loss": 0.1683, "theoretical_loss": 4.791190033488302, "tokens_seen": 101449728 }, { "epoch": 0.03, "learning_rate": 0.0009789760873054085, "loss": 0.1653, "theoretical_loss": 4.7894898173936635, "tokens_seen": 101711872 }, { "epoch": 0.03, "learning_rate": 0.000978895843363826, "loss": 0.1643, "theoretical_loss": 4.787795201027757, "tokens_seen": 101974016 }, { "epoch": 0.03, "learning_rate": 0.0009788155994222437, "loss": 0.1674, "theoretical_loss": 4.786106151623423, "tokens_seen": 102236160 }, { "epoch": 0.03, "learning_rate": 0.0009787353554806613, "loss": 0.1617, "theoretical_loss": 4.784422636688622, "tokens_seen": 102498304 }, { "epoch": 0.03, "learning_rate": 0.000978655111539079, "loss": 0.1637, "theoretical_loss": 4.782744624003442, "tokens_seen": 102760448 }, { "epoch": 0.03, "learning_rate": 0.0009785748675974963, "loss": 0.1669, "theoretical_loss": 4.781072081617127, "tokens_seen": 103022592 }, { "epoch": 0.03, "learning_rate": 0.000978494623655914, "loss": 0.1658, "theoretical_loss": 4.779404977845148, "tokens_seen": 103284736 }, { "epoch": 0.03, "learning_rate": 0.0009784143797143315, "loss": 0.1642, "theoretical_loss": 4.777743281266321, "tokens_seen": 103546880 }, { "epoch": 0.03, "learning_rate": 0.0009783341357727491, "loss": 0.1616, "theoretical_loss": 4.776086960719956, "tokens_seen": 103809024 }, { "epoch": 0.03, "learning_rate": 0.0009782538918311668, "loss": 0.1677, "theoretical_loss": 4.774435985303043, "tokens_seen": 104071168 }, { "epoch": 0.03, "learning_rate": 0.0009781736478895844, "loss": 0.1606, "theoretical_loss": 4.772790324367482, "tokens_seen": 104333312 }, { "epoch": 0.03, "learning_rate": 0.000978093403948002, "loss": 0.1655, "theoretical_loss": 4.771149947517346, "tokens_seen": 104595456 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005723345559090376, "objective/train/docs_used": 45444, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.301978349685669, "objective/train/original_loss": 3.3019778728485107, "objective/train/theoretical_loss": 4.769514824606178, "objective/train/tokens_used": 125317600, "objective/train/value_avg": -0.01215362548828125, "objective/train/value_loss": 0.000492334773298353, "objective/train/value_max": -0.0025310516357421875, "objective/train/value_min": -0.21240234375, "objective/train/value_reward_corr": 0.35604829265860916, "objective/train/value_std": 0.007457733154296875, "objective/train/weight_avg": 1.005944848060608, "objective/train/weighted_lm_loss": 3.3237457275390625, "objective/train/weights_max": 1.124742031097412, "objective/train/weights_min": 0.3697309195995331, "theoretical_loss": 4.769514824606178, "tokens_seen": 104857600 }, { "epoch": 0.03, "learning_rate": 0.0009780131600064196, "loss": 0.1616, "theoretical_loss": 4.769514824606178, "tokens_seen": 104857600 }, { "epoch": 0.03, "learning_rate": 0.0009779329160648372, "loss": 0.1593, "theoretical_loss": 4.7678849257343305, "tokens_seen": 105119744 }, { "epoch": 0.03, "learning_rate": 0.0009778526721232546, "loss": 0.1619, "theoretical_loss": 4.766260221246329, "tokens_seen": 105381888 }, { "epoch": 0.03, "learning_rate": 0.0009777724281816724, "loss": 0.1613, "theoretical_loss": 4.764640681728281, "tokens_seen": 105644032 }, { "epoch": 0.03, "learning_rate": 0.0009776921842400898, "loss": 0.1637, "theoretical_loss": 4.763026278005314, "tokens_seen": 105906176 }, { "epoch": 0.03, "learning_rate": 0.0009776119402985074, "loss": 0.1604, "theoretical_loss": 4.761416981139046, "tokens_seen": 106168320 }, { "epoch": 0.03, "learning_rate": 0.000977531696356925, "loss": 0.1622, "theoretical_loss": 4.75981276242509, "tokens_seen": 106430464 }, { "epoch": 0.03, "learning_rate": 0.0009774514524153427, "loss": 0.1616, "theoretical_loss": 4.758213593390595, "tokens_seen": 106692608 }, { "epoch": 0.03, "learning_rate": 0.0009773712084737603, "loss": 0.1605, "theoretical_loss": 4.756619445791808, "tokens_seen": 106954752 }, { "epoch": 0.03, "learning_rate": 0.0009772909645321779, "loss": 0.1627, "theoretical_loss": 4.755030291611678, "tokens_seen": 107216896 }, { "epoch": 0.03, "learning_rate": 0.0009772107205905955, "loss": 0.1632, "theoretical_loss": 4.753446103057492, "tokens_seen": 107479040 }, { "epoch": 0.03, "learning_rate": 0.000977130476649013, "loss": 0.1618, "theoretical_loss": 4.751866852558529, "tokens_seen": 107741184 }, { "epoch": 0.03, "learning_rate": 0.0009770502327074307, "loss": 0.1627, "theoretical_loss": 4.7502925127637585, "tokens_seen": 108003328 }, { "epoch": 0.03, "learning_rate": 0.0009769699887658481, "loss": 0.162, "theoretical_loss": 4.7487230565395535, "tokens_seen": 108265472 }, { "epoch": 0.03, "learning_rate": 0.0009768897448242657, "loss": 0.1609, "theoretical_loss": 4.747158456967452, "tokens_seen": 108527616 }, { "epoch": 0.03, "learning_rate": 0.0009768095008826833, "loss": 0.1614, "theoretical_loss": 4.745598687341927, "tokens_seen": 108789760 }, { "epoch": 0.03, "learning_rate": 0.000976729256941101, "loss": 0.1622, "theoretical_loss": 4.744043721168196, "tokens_seen": 109051904 }, { "epoch": 0.03, "learning_rate": 0.0009766490129995186, "loss": 0.1593, "theoretical_loss": 4.74249353216006, "tokens_seen": 109314048 }, { "epoch": 0.03, "learning_rate": 0.0009765687690579362, "loss": 0.1589, "theoretical_loss": 4.740948094237761, "tokens_seen": 109576192 }, { "epoch": 0.03, "learning_rate": 0.0009764885251163538, "loss": 0.1604, "theoretical_loss": 4.739407381525874, "tokens_seen": 109838336 }, { "epoch": 0.03, "learning_rate": 0.0009764082811747713, "loss": 0.1601, "theoretical_loss": 4.7378713683512235, "tokens_seen": 110100480 }, { "epoch": 0.03, "learning_rate": 0.0009763280372331889, "loss": 0.1592, "theoretical_loss": 4.7363400292408215, "tokens_seen": 110362624 }, { "epoch": 0.03, "learning_rate": 0.0009762477932916065, "loss": 0.1597, "theoretical_loss": 4.734813338919842, "tokens_seen": 110624768 }, { "epoch": 0.03, "learning_rate": 0.000976167549350024, "loss": 0.1574, "theoretical_loss": 4.733291272309609, "tokens_seen": 110886912 }, { "epoch": 0.03, "learning_rate": 0.0009760873054084417, "loss": 0.1573, "theoretical_loss": 4.731773804525616, "tokens_seen": 111149056 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.005369019228965044, "objective/train/docs_used": 47788, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.1628293991088867, "objective/train/original_loss": 3.1628289222717285, "objective/train/theoretical_loss": 4.73026091087557, "objective/train/tokens_used": 131871200, "objective/train/value_avg": -0.0128021240234375, "objective/train/value_loss": 0.0003724124107975513, "objective/train/value_max": -0.0025615692138671875, "objective/train/value_min": -0.206787109375, "objective/train/value_reward_corr": 0.46127366019409083, "objective/train/value_std": 0.007617950439453125, "objective/train/weight_avg": 1.0055452585220337, "objective/train/weighted_lm_loss": 3.180939197540283, "objective/train/weights_max": 1.0874791145324707, "objective/train/weights_min": 0.3774433732032776, "theoretical_loss": 4.73026091087557, "tokens_seen": 111411200 }, { "epoch": 0.03, "learning_rate": 0.0009760070614668593, "loss": 0.1562, "theoretical_loss": 4.73026091087557, "tokens_seen": 111411200 }, { "epoch": 0.03, "learning_rate": 0.0009759268175252769, "loss": 0.1578, "theoretical_loss": 4.728752566857459, "tokens_seen": 111673344 }, { "epoch": 0.03, "learning_rate": 0.0009758465735836945, "loss": 0.1577, "theoretical_loss": 4.72724874815764, "tokens_seen": 111935488 }, { "epoch": 0.03, "learning_rate": 0.0009757663296421121, "loss": 0.1575, "theoretical_loss": 4.725749430648958, "tokens_seen": 112197632 }, { "epoch": 0.03, "learning_rate": 0.0009756860857005296, "loss": 0.1584, "theoretical_loss": 4.724254590388881, "tokens_seen": 112459776 }, { "epoch": 0.03, "learning_rate": 0.0009756058417589472, "loss": 0.1563, "theoretical_loss": 4.722764203617663, "tokens_seen": 112721920 }, { "epoch": 0.03, "learning_rate": 0.0009755255978173648, "loss": 0.1581, "theoretical_loss": 4.7212782467565235, "tokens_seen": 112984064 }, { "epoch": 0.03, "learning_rate": 0.0009754453538757823, "loss": 0.1575, "theoretical_loss": 4.719796696405858, "tokens_seen": 113246208 }, { "epoch": 0.03, "learning_rate": 0.0009753651099342, "loss": 0.1553, "theoretical_loss": 4.718319529343462, "tokens_seen": 113508352 }, { "epoch": 0.03, "learning_rate": 0.0009752848659926175, "loss": 0.1576, "theoretical_loss": 4.716846722522781, "tokens_seen": 113770496 }, { "epoch": 0.03, "learning_rate": 0.0009752046220510353, "loss": 0.1545, "theoretical_loss": 4.715378253071181, "tokens_seen": 114032640 }, { "epoch": 0.03, "learning_rate": 0.0009751243781094528, "loss": 0.1534, "theoretical_loss": 4.713914098288242, "tokens_seen": 114294784 }, { "epoch": 0.03, "learning_rate": 0.0009750441341678703, "loss": 0.1551, "theoretical_loss": 4.712454235644064, "tokens_seen": 114556928 }, { "epoch": 0.03, "learning_rate": 0.000974963890226288, "loss": 0.155, "theoretical_loss": 4.710998642777606, "tokens_seen": 114819072 }, { "epoch": 0.03, "learning_rate": 0.0009748836462847055, "loss": 0.1589, "theoretical_loss": 4.709547297495034, "tokens_seen": 115081216 }, { "epoch": 0.03, "learning_rate": 0.0009748034023431231, "loss": 0.1516, "theoretical_loss": 4.708100177768094, "tokens_seen": 115343360 }, { "epoch": 0.04, "learning_rate": 0.0009747231584015407, "loss": 0.155, "theoretical_loss": 4.7066572617325075, "tokens_seen": 115605504 }, { "epoch": 0.04, "learning_rate": 0.0009746429144599583, "loss": 0.1575, "theoretical_loss": 4.705218527686375, "tokens_seen": 115867648 }, { "epoch": 0.04, "learning_rate": 0.0009745626705183758, "loss": 0.1577, "theoretical_loss": 4.703783954088612, "tokens_seen": 116129792 }, { "epoch": 0.04, "learning_rate": 0.0009744824265767935, "loss": 0.1576, "theoretical_loss": 4.702353519557398, "tokens_seen": 116391936 }, { "epoch": 0.04, "learning_rate": 0.0009744021826352111, "loss": 0.1537, "theoretical_loss": 4.700927202868639, "tokens_seen": 116654080 }, { "epoch": 0.04, "learning_rate": 0.0009743219386936286, "loss": 0.1527, "theoretical_loss": 4.699504982954452, "tokens_seen": 116916224 }, { "epoch": 0.04, "learning_rate": 0.0009742416947520463, "loss": 0.1534, "theoretical_loss": 4.698086838901676, "tokens_seen": 117178368 }, { "epoch": 0.04, "learning_rate": 0.0009741614508104638, "loss": 0.1519, "theoretical_loss": 4.696672749950385, "tokens_seen": 117440512 }, { "epoch": 0.04, "learning_rate": 0.0009740812068688814, "loss": 0.1517, "theoretical_loss": 4.695262695492428, "tokens_seen": 117702656 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.006793029140681028, "objective/train/docs_used": 50256, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 3.074131488800049, "objective/train/original_loss": 3.0741310119628906, "objective/train/theoretical_loss": 4.693856655069986, "objective/train/tokens_used": 138424800, "objective/train/value_avg": -0.0142974853515625, "objective/train/value_loss": 0.000627185101620853, "objective/train/value_max": -0.00250244140625, "objective/train/value_min": -0.256591796875, "objective/train/value_reward_corr": 0.2673630119001533, "objective/train/value_std": 0.0104522705078125, "objective/train/weight_avg": 1.0070631504058838, "objective/train/weighted_lm_loss": 3.0982470512390137, "objective/train/weights_max": 1.1906453371047974, "objective/train/weights_min": 0.2276872992515564, "theoretical_loss": 4.693856655069986, "tokens_seen": 117964800 }, { "epoch": 0.04, "learning_rate": 0.000974000962927299, "loss": 0.1531, "theoretical_loss": 4.693856655069986, "tokens_seen": 117964800 }, { "epoch": 0.04, "learning_rate": 0.0009739207189857165, "loss": 0.1542, "theoretical_loss": 4.692454608374145, "tokens_seen": 118226944 }, { "epoch": 0.04, "learning_rate": 0.0009738404750441343, "loss": 0.1509, "theoretical_loss": 4.6910565352434785, "tokens_seen": 118489088 }, { "epoch": 0.04, "learning_rate": 0.0009737602311025518, "loss": 0.1512, "theoretical_loss": 4.689662415662662, "tokens_seen": 118751232 }, { "epoch": 0.04, "learning_rate": 0.0009736799871609694, "loss": 0.1504, "theoretical_loss": 4.688272229761087, "tokens_seen": 119013376 }, { "epoch": 0.04, "learning_rate": 0.000973599743219387, "loss": 0.1515, "theoretical_loss": 4.686885957811503, "tokens_seen": 119275520 }, { "epoch": 0.04, "learning_rate": 0.0009735194992778046, "loss": 0.147, "theoretical_loss": 4.685503580228671, "tokens_seen": 119537664 }, { "epoch": 0.04, "learning_rate": 0.0009734392553362221, "loss": 0.1475, "theoretical_loss": 4.684125077568028, "tokens_seen": 119799808 }, { "epoch": 0.04, "learning_rate": 0.0009733590113946397, "loss": 0.1524, "theoretical_loss": 4.682750430524376, "tokens_seen": 120061952 }, { "epoch": 0.04, "learning_rate": 0.0009732787674530573, "loss": 0.1518, "theoretical_loss": 4.6813796199305795, "tokens_seen": 120324096 }, { "epoch": 0.04, "learning_rate": 0.0009731985235114748, "loss": 0.151, "theoretical_loss": 4.68001262675628, "tokens_seen": 120586240 }, { "epoch": 0.04, "learning_rate": 0.0009731182795698925, "loss": 0.1465, "theoretical_loss": 4.678649432106627, "tokens_seen": 120848384 }, { "epoch": 0.04, "learning_rate": 0.00097303803562831, "loss": 0.1496, "theoretical_loss": 4.677290017221017, "tokens_seen": 121110528 }, { "epoch": 0.04, "learning_rate": 0.0009729577916867277, "loss": 0.1461, "theoretical_loss": 4.675934363471857, "tokens_seen": 121372672 }, { "epoch": 0.04, "learning_rate": 0.0009728775477451453, "loss": 0.1474, "theoretical_loss": 4.674582452363334, "tokens_seen": 121634816 }, { "epoch": 0.04, "learning_rate": 0.0009727973038035629, "loss": 0.1463, "theoretical_loss": 4.673234265530201, "tokens_seen": 121896960 }, { "epoch": 0.04, "learning_rate": 0.0009727170598619804, "loss": 0.1463, "theoretical_loss": 4.671889784736576, "tokens_seen": 122159104 }, { "epoch": 0.04, "learning_rate": 0.000972636815920398, "loss": 0.1478, "theoretical_loss": 4.670548991874758, "tokens_seen": 122421248 }, { "epoch": 0.04, "learning_rate": 0.0009725565719788156, "loss": 0.1463, "theoretical_loss": 4.669211868964052, "tokens_seen": 122683392 }, { "epoch": 0.04, "learning_rate": 0.0009724763280372332, "loss": 0.1496, "theoretical_loss": 4.66787839814961, "tokens_seen": 122945536 }, { "epoch": 0.04, "learning_rate": 0.0009723960840956508, "loss": 0.1477, "theoretical_loss": 4.666548561701285, "tokens_seen": 123207680 }, { "epoch": 0.04, "learning_rate": 0.0009723158401540683, "loss": 0.1463, "theoretical_loss": 4.665222342012491, "tokens_seen": 123469824 }, { "epoch": 0.04, "learning_rate": 0.0009722355962124861, "loss": 0.1472, "theoretical_loss": 4.663899721599093, "tokens_seen": 123731968 }, { "epoch": 0.04, "learning_rate": 0.0009721553522709036, "loss": 0.1446, "theoretical_loss": 4.6625806830982865, "tokens_seen": 123994112 }, { "epoch": 0.04, "learning_rate": 0.0009720751083293211, "loss": 0.1434, "theoretical_loss": 4.661265209267507, "tokens_seen": 124256256 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.006344511639326811, "objective/train/docs_used": 52572, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.8837485313415527, "objective/train/original_loss": 2.8837485313415527, "objective/train/theoretical_loss": 4.659953282983348, "objective/train/tokens_used": 144978400, "objective/train/value_avg": -0.011322021484375, "objective/train/value_loss": 0.0003046532510779798, "objective/train/value_max": -0.001964569091796875, "objective/train/value_min": -0.193603515625, "objective/train/value_reward_corr": 0.3217214219555613, "objective/train/value_std": 0.007747650146484375, "objective/train/weight_avg": 1.0064884424209595, "objective/train/weighted_lm_loss": 2.9043095111846924, "objective/train/weights_max": 1.1398353576660156, "objective/train/weights_min": 0.37116101384162903, "theoretical_loss": 4.659953282983348, "tokens_seen": 124518400 }, { "epoch": 0.04, "learning_rate": 0.0009719948643877388, "loss": 0.1463, "theoretical_loss": 4.659953282983348, "tokens_seen": 124518400 }, { "epoch": 0.04, "learning_rate": 0.0009719146204461563, "loss": 0.1482, "theoretical_loss": 4.658644887240481, "tokens_seen": 124780544 }, { "epoch": 0.04, "learning_rate": 0.0009718343765045739, "loss": 0.147, "theoretical_loss": 4.657340005150602, "tokens_seen": 125042688 }, { "epoch": 0.04, "learning_rate": 0.0009717541325629915, "loss": 0.1462, "theoretical_loss": 4.656038619941382, "tokens_seen": 125304832 }, { "epoch": 0.04, "learning_rate": 0.0009716738886214091, "loss": 0.1434, "theoretical_loss": 4.654740714955429, "tokens_seen": 125566976 }, { "epoch": 0.04, "learning_rate": 0.0009715936446798266, "loss": 0.1433, "theoretical_loss": 4.653446273649259, "tokens_seen": 125829120 }, { "epoch": 0.04, "learning_rate": 0.0009715134007382443, "loss": 0.1455, "theoretical_loss": 4.652155279592286, "tokens_seen": 126091264 }, { "epoch": 0.04, "learning_rate": 0.0009714331567966619, "loss": 0.1429, "theoretical_loss": 4.650867716465819, "tokens_seen": 126353408 }, { "epoch": 0.04, "learning_rate": 0.0009713529128550794, "loss": 0.1428, "theoretical_loss": 4.6495835680620665, "tokens_seen": 126615552 }, { "epoch": 0.04, "learning_rate": 0.0009712726689134971, "loss": 0.1406, "theoretical_loss": 4.648302818283158, "tokens_seen": 126877696 }, { "epoch": 0.04, "learning_rate": 0.0009711924249719146, "loss": 0.1459, "theoretical_loss": 4.64702545114017, "tokens_seen": 127139840 }, { "epoch": 0.04, "learning_rate": 0.0009711121810303323, "loss": 0.1425, "theoretical_loss": 4.645751450752172, "tokens_seen": 127401984 }, { "epoch": 0.04, "learning_rate": 0.0009710319370887498, "loss": 0.1386, "theoretical_loss": 4.644480801345268, "tokens_seen": 127664128 }, { "epoch": 0.04, "learning_rate": 0.0009709516931471673, "loss": 0.1435, "theoretical_loss": 4.643213487251664, "tokens_seen": 127926272 }, { "epoch": 0.04, "learning_rate": 0.000970871449205585, "loss": 0.1397, "theoretical_loss": 4.641949492908737, "tokens_seen": 128188416 }, { "epoch": 0.04, "learning_rate": 0.0009707912052640026, "loss": 0.1406, "theoretical_loss": 4.640688802858113, "tokens_seen": 128450560 }, { "epoch": 0.04, "learning_rate": 0.0009707109613224202, "loss": 0.1436, "theoretical_loss": 4.63943140174476, "tokens_seen": 128712704 }, { "epoch": 0.04, "learning_rate": 0.0009706307173808378, "loss": 0.1405, "theoretical_loss": 4.6381772743160905, "tokens_seen": 128974848 }, { "epoch": 0.04, "learning_rate": 0.0009705504734392554, "loss": 0.1423, "theoretical_loss": 4.636926405421065, "tokens_seen": 129236992 }, { "epoch": 0.04, "learning_rate": 0.0009704702294976729, "loss": 0.1387, "theoretical_loss": 4.635678780009318, "tokens_seen": 129499136 }, { "epoch": 0.04, "learning_rate": 0.0009703899855560905, "loss": 0.1392, "theoretical_loss": 4.634434383130284, "tokens_seen": 129761280 }, { "epoch": 0.04, "learning_rate": 0.0009703097416145081, "loss": 0.1422, "theoretical_loss": 4.633193199932336, "tokens_seen": 130023424 }, { "epoch": 0.04, "learning_rate": 0.0009702294976729256, "loss": 0.1425, "theoretical_loss": 4.631955215661932, "tokens_seen": 130285568 }, { "epoch": 0.04, "learning_rate": 0.0009701492537313433, "loss": 0.1425, "theoretical_loss": 4.630720415662774, "tokens_seen": 130547712 }, { "epoch": 0.04, "learning_rate": 0.0009700690097897609, "loss": 0.1353, "theoretical_loss": 4.629488785374969, "tokens_seen": 130809856 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.005258428864181042, "objective/train/docs_used": 54897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.703356981277466, "objective/train/original_loss": 2.7033567428588867, "objective/train/theoretical_loss": 4.628260310334209, "objective/train/tokens_used": 151532000, "objective/train/value_avg": -0.01247406005859375, "objective/train/value_loss": 0.0005161120789125562, "objective/train/value_max": -0.0016937255859375, "objective/train/value_min": -0.182861328125, "objective/train/value_reward_corr": 0.283905402927882, "objective/train/value_std": 0.01013946533203125, "objective/train/weight_avg": 1.0054808855056763, "objective/train/weighted_lm_loss": 2.7186498641967773, "objective/train/weights_max": 1.149869441986084, "objective/train/weights_min": 0.2252407968044281, "theoretical_loss": 4.628260310334209, "tokens_seen": 131072000 }, { "epoch": 0.04, "learning_rate": 0.0009699887658481786, "loss": 0.1388, "theoretical_loss": 4.628260310334209, "tokens_seen": 131072000 }, { "epoch": 0.04, "learning_rate": 0.0009699085219065961, "loss": 0.1394, "theoretical_loss": 4.6270349761709495, "tokens_seen": 131334144 }, { "epoch": 0.04, "learning_rate": 0.0009698282779650137, "loss": 0.1354, "theoretical_loss": 4.625812768609601, "tokens_seen": 131596288 }, { "epoch": 0.04, "learning_rate": 0.0009697480340234313, "loss": 0.1392, "theoretical_loss": 4.62459367346773, "tokens_seen": 131858432 }, { "epoch": 0.04, "learning_rate": 0.0009696677900818488, "loss": 0.1383, "theoretical_loss": 4.623377676655271, "tokens_seen": 132120576 }, { "epoch": 0.04, "learning_rate": 0.0009695875461402664, "loss": 0.1387, "theoretical_loss": 4.622164764173735, "tokens_seen": 132382720 }, { "epoch": 0.04, "learning_rate": 0.000969507302198684, "loss": 0.1389, "theoretical_loss": 4.62095492211544, "tokens_seen": 132644864 }, { "epoch": 0.04, "learning_rate": 0.0009694270582571016, "loss": 0.1438, "theoretical_loss": 4.6197481366627455, "tokens_seen": 132907008 }, { "epoch": 0.04, "learning_rate": 0.0009693468143155191, "loss": 0.1385, "theoretical_loss": 4.618544394087287, "tokens_seen": 133169152 }, { "epoch": 0.04, "learning_rate": 0.0009692665703739369, "loss": 0.1327, "theoretical_loss": 4.617343680749233, "tokens_seen": 133431296 }, { "epoch": 0.04, "learning_rate": 0.0009691863264323544, "loss": 0.1351, "theoretical_loss": 4.61614598309653, "tokens_seen": 133693440 }, { "epoch": 0.04, "learning_rate": 0.0009691060824907719, "loss": 0.1364, "theoretical_loss": 4.614951287664179, "tokens_seen": 133955584 }, { "epoch": 0.04, "learning_rate": 0.0009690258385491896, "loss": 0.1374, "theoretical_loss": 4.613759581073502, "tokens_seen": 134217728 }, { "epoch": 0.04, "learning_rate": 0.0009689455946076071, "loss": 0.1366, "theoretical_loss": 4.612570850031418, "tokens_seen": 134479872 }, { "epoch": 0.04, "learning_rate": 0.0009688653506660247, "loss": 0.1337, "theoretical_loss": 4.611385081329736, "tokens_seen": 134742016 }, { "epoch": 0.04, "learning_rate": 0.0009687851067244423, "loss": 0.1349, "theoretical_loss": 4.610202261844444, "tokens_seen": 135004160 }, { "epoch": 0.04, "learning_rate": 0.0009687048627828599, "loss": 0.1352, "theoretical_loss": 4.6090223785350135, "tokens_seen": 135266304 }, { "epoch": 0.04, "learning_rate": 0.0009686246188412776, "loss": 0.1329, "theoretical_loss": 4.607845418443706, "tokens_seen": 135528448 }, { "epoch": 0.04, "learning_rate": 0.0009685443748996951, "loss": 0.1354, "theoretical_loss": 4.606671368694888, "tokens_seen": 135790592 }, { "epoch": 0.04, "learning_rate": 0.0009684641309581127, "loss": 0.1341, "theoretical_loss": 4.6055002164943595, "tokens_seen": 136052736 }, { "epoch": 0.04, "learning_rate": 0.0009683838870165303, "loss": 0.1295, "theoretical_loss": 4.604331949128672, "tokens_seen": 136314880 }, { "epoch": 0.04, "learning_rate": 0.0009683036430749479, "loss": 0.134, "theoretical_loss": 4.603166553964474, "tokens_seen": 136577024 }, { "epoch": 0.04, "learning_rate": 0.0009682233991333654, "loss": 0.1367, "theoretical_loss": 4.60200401844785, "tokens_seen": 136839168 }, { "epoch": 0.04, "learning_rate": 0.0009681431551917831, "loss": 0.1308, "theoretical_loss": 4.6008443301036746, "tokens_seen": 137101312 }, { "epoch": 0.04, "learning_rate": 0.0009680629112502006, "loss": 0.1338, "theoretical_loss": 4.5996874765349585, "tokens_seen": 137363456 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0035103908739984035, "objective/train/docs_used": 57180, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.7642529010772705, "objective/train/original_loss": 2.7642529010772705, "objective/train/theoretical_loss": 4.598533445422221, "objective/train/tokens_used": 158085600, "objective/train/value_avg": -0.01120758056640625, "objective/train/value_loss": 0.0006862494046799839, "objective/train/value_max": -0.0017547607421875, "objective/train/value_min": -0.2978515625, "objective/train/value_reward_corr": 0.42945186696590126, "objective/train/value_std": 0.00952911376953125, "objective/train/weight_avg": 1.0038180351257324, "objective/train/weighted_lm_loss": 2.776818037033081, "objective/train/weights_max": 1.2096216678619385, "objective/train/weights_min": 0.36997780203819275, "theoretical_loss": 4.598533445422221, "tokens_seen": 137625600 }, { "epoch": 0.04, "learning_rate": 0.0009679826673086181, "loss": 0.1328, "theoretical_loss": 4.598533445422221, "tokens_seen": 137625600 }, { "epoch": 0.04, "learning_rate": 0.0009679024233670359, "loss": 0.1312, "theoretical_loss": 4.597382224522855, "tokens_seen": 137887744 }, { "epoch": 0.04, "learning_rate": 0.0009678221794254534, "loss": 0.1327, "theoretical_loss": 4.596233801670502, "tokens_seen": 138149888 }, { "epoch": 0.04, "learning_rate": 0.000967741935483871, "loss": 0.1315, "theoretical_loss": 4.595088164774435, "tokens_seen": 138412032 }, { "epoch": 0.04, "learning_rate": 0.0009676616915422886, "loss": 0.134, "theoretical_loss": 4.593945301818941, "tokens_seen": 138674176 }, { "epoch": 0.04, "learning_rate": 0.0009675814476007062, "loss": 0.1287, "theoretical_loss": 4.592805200862726, "tokens_seen": 138936320 }, { "epoch": 0.04, "learning_rate": 0.0009675012036591238, "loss": 0.1307, "theoretical_loss": 4.591667850038302, "tokens_seen": 139198464 }, { "epoch": 0.04, "learning_rate": 0.0009674209597175413, "loss": 0.13, "theoretical_loss": 4.590533237551401, "tokens_seen": 139460608 }, { "epoch": 0.04, "learning_rate": 0.0009673407157759589, "loss": 0.1303, "theoretical_loss": 4.589401351680385, "tokens_seen": 139722752 }, { "epoch": 0.04, "learning_rate": 0.0009672604718343765, "loss": 0.1324, "theoretical_loss": 4.588272180775659, "tokens_seen": 139984896 }, { "epoch": 0.04, "learning_rate": 0.0009671802278927941, "loss": 0.1279, "theoretical_loss": 4.587145713259102, "tokens_seen": 140247040 }, { "epoch": 0.04, "learning_rate": 0.0009670999839512117, "loss": 0.1287, "theoretical_loss": 4.5860219376234905, "tokens_seen": 140509184 }, { "epoch": 0.04, "learning_rate": 0.0009670197400096294, "loss": 0.1311, "theoretical_loss": 4.584900842431934, "tokens_seen": 140771328 }, { "epoch": 0.04, "learning_rate": 0.0009669394960680469, "loss": 0.1271, "theoretical_loss": 4.583782416317316, "tokens_seen": 141033472 }, { "epoch": 0.04, "learning_rate": 0.0009668592521264644, "loss": 0.1287, "theoretical_loss": 4.582666647981739, "tokens_seen": 141295616 }, { "epoch": 0.04, "learning_rate": 0.0009667790081848821, "loss": 0.1268, "theoretical_loss": 4.581553526195974, "tokens_seen": 141557760 }, { "epoch": 0.04, "learning_rate": 0.0009666987642432996, "loss": 0.1293, "theoretical_loss": 4.580443039798922, "tokens_seen": 141819904 }, { "epoch": 0.04, "learning_rate": 0.0009666185203017172, "loss": 0.1296, "theoretical_loss": 4.57933517769707, "tokens_seen": 142082048 }, { "epoch": 0.04, "learning_rate": 0.0009665382763601348, "loss": 0.1308, "theoretical_loss": 4.578229928863959, "tokens_seen": 142344192 }, { "epoch": 0.04, "learning_rate": 0.0009664580324185524, "loss": 0.1281, "theoretical_loss": 4.57712728233966, "tokens_seen": 142606336 }, { "epoch": 0.04, "learning_rate": 0.00096637778847697, "loss": 0.1267, "theoretical_loss": 4.576027227230245, "tokens_seen": 142868480 }, { "epoch": 0.04, "learning_rate": 0.0009662975445353877, "loss": 0.1325, "theoretical_loss": 4.574929752707274, "tokens_seen": 143130624 }, { "epoch": 0.04, "learning_rate": 0.0009662173005938052, "loss": 0.1283, "theoretical_loss": 4.573834848007284, "tokens_seen": 143392768 }, { "epoch": 0.04, "learning_rate": 0.0009661370566522228, "loss": 0.127, "theoretical_loss": 4.572742502431272, "tokens_seen": 143654912 }, { "epoch": 0.04, "learning_rate": 0.0009660568127106404, "loss": 0.1264, "theoretical_loss": 4.571652705344202, "tokens_seen": 143917056 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0026565194129943848, "objective/train/docs_used": 59583, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.5324413776397705, "objective/train/original_loss": 2.5324416160583496, "objective/train/theoretical_loss": 4.570565446174504, "objective/train/tokens_used": 164639200, "objective/train/value_avg": -0.01026153564453125, "objective/train/value_loss": 0.0005388536374084651, "objective/train/value_max": -0.0011034011840820312, "objective/train/value_min": -0.2158203125, "objective/train/value_reward_corr": 0.41669318991791116, "objective/train/value_std": 0.0081024169921875, "objective/train/weight_avg": 1.0029032230377197, "objective/train/weighted_lm_loss": 2.539125442504883, "objective/train/weights_max": 1.1576155424118042, "objective/train/weights_min": 0.37208810448646545, "theoretical_loss": 4.570565446174504, "tokens_seen": 144179200 }, { "epoch": 0.04, "learning_rate": 0.0009659765687690579, "loss": 0.127, "theoretical_loss": 4.570565446174504, "tokens_seen": 144179200 }, { "epoch": 0.04, "learning_rate": 0.0009658963248274756, "loss": 0.1256, "theoretical_loss": 4.569480714413578, "tokens_seen": 144441344 }, { "epoch": 0.04, "learning_rate": 0.0009658160808858931, "loss": 0.1276, "theoretical_loss": 4.568398499615305, "tokens_seen": 144703488 }, { "epoch": 0.04, "learning_rate": 0.0009657358369443107, "loss": 0.1274, "theoretical_loss": 4.56731879139557, "tokens_seen": 144965632 }, { "epoch": 0.04, "learning_rate": 0.0009656555930027284, "loss": 0.1258, "theoretical_loss": 4.566241579431776, "tokens_seen": 145227776 }, { "epoch": 0.04, "learning_rate": 0.0009655753490611459, "loss": 0.1274, "theoretical_loss": 4.565166853462371, "tokens_seen": 145489920 }, { "epoch": 0.04, "learning_rate": 0.0009654951051195635, "loss": 0.1289, "theoretical_loss": 4.564094603286375, "tokens_seen": 145752064 }, { "epoch": 0.04, "learning_rate": 0.0009654148611779811, "loss": 0.1239, "theoretical_loss": 4.5630248187629245, "tokens_seen": 146014208 }, { "epoch": 0.04, "learning_rate": 0.0009653346172363987, "loss": 0.1252, "theoretical_loss": 4.561957489810798, "tokens_seen": 146276352 }, { "epoch": 0.04, "learning_rate": 0.0009652543732948162, "loss": 0.1234, "theoretical_loss": 4.5608926064079665, "tokens_seen": 146538496 }, { "epoch": 0.04, "learning_rate": 0.0009651741293532339, "loss": 0.127, "theoretical_loss": 4.559830158591139, "tokens_seen": 146800640 }, { "epoch": 0.04, "learning_rate": 0.0009650938854116514, "loss": 0.1215, "theoretical_loss": 4.558770136455316, "tokens_seen": 147062784 }, { "epoch": 0.04, "learning_rate": 0.0009650136414700689, "loss": 0.1245, "theoretical_loss": 4.557712530153342, "tokens_seen": 147324928 }, { "epoch": 0.04, "learning_rate": 0.0009649333975284866, "loss": 0.1217, "theoretical_loss": 4.556657329895469, "tokens_seen": 147587072 }, { "epoch": 0.04, "learning_rate": 0.0009648531535869042, "loss": 0.125, "theoretical_loss": 4.5556045259489135, "tokens_seen": 147849216 }, { "epoch": 0.04, "learning_rate": 0.0009647729096453219, "loss": 0.1258, "theoretical_loss": 4.554554108637437, "tokens_seen": 148111360 }, { "epoch": 0.04, "learning_rate": 0.0009646926657037394, "loss": 0.1232, "theoretical_loss": 4.553506068340907, "tokens_seen": 148373504 }, { "epoch": 0.05, "learning_rate": 0.000964612421762157, "loss": 0.1224, "theoretical_loss": 4.552460395494878, "tokens_seen": 148635648 }, { "epoch": 0.05, "learning_rate": 0.0009645321778205746, "loss": 0.1258, "theoretical_loss": 4.55141708059017, "tokens_seen": 148897792 }, { "epoch": 0.05, "learning_rate": 0.0009644519338789921, "loss": 0.1209, "theoretical_loss": 4.5503761141724555, "tokens_seen": 149159936 }, { "epoch": 0.05, "learning_rate": 0.0009643716899374097, "loss": 0.1227, "theoretical_loss": 4.549337486841843, "tokens_seen": 149422080 }, { "epoch": 0.05, "learning_rate": 0.0009642914459958273, "loss": 0.1231, "theoretical_loss": 4.548301189252473, "tokens_seen": 149684224 }, { "epoch": 0.05, "learning_rate": 0.000964211202054245, "loss": 0.1237, "theoretical_loss": 4.54726721211211, "tokens_seen": 149946368 }, { "epoch": 0.05, "learning_rate": 0.0009641309581126625, "loss": 0.1212, "theoretical_loss": 4.546235546181743, "tokens_seen": 150208512 }, { "epoch": 0.05, "learning_rate": 0.0009640507141710802, "loss": 0.118, "theoretical_loss": 4.545206182275189, "tokens_seen": 150470656 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.002808451419696212, "objective/train/docs_used": 61752, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.444796085357666, "objective/train/original_loss": 2.444795846939087, "objective/train/theoretical_loss": 4.5441791112587016, "objective/train/tokens_used": 171192800, "objective/train/value_avg": -0.01094818115234375, "objective/train/value_loss": 0.0004207800084259361, "objective/train/value_max": -0.0013093948364257812, "objective/train/value_min": -0.256103515625, "objective/train/value_reward_corr": 0.4595803265485007, "objective/train/value_std": 0.00933074951171875, "objective/train/weight_avg": 1.0030062198638916, "objective/train/weighted_lm_loss": 2.4522807598114014, "objective/train/weights_max": 1.231403112411499, "objective/train/weights_min": 0.3714272677898407, "theoretical_loss": 4.5441791112587016, "tokens_seen": 150732800 }, { "epoch": 0.05, "learning_rate": 0.0009639704702294977, "loss": 0.1196, "theoretical_loss": 4.5441791112587016, "tokens_seen": 150732800 }, { "epoch": 0.05, "learning_rate": 0.0009638902262879152, "loss": 0.1256, "theoretical_loss": 4.5431543240505725, "tokens_seen": 150994944 }, { "epoch": 0.05, "learning_rate": 0.0009638099823463329, "loss": 0.123, "theoretical_loss": 4.5421318116207585, "tokens_seen": 151257088 }, { "epoch": 0.05, "learning_rate": 0.0009637297384047504, "loss": 0.1184, "theoretical_loss": 4.541111564990485, "tokens_seen": 151519232 }, { "epoch": 0.05, "learning_rate": 0.0009636494944631681, "loss": 0.1206, "theoretical_loss": 4.540093575231879, "tokens_seen": 151781376 }, { "epoch": 0.05, "learning_rate": 0.0009635692505215856, "loss": 0.119, "theoretical_loss": 4.539077833467582, "tokens_seen": 152043520 }, { "epoch": 0.05, "learning_rate": 0.0009634890065800032, "loss": 0.1207, "theoretical_loss": 4.538064330870389, "tokens_seen": 152305664 }, { "epoch": 0.05, "learning_rate": 0.0009634087626384209, "loss": 0.1227, "theoretical_loss": 4.537053058662869, "tokens_seen": 152567808 }, { "epoch": 0.05, "learning_rate": 0.0009633285186968385, "loss": 0.1211, "theoretical_loss": 4.536044008117005, "tokens_seen": 152829952 }, { "epoch": 0.05, "learning_rate": 0.000963248274755256, "loss": 0.1239, "theoretical_loss": 4.535037170553833, "tokens_seen": 153092096 }, { "epoch": 0.05, "learning_rate": 0.0009631680308136736, "loss": 0.1197, "theoretical_loss": 4.534032537343078, "tokens_seen": 153354240 }, { "epoch": 0.05, "learning_rate": 0.0009630877868720912, "loss": 0.1227, "theoretical_loss": 4.533030099902803, "tokens_seen": 153616384 }, { "epoch": 0.05, "learning_rate": 0.0009630075429305087, "loss": 0.1232, "theoretical_loss": 4.53202984969905, "tokens_seen": 153878528 }, { "epoch": 0.05, "learning_rate": 0.0009629272989889264, "loss": 0.1246, "theoretical_loss": 4.531031778245499, "tokens_seen": 154140672 }, { "epoch": 0.05, "learning_rate": 0.0009628470550473439, "loss": 0.1214, "theoretical_loss": 4.530035877103115, "tokens_seen": 154402816 }, { "epoch": 0.05, "learning_rate": 0.0009627668111057615, "loss": 0.121, "theoretical_loss": 4.529042137879809, "tokens_seen": 154664960 }, { "epoch": 0.05, "learning_rate": 0.0009626865671641792, "loss": 0.1184, "theoretical_loss": 4.528050552230092, "tokens_seen": 154927104 }, { "epoch": 0.05, "learning_rate": 0.0009626063232225967, "loss": 0.1205, "theoretical_loss": 4.527061111854746, "tokens_seen": 155189248 }, { "epoch": 0.05, "learning_rate": 0.0009625260792810143, "loss": 0.1217, "theoretical_loss": 4.526073808500481, "tokens_seen": 155451392 }, { "epoch": 0.05, "learning_rate": 0.0009624458353394319, "loss": 0.1181, "theoretical_loss": 4.525088633959613, "tokens_seen": 155713536 }, { "epoch": 0.05, "learning_rate": 0.0009623655913978495, "loss": 0.1176, "theoretical_loss": 4.524105580069728, "tokens_seen": 155975680 }, { "epoch": 0.05, "learning_rate": 0.0009622853474562671, "loss": 0.1178, "theoretical_loss": 4.523124638713361, "tokens_seen": 156237824 }, { "epoch": 0.05, "learning_rate": 0.0009622051035146847, "loss": 0.1206, "theoretical_loss": 4.522145801817673, "tokens_seen": 156499968 }, { "epoch": 0.05, "learning_rate": 0.0009621248595731022, "loss": 0.1165, "theoretical_loss": 4.521169061354129, "tokens_seen": 156762112 }, { "epoch": 0.05, "learning_rate": 0.0009620446156315198, "loss": 0.1179, "theoretical_loss": 4.520194409338185, "tokens_seen": 157024256 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.002041465835645795, "objective/train/docs_used": 64097, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.572587728500366, "objective/train/original_loss": 2.572587728500366, "objective/train/theoretical_loss": 4.519221837828971, "objective/train/tokens_used": 177746400, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.00036377867218106985, "objective/train/value_max": -0.0010986328125, "objective/train/value_min": -0.177490234375, "objective/train/value_reward_corr": 0.3439090032549563, "objective/train/value_std": 0.005954742431640625, "objective/train/weight_avg": 1.002203106880188, "objective/train/weighted_lm_loss": 2.5783188343048096, "objective/train/weights_max": 1.1067639589309692, "objective/train/weights_min": 0.22429333627223969, "theoretical_loss": 4.519221837828971, "tokens_seen": 157286400 }, { "epoch": 0.05, "learning_rate": 0.0009619643716899374, "loss": 0.1204, "theoretical_loss": 4.519221837828971, "tokens_seen": 157286400 }, { "epoch": 0.05, "learning_rate": 0.000961884127748355, "loss": 0.1203, "theoretical_loss": 4.51825133892898, "tokens_seen": 157548544 }, { "epoch": 0.05, "learning_rate": 0.0009618038838067727, "loss": 0.1186, "theoretical_loss": 4.517282904783764, "tokens_seen": 157810688 }, { "epoch": 0.05, "learning_rate": 0.0009617236398651902, "loss": 0.1215, "theoretical_loss": 4.516316527581621, "tokens_seen": 158072832 }, { "epoch": 0.05, "learning_rate": 0.0009616433959236078, "loss": 0.1222, "theoretical_loss": 4.515352199553295, "tokens_seen": 158334976 }, { "epoch": 0.05, "learning_rate": 0.0009615631519820254, "loss": 0.1203, "theoretical_loss": 4.514389912971679, "tokens_seen": 158597120 }, { "epoch": 0.05, "learning_rate": 0.0009614829080404429, "loss": 0.1202, "theoretical_loss": 4.513429660151513, "tokens_seen": 158859264 }, { "epoch": 0.05, "learning_rate": 0.0009614026640988605, "loss": 0.1169, "theoretical_loss": 4.51247143344909, "tokens_seen": 159121408 }, { "epoch": 0.05, "learning_rate": 0.0009613224201572781, "loss": 0.1199, "theoretical_loss": 4.511515225261961, "tokens_seen": 159383552 }, { "epoch": 0.05, "learning_rate": 0.0009612421762156957, "loss": 0.1184, "theoretical_loss": 4.5105610280286506, "tokens_seen": 159645696 }, { "epoch": 0.05, "learning_rate": 0.0009611619322741134, "loss": 0.1197, "theoretical_loss": 4.509608834228365, "tokens_seen": 159907840 }, { "epoch": 0.05, "learning_rate": 0.000961081688332531, "loss": 0.1157, "theoretical_loss": 4.508658636380705, "tokens_seen": 160169984 }, { "epoch": 0.05, "learning_rate": 0.0009610014443909485, "loss": 0.1206, "theoretical_loss": 4.507710427045389, "tokens_seen": 160432128 }, { "epoch": 0.05, "learning_rate": 0.0009609212004493661, "loss": 0.1179, "theoretical_loss": 4.50676419882197, "tokens_seen": 160694272 }, { "epoch": 0.05, "learning_rate": 0.0009608409565077837, "loss": 0.1181, "theoretical_loss": 4.505819944349556, "tokens_seen": 160956416 }, { "epoch": 0.05, "learning_rate": 0.0009607607125662012, "loss": 0.1152, "theoretical_loss": 4.504877656306535, "tokens_seen": 161218560 }, { "epoch": 0.05, "learning_rate": 0.0009606804686246189, "loss": 0.1187, "theoretical_loss": 4.503937327410306, "tokens_seen": 161480704 }, { "epoch": 0.05, "learning_rate": 0.0009606002246830364, "loss": 0.1185, "theoretical_loss": 4.502998950417004, "tokens_seen": 161742848 }, { "epoch": 0.05, "learning_rate": 0.000960519980741454, "loss": 0.1186, "theoretical_loss": 4.502062518121232, "tokens_seen": 162004992 }, { "epoch": 0.05, "learning_rate": 0.0009604397367998717, "loss": 0.1161, "theoretical_loss": 4.501128023355796, "tokens_seen": 162267136 }, { "epoch": 0.05, "learning_rate": 0.0009603594928582893, "loss": 0.116, "theoretical_loss": 4.500195458991443, "tokens_seen": 162529280 }, { "epoch": 0.05, "learning_rate": 0.0009602792489167068, "loss": 0.1189, "theoretical_loss": 4.499264817936593, "tokens_seen": 162791424 }, { "epoch": 0.05, "learning_rate": 0.0009601990049751244, "loss": 0.1202, "theoretical_loss": 4.498336093137089, "tokens_seen": 163053568 }, { "epoch": 0.05, "learning_rate": 0.000960118761033542, "loss": 0.1179, "theoretical_loss": 4.49740927757593, "tokens_seen": 163315712 }, { "epoch": 0.05, "learning_rate": 0.0009600385170919595, "loss": 0.1162, "theoretical_loss": 4.496484364273021, "tokens_seen": 163577856 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004073688294738531, "objective/train/docs_used": 66569, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.1198720932006836, "objective/train/original_loss": 2.1198720932006836, "objective/train/theoretical_loss": 4.495561346284925, "objective/train/tokens_used": 184300000, "objective/train/value_avg": -0.009857177734375, "objective/train/value_loss": 0.00025354541139677167, "objective/train/value_max": -0.0008263587951660156, "objective/train/value_min": -0.55126953125, "objective/train/value_reward_corr": 0.4736301748253403, "objective/train/value_std": 0.00994110107421875, "objective/train/weight_avg": 1.004190444946289, "objective/train/weighted_lm_loss": 2.12888765335083, "objective/train/weights_max": 1.1655560731887817, "objective/train/weights_min": 0.36955323815345764, "theoretical_loss": 4.495561346284925, "tokens_seen": 163840000 }, { "epoch": 0.05, "learning_rate": 0.0009599582731503772, "loss": 0.1154, "theoretical_loss": 4.495561346284925, "tokens_seen": 163840000 }, { "epoch": 0.05, "learning_rate": 0.0009598780292087947, "loss": 0.117, "theoretical_loss": 4.494640216704598, "tokens_seen": 164102144 }, { "epoch": 0.05, "learning_rate": 0.0009597977852672124, "loss": 0.1187, "theoretical_loss": 4.493720968661158, "tokens_seen": 164364288 }, { "epoch": 0.05, "learning_rate": 0.00095971754132563, "loss": 0.1155, "theoretical_loss": 4.492803595319623, "tokens_seen": 164626432 }, { "epoch": 0.05, "learning_rate": 0.0009596372973840475, "loss": 0.1145, "theoretical_loss": 4.49188808988068, "tokens_seen": 164888576 }, { "epoch": 0.05, "learning_rate": 0.0009595570534424652, "loss": 0.1111, "theoretical_loss": 4.490974445580429, "tokens_seen": 165150720 }, { "epoch": 0.05, "learning_rate": 0.0009594768095008827, "loss": 0.1124, "theoretical_loss": 4.490062655690153, "tokens_seen": 165412864 }, { "epoch": 0.05, "learning_rate": 0.0009593965655593003, "loss": 0.1125, "theoretical_loss": 4.489152713516077, "tokens_seen": 165675008 }, { "epoch": 0.05, "learning_rate": 0.0009593163216177179, "loss": 0.1103, "theoretical_loss": 4.488244612399129, "tokens_seen": 165937152 }, { "epoch": 0.05, "learning_rate": 0.0009592360776761355, "loss": 0.1118, "theoretical_loss": 4.487338345714707, "tokens_seen": 166199296 }, { "epoch": 0.05, "learning_rate": 0.000959155833734553, "loss": 0.1136, "theoretical_loss": 4.486433906872448, "tokens_seen": 166461440 }, { "epoch": 0.05, "learning_rate": 0.0009590755897929706, "loss": 0.1148, "theoretical_loss": 4.485531289315997, "tokens_seen": 166723584 }, { "epoch": 0.05, "learning_rate": 0.0009589953458513882, "loss": 0.1092, "theoretical_loss": 4.484630486522775, "tokens_seen": 166985728 }, { "epoch": 0.05, "learning_rate": 0.0009589151019098058, "loss": 0.1164, "theoretical_loss": 4.483731492003757, "tokens_seen": 167247872 }, { "epoch": 0.05, "learning_rate": 0.0009588348579682235, "loss": 0.1126, "theoretical_loss": 4.482834299303246, "tokens_seen": 167510016 }, { "epoch": 0.05, "learning_rate": 0.000958754614026641, "loss": 0.1125, "theoretical_loss": 4.481938901998647, "tokens_seen": 167772160 }, { "epoch": 0.05, "learning_rate": 0.0009586743700850587, "loss": 0.1122, "theoretical_loss": 4.481045293700248, "tokens_seen": 168034304 }, { "epoch": 0.05, "learning_rate": 0.0009585941261434762, "loss": 0.1135, "theoretical_loss": 4.480153468051001, "tokens_seen": 168296448 }, { "epoch": 0.05, "learning_rate": 0.0009585138822018937, "loss": 0.1153, "theoretical_loss": 4.4792634187263065, "tokens_seen": 168558592 }, { "epoch": 0.05, "learning_rate": 0.0009584336382603114, "loss": 0.1143, "theoretical_loss": 4.4783751394337905, "tokens_seen": 168820736 }, { "epoch": 0.05, "learning_rate": 0.0009583533943187289, "loss": 0.1091, "theoretical_loss": 4.4774886239131, "tokens_seen": 169082880 }, { "epoch": 0.05, "learning_rate": 0.0009582731503771465, "loss": 0.116, "theoretical_loss": 4.476603865935683, "tokens_seen": 169345024 }, { "epoch": 0.05, "learning_rate": 0.0009581929064355642, "loss": 0.1161, "theoretical_loss": 4.475720859304583, "tokens_seen": 169607168 }, { "epoch": 0.05, "learning_rate": 0.0009581126624939818, "loss": 0.1111, "theoretical_loss": 4.474839597854226, "tokens_seen": 169869312 }, { "epoch": 0.05, "learning_rate": 0.0009580324185523993, "loss": 0.1142, "theoretical_loss": 4.473960075450218, "tokens_seen": 170131456 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004443011246621609, "objective/train/docs_used": 69022, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.34761905670166, "objective/train/original_loss": 2.34761905670166, "objective/train/theoretical_loss": 4.473082285989134, "objective/train/tokens_used": 190853600, "objective/train/value_avg": -0.009521484375, "objective/train/value_loss": 0.00034313887590542436, "objective/train/value_max": -0.001094818115234375, "objective/train/value_min": -0.2257080078125, "objective/train/value_reward_corr": 0.3206139523753071, "objective/train/value_std": 0.00771331787109375, "objective/train/weight_avg": 1.0045922994613647, "objective/train/weighted_lm_loss": 2.357492446899414, "objective/train/weights_max": 1.1233699321746826, "objective/train/weights_min": 0.23052240908145905, "theoretical_loss": 4.473082285989134, "tokens_seen": 170393600 }, { "epoch": 0.05, "learning_rate": 0.0009579521746108169, "loss": 0.1172, "theoretical_loss": 4.473082285989134, "tokens_seen": 170393600 }, { "epoch": 0.05, "learning_rate": 0.0009578719306692345, "loss": 0.114, "theoretical_loss": 4.472206223398325, "tokens_seen": 170655744 }, { "epoch": 0.05, "learning_rate": 0.000957791686727652, "loss": 0.1133, "theoretical_loss": 4.471331881635698, "tokens_seen": 170917888 }, { "epoch": 0.05, "learning_rate": 0.0009577114427860697, "loss": 0.1127, "theoretical_loss": 4.470459254689533, "tokens_seen": 171180032 }, { "epoch": 0.05, "learning_rate": 0.0009576311988444872, "loss": 0.1129, "theoretical_loss": 4.469588336578277, "tokens_seen": 171442176 }, { "epoch": 0.05, "learning_rate": 0.0009575509549029048, "loss": 0.1119, "theoretical_loss": 4.468719121350343, "tokens_seen": 171704320 }, { "epoch": 0.05, "learning_rate": 0.0009574707109613225, "loss": 0.1102, "theoretical_loss": 4.467851603083923, "tokens_seen": 171966464 }, { "epoch": 0.05, "learning_rate": 0.00095739046701974, "loss": 0.1137, "theoretical_loss": 4.466985775886784, "tokens_seen": 172228608 }, { "epoch": 0.05, "learning_rate": 0.0009573102230781577, "loss": 0.1119, "theoretical_loss": 4.466121633896087, "tokens_seen": 172490752 }, { "epoch": 0.05, "learning_rate": 0.0009572299791365752, "loss": 0.1152, "theoretical_loss": 4.465259171278182, "tokens_seen": 172752896 }, { "epoch": 0.05, "learning_rate": 0.0009571497351949928, "loss": 0.1116, "theoretical_loss": 4.464398382228435, "tokens_seen": 173015040 }, { "epoch": 0.05, "learning_rate": 0.0009570694912534104, "loss": 0.1146, "theoretical_loss": 4.463539260971023, "tokens_seen": 173277184 }, { "epoch": 0.05, "learning_rate": 0.000956989247311828, "loss": 0.1148, "theoretical_loss": 4.462681801758762, "tokens_seen": 173539328 }, { "epoch": 0.05, "learning_rate": 0.0009569090033702455, "loss": 0.1142, "theoretical_loss": 4.461825998872914, "tokens_seen": 173801472 }, { "epoch": 0.05, "learning_rate": 0.0009568287594286632, "loss": 0.1076, "theoretical_loss": 4.460971846623005, "tokens_seen": 174063616 }, { "epoch": 0.05, "learning_rate": 0.0009567485154870808, "loss": 0.1106, "theoretical_loss": 4.460119339346643, "tokens_seen": 174325760 }, { "epoch": 0.05, "learning_rate": 0.0009566682715454983, "loss": 0.1086, "theoretical_loss": 4.45926847140934, "tokens_seen": 174587904 }, { "epoch": 0.05, "learning_rate": 0.000956588027603916, "loss": 0.1096, "theoretical_loss": 4.45841923720433, "tokens_seen": 174850048 }, { "epoch": 0.05, "learning_rate": 0.0009565077836623335, "loss": 0.1163, "theoretical_loss": 4.4575716311523905, "tokens_seen": 175112192 }, { "epoch": 0.05, "learning_rate": 0.0009564275397207511, "loss": 0.1118, "theoretical_loss": 4.456725647701669, "tokens_seen": 175374336 }, { "epoch": 0.05, "learning_rate": 0.0009563472957791687, "loss": 0.1112, "theoretical_loss": 4.455881281327508, "tokens_seen": 175636480 }, { "epoch": 0.05, "learning_rate": 0.0009562670518375863, "loss": 0.1138, "theoretical_loss": 4.4550385265322685, "tokens_seen": 175898624 }, { "epoch": 0.05, "learning_rate": 0.0009561868078960038, "loss": 0.1107, "theoretical_loss": 4.45419737784516, "tokens_seen": 176160768 }, { "epoch": 0.05, "learning_rate": 0.0009561065639544214, "loss": 0.113, "theoretical_loss": 4.45335782982207, "tokens_seen": 176422912 }, { "epoch": 0.05, "learning_rate": 0.000956026320012839, "loss": 0.1134, "theoretical_loss": 4.452519877045393, "tokens_seen": 176685056 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.004205191973596811, "objective/train/docs_used": 71338, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.1437151432037354, "objective/train/original_loss": 2.1437151432037354, "objective/train/theoretical_loss": 4.451683514123864, "objective/train/tokens_used": 197407200, "objective/train/value_avg": -0.009735107421875, "objective/train/value_loss": 0.00020696282444987446, "objective/train/value_max": -0.0012941360473632812, "objective/train/value_min": -0.50732421875, "objective/train/value_reward_corr": 0.42861167714862647, "objective/train/value_std": 0.00954437255859375, "objective/train/weight_avg": 1.0043038129806519, "objective/train/weighted_lm_loss": 2.1534533500671387, "objective/train/weights_max": 1.4412035942077637, "objective/train/weights_min": 0.3694094717502594, "theoretical_loss": 4.451683514123864, "tokens_seen": 176947200 }, { "epoch": 0.05, "learning_rate": 0.0009559460760712567, "loss": 0.1134, "theoretical_loss": 4.451683514123864, "tokens_seen": 176947200 }, { "epoch": 0.05, "learning_rate": 0.0009558658321296743, "loss": 0.1131, "theoretical_loss": 4.450848735692391, "tokens_seen": 177209344 }, { "epoch": 0.05, "learning_rate": 0.0009557855881880918, "loss": 0.1129, "theoretical_loss": 4.450015536411886, "tokens_seen": 177471488 }, { "epoch": 0.05, "learning_rate": 0.0009557053442465095, "loss": 0.1109, "theoretical_loss": 4.449183910969108, "tokens_seen": 177733632 }, { "epoch": 0.05, "learning_rate": 0.000955625100304927, "loss": 0.1089, "theoretical_loss": 4.448353854076494, "tokens_seen": 177995776 }, { "epoch": 0.05, "learning_rate": 0.0009555448563633445, "loss": 0.1123, "theoretical_loss": 4.4475253604719995, "tokens_seen": 178257920 }, { "epoch": 0.05, "learning_rate": 0.0009554646124217622, "loss": 0.1079, "theoretical_loss": 4.446698424918937, "tokens_seen": 178520064 }, { "epoch": 0.05, "learning_rate": 0.0009553843684801797, "loss": 0.1115, "theoretical_loss": 4.44587304220582, "tokens_seen": 178782208 }, { "epoch": 0.05, "learning_rate": 0.0009553041245385973, "loss": 0.1076, "theoretical_loss": 4.4450492071462, "tokens_seen": 179044352 }, { "epoch": 0.05, "learning_rate": 0.000955223880597015, "loss": 0.1146, "theoretical_loss": 4.444226914578513, "tokens_seen": 179306496 }, { "epoch": 0.05, "learning_rate": 0.0009551436366554326, "loss": 0.1136, "theoretical_loss": 4.4434061593659235, "tokens_seen": 179568640 }, { "epoch": 0.05, "learning_rate": 0.0009550633927138501, "loss": 0.1129, "theoretical_loss": 4.442586936396171, "tokens_seen": 179830784 }, { "epoch": 0.05, "learning_rate": 0.0009549831487722677, "loss": 0.1116, "theoretical_loss": 4.441769240581412, "tokens_seen": 180092928 }, { "epoch": 0.05, "learning_rate": 0.0009549029048306853, "loss": 0.1162, "theoretical_loss": 4.440953066858077, "tokens_seen": 180355072 }, { "epoch": 0.05, "learning_rate": 0.0009548226608891028, "loss": 0.1104, "theoretical_loss": 4.4401384101867105, "tokens_seen": 180617216 }, { "epoch": 0.05, "learning_rate": 0.0009547424169475205, "loss": 0.1108, "theoretical_loss": 4.439325265551826, "tokens_seen": 180879360 }, { "epoch": 0.05, "learning_rate": 0.000954662173005938, "loss": 0.1117, "theoretical_loss": 4.438513627961757, "tokens_seen": 181141504 }, { "epoch": 0.05, "learning_rate": 0.0009545819290643557, "loss": 0.1118, "theoretical_loss": 4.437703492448509, "tokens_seen": 181403648 }, { "epoch": 0.06, "learning_rate": 0.0009545016851227733, "loss": 0.1139, "theoretical_loss": 4.436894854067614, "tokens_seen": 181665792 }, { "epoch": 0.06, "learning_rate": 0.0009544214411811908, "loss": 0.1126, "theoretical_loss": 4.436087707897984, "tokens_seen": 181927936 }, { "epoch": 0.06, "learning_rate": 0.0009543411972396085, "loss": 0.1126, "theoretical_loss": 4.435282049041769, "tokens_seen": 182190080 }, { "epoch": 0.06, "learning_rate": 0.000954260953298026, "loss": 0.1106, "theoretical_loss": 4.434477872624212, "tokens_seen": 182452224 }, { "epoch": 0.06, "learning_rate": 0.0009541807093564436, "loss": 0.1133, "theoretical_loss": 4.433675173793507, "tokens_seen": 182714368 }, { "epoch": 0.06, "learning_rate": 0.0009541004654148612, "loss": 0.1116, "theoretical_loss": 4.43287394772066, "tokens_seen": 182976512 }, { "epoch": 0.06, "learning_rate": 0.0009540202214732788, "loss": 0.1147, "theoretical_loss": 4.43207418959935, "tokens_seen": 183238656 }, { "epoch": 0.06, "objective/train/advantage_avg": -0.000987537787295878, "objective/train/docs_used": 73723, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.2004032135009766, "objective/train/original_loss": 2.2004029750823975, "objective/train/theoretical_loss": 4.431275894645784, "objective/train/tokens_used": 203960800, "objective/train/value_avg": -0.00931549072265625, "objective/train/value_loss": 0.001105409930460155, "objective/train/value_max": -0.0009927749633789062, "objective/train/value_min": -0.28662109375, "objective/train/value_reward_corr": 0.2604573666222557, "objective/train/value_std": 0.00833892822265625, "objective/train/weight_avg": 0.9995287656784058, "objective/train/weighted_lm_loss": 2.196560859680176, "objective/train/weights_max": 1.1982022523880005, "objective/train/weights_min": 0.36976757645606995, "theoretical_loss": 4.431275894645784, "tokens_seen": 183500800 }, { "epoch": 0.06, "learning_rate": 0.0009539399775316963, "loss": 0.1075, "theoretical_loss": 4.431275894645784, "tokens_seen": 183500800 }, { "epoch": 0.06, "learning_rate": 0.000953859733590114, "loss": 0.1111, "theoretical_loss": 4.43047905809857, "tokens_seen": 183762944 }, { "epoch": 0.06, "learning_rate": 0.0009537794896485315, "loss": 0.1096, "theoretical_loss": 4.42968367521857, "tokens_seen": 184025088 }, { "epoch": 0.06, "learning_rate": 0.000953699245706949, "loss": 0.1113, "theoretical_loss": 4.428889741288771, "tokens_seen": 184287232 }, { "epoch": 0.06, "learning_rate": 0.0009536190017653668, "loss": 0.1123, "theoretical_loss": 4.428097251614145, "tokens_seen": 184549376 }, { "epoch": 0.06, "learning_rate": 0.0009535387578237843, "loss": 0.1104, "theoretical_loss": 4.427306201521524, "tokens_seen": 184811520 }, { "epoch": 0.06, "learning_rate": 0.000953458513882202, "loss": 0.1098, "theoretical_loss": 4.426516586359458, "tokens_seen": 185073664 }, { "epoch": 0.06, "learning_rate": 0.0009533782699406195, "loss": 0.1119, "theoretical_loss": 4.425728401498089, "tokens_seen": 185335808 }, { "epoch": 0.06, "learning_rate": 0.0009532980259990371, "loss": 0.1123, "theoretical_loss": 4.424941642329019, "tokens_seen": 185597952 }, { "epoch": 0.06, "learning_rate": 0.0009532177820574547, "loss": 0.1109, "theoretical_loss": 4.42415630426518, "tokens_seen": 185860096 }, { "epoch": 0.06, "learning_rate": 0.0009531375381158722, "loss": 0.1093, "theoretical_loss": 4.423372382740707, "tokens_seen": 186122240 }, { "epoch": 0.06, "learning_rate": 0.0009530572941742898, "loss": 0.1064, "theoretical_loss": 4.422589873210806, "tokens_seen": 186384384 }, { "epoch": 0.06, "learning_rate": 0.0009529770502327075, "loss": 0.1071, "theoretical_loss": 4.4218087711516345, "tokens_seen": 186646528 }, { "epoch": 0.06, "learning_rate": 0.0009528968062911251, "loss": 0.1123, "theoretical_loss": 4.421029072060167, "tokens_seen": 186908672 }, { "epoch": 0.06, "learning_rate": 0.0009528165623495426, "loss": 0.1095, "theoretical_loss": 4.420250771454078, "tokens_seen": 187170816 }, { "epoch": 0.06, "learning_rate": 0.0009527363184079603, "loss": 0.1135, "theoretical_loss": 4.419473864871613, "tokens_seen": 187432960 }, { "epoch": 0.06, "learning_rate": 0.0009526560744663778, "loss": 0.1138, "theoretical_loss": 4.4186983478714685, "tokens_seen": 187695104 }, { "epoch": 0.06, "learning_rate": 0.0009525758305247953, "loss": 0.1132, "theoretical_loss": 4.417924216032667, "tokens_seen": 187957248 }, { "epoch": 0.06, "learning_rate": 0.000952495586583213, "loss": 0.1117, "theoretical_loss": 4.417151464954437, "tokens_seen": 188219392 }, { "epoch": 0.06, "learning_rate": 0.0009524153426416305, "loss": 0.1107, "theoretical_loss": 4.416380090256095, "tokens_seen": 188481536 }, { "epoch": 0.06, "learning_rate": 0.0009523350987000481, "loss": 0.1148, "theoretical_loss": 4.415610087576923, "tokens_seen": 188743680 }, { "epoch": 0.06, "learning_rate": 0.0009522548547584658, "loss": 0.1119, "theoretical_loss": 4.414841452576049, "tokens_seen": 189005824 }, { "epoch": 0.06, "learning_rate": 0.0009521746108168834, "loss": 0.1084, "theoretical_loss": 4.414074180932333, "tokens_seen": 189267968 }, { "epoch": 0.06, "learning_rate": 0.000952094366875301, "loss": 0.1087, "theoretical_loss": 4.413308268344249, "tokens_seen": 189530112 }, { "epoch": 0.06, "learning_rate": 0.0009520141229337185, "loss": 0.1113, "theoretical_loss": 4.412543710529766, "tokens_seen": 189792256 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.002621724735945463, "objective/train/docs_used": 76077, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.243046760559082, "objective/train/original_loss": 2.243046760559082, "objective/train/theoretical_loss": 4.411780503226238, "objective/train/tokens_used": 210514400, "objective/train/value_avg": -0.00859832763671875, "objective/train/value_loss": 0.0002450415340717882, "objective/train/value_max": -0.0006189346313476562, "objective/train/value_min": -0.332275390625, "objective/train/value_reward_corr": 0.4247518874973535, "objective/train/value_std": 0.008056640625, "objective/train/weight_avg": 1.002734899520874, "objective/train/weighted_lm_loss": 2.2489123344421387, "objective/train/weights_max": 1.2309222221374512, "objective/train/weights_min": 0.3706021308898926, "theoretical_loss": 4.411780503226238, "tokens_seen": 190054400 }, { "epoch": 0.06, "learning_rate": 0.0009519338789921361, "loss": 0.1102, "theoretical_loss": 4.411780503226238, "tokens_seen": 190054400 }, { "epoch": 0.06, "learning_rate": 0.0009518536350505537, "loss": 0.1125, "theoretical_loss": 4.4110186421902835, "tokens_seen": 190316544 }, { "epoch": 0.06, "learning_rate": 0.0009517733911089713, "loss": 0.1074, "theoretical_loss": 4.4102581231976785, "tokens_seen": 190578688 }, { "epoch": 0.06, "learning_rate": 0.0009516931471673888, "loss": 0.1119, "theoretical_loss": 4.409498942043237, "tokens_seen": 190840832 }, { "epoch": 0.06, "learning_rate": 0.0009516129032258065, "loss": 0.1106, "theoretical_loss": 4.408741094540707, "tokens_seen": 191102976 }, { "epoch": 0.06, "learning_rate": 0.000951532659284224, "loss": 0.108, "theoretical_loss": 4.407984576522653, "tokens_seen": 191365120 }, { "epoch": 0.06, "learning_rate": 0.0009514524153426416, "loss": 0.1106, "theoretical_loss": 4.407229383840347, "tokens_seen": 191627264 }, { "epoch": 0.06, "learning_rate": 0.0009513721714010593, "loss": 0.1092, "theoretical_loss": 4.406475512363663, "tokens_seen": 191889408 }, { "epoch": 0.06, "learning_rate": 0.0009512919274594768, "loss": 0.1056, "theoretical_loss": 4.405722957980962, "tokens_seen": 192151552 }, { "epoch": 0.06, "learning_rate": 0.0009512116835178944, "loss": 0.1093, "theoretical_loss": 4.40497171659899, "tokens_seen": 192413696 }, { "epoch": 0.06, "learning_rate": 0.000951131439576312, "loss": 0.1061, "theoretical_loss": 4.404221784142768, "tokens_seen": 192675840 }, { "epoch": 0.06, "learning_rate": 0.0009510511956347296, "loss": 0.1098, "theoretical_loss": 4.403473156555487, "tokens_seen": 192937984 }, { "epoch": 0.06, "learning_rate": 0.0009509709516931472, "loss": 0.107, "theoretical_loss": 4.402725829798397, "tokens_seen": 193200128 }, { "epoch": 0.06, "learning_rate": 0.0009508907077515648, "loss": 0.1066, "theoretical_loss": 4.4019797998507135, "tokens_seen": 193462272 }, { "epoch": 0.06, "learning_rate": 0.0009508104638099823, "loss": 0.1042, "theoretical_loss": 4.401235062709502, "tokens_seen": 193724416 }, { "epoch": 0.06, "learning_rate": 0.0009507302198684, "loss": 0.1081, "theoretical_loss": 4.400491614389582, "tokens_seen": 193986560 }, { "epoch": 0.06, "learning_rate": 0.0009506499759268176, "loss": 0.1048, "theoretical_loss": 4.3997494509234185, "tokens_seen": 194248704 }, { "epoch": 0.06, "learning_rate": 0.0009505697319852351, "loss": 0.1083, "theoretical_loss": 4.399008568361027, "tokens_seen": 194510848 }, { "epoch": 0.06, "learning_rate": 0.0009504894880436528, "loss": 0.1097, "theoretical_loss": 4.398268962769867, "tokens_seen": 194772992 }, { "epoch": 0.06, "learning_rate": 0.0009504092441020703, "loss": 0.1125, "theoretical_loss": 4.397530630234744, "tokens_seen": 195035136 }, { "epoch": 0.06, "learning_rate": 0.0009503290001604879, "loss": 0.1067, "theoretical_loss": 4.396793566857708, "tokens_seen": 195297280 }, { "epoch": 0.06, "learning_rate": 0.0009502487562189055, "loss": 0.1057, "theoretical_loss": 4.396057768757957, "tokens_seen": 195559424 }, { "epoch": 0.06, "learning_rate": 0.000950168512277323, "loss": 0.1076, "theoretical_loss": 4.395323232071737, "tokens_seen": 195821568 }, { "epoch": 0.06, "learning_rate": 0.0009500882683357406, "loss": 0.1075, "theoretical_loss": 4.394589952952247, "tokens_seen": 196083712 }, { "epoch": 0.06, "learning_rate": 0.0009500080243941583, "loss": 0.1034, "theoretical_loss": 4.393857927569534, "tokens_seen": 196345856 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0034133398439735174, "objective/train/docs_used": 78313, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0345728397369385, "objective/train/original_loss": 2.0345726013183594, "objective/train/theoretical_loss": 4.393127152110409, "objective/train/tokens_used": 217068000, "objective/train/value_avg": -0.01177978515625, "objective/train/value_loss": 0.00046790740452706814, "objective/train/value_max": -0.00142669677734375, "objective/train/value_min": -0.64306640625, "objective/train/value_reward_corr": 0.5403801085285318, "objective/train/value_std": 0.01461029052734375, "objective/train/weight_avg": 1.0036334991455078, "objective/train/weighted_lm_loss": 2.0406572818756104, "objective/train/weights_max": 1.7312231063842773, "objective/train/weights_min": 0.3701556921005249, "theoretical_loss": 4.393127152110409, "tokens_seen": 196608000 }, { "epoch": 0.06, "learning_rate": 0.0009499277804525759, "loss": 0.1063, "theoretical_loss": 4.393127152110409, "tokens_seen": 196608000 }, { "epoch": 0.06, "learning_rate": 0.0009498475365109934, "loss": 0.1069, "theoretical_loss": 4.392397622778343, "tokens_seen": 196870144 }, { "epoch": 0.06, "learning_rate": 0.0009497672925694111, "loss": 0.1068, "theoretical_loss": 4.391669335793372, "tokens_seen": 197132288 }, { "epoch": 0.06, "learning_rate": 0.0009496870486278286, "loss": 0.1043, "theoretical_loss": 4.39094228739201, "tokens_seen": 197394432 }, { "epoch": 0.06, "learning_rate": 0.0009496068046862462, "loss": 0.1086, "theoretical_loss": 4.390216473827143, "tokens_seen": 197656576 }, { "epoch": 0.06, "learning_rate": 0.0009495265607446638, "loss": 0.1027, "theoretical_loss": 4.389491891367953, "tokens_seen": 197918720 }, { "epoch": 0.06, "learning_rate": 0.0009494463168030813, "loss": 0.1075, "theoretical_loss": 4.388768536299808, "tokens_seen": 198180864 }, { "epoch": 0.06, "learning_rate": 0.000949366072861499, "loss": 0.1096, "theoretical_loss": 4.388046404924184, "tokens_seen": 198443008 }, { "epoch": 0.06, "learning_rate": 0.0009492858289199166, "loss": 0.1058, "theoretical_loss": 4.387325493558566, "tokens_seen": 198705152 }, { "epoch": 0.06, "learning_rate": 0.0009492055849783342, "loss": 0.1068, "theoretical_loss": 4.386605798536362, "tokens_seen": 198967296 }, { "epoch": 0.06, "learning_rate": 0.0009491253410367518, "loss": 0.109, "theoretical_loss": 4.385887316206812, "tokens_seen": 199229440 }, { "epoch": 0.06, "learning_rate": 0.0009490450970951693, "loss": 0.1029, "theoretical_loss": 4.385170042934896, "tokens_seen": 199491584 }, { "epoch": 0.06, "learning_rate": 0.0009489648531535869, "loss": 0.1046, "theoretical_loss": 4.384453975101251, "tokens_seen": 199753728 }, { "epoch": 0.06, "learning_rate": 0.0009488846092120045, "loss": 0.107, "theoretical_loss": 4.38373910910208, "tokens_seen": 200015872 }, { "epoch": 0.06, "learning_rate": 0.0009488043652704221, "loss": 0.1056, "theoretical_loss": 4.383025441349063, "tokens_seen": 200278016 }, { "epoch": 0.06, "learning_rate": 0.0009487241213288396, "loss": 0.1099, "theoretical_loss": 4.382312968269276, "tokens_seen": 200540160 }, { "epoch": 0.06, "learning_rate": 0.0009486438773872573, "loss": 0.105, "theoretical_loss": 4.381601686305098, "tokens_seen": 200802304 }, { "epoch": 0.06, "learning_rate": 0.0009485636334456749, "loss": 0.1039, "theoretical_loss": 4.38089159191413, "tokens_seen": 201064448 }, { "epoch": 0.06, "learning_rate": 0.0009484833895040924, "loss": 0.1014, "theoretical_loss": 4.380182681569111, "tokens_seen": 201326592 }, { "epoch": 0.06, "learning_rate": 0.0009484031455625101, "loss": 0.1062, "theoretical_loss": 4.379474951757829, "tokens_seen": 201588736 }, { "epoch": 0.06, "learning_rate": 0.0009483229016209276, "loss": 0.107, "theoretical_loss": 4.378768398983042, "tokens_seen": 201850880 }, { "epoch": 0.06, "learning_rate": 0.0009482426576793453, "loss": 0.1042, "theoretical_loss": 4.378063019762392, "tokens_seen": 202113024 }, { "epoch": 0.06, "learning_rate": 0.0009481624137377628, "loss": 0.105, "theoretical_loss": 4.377358810628324, "tokens_seen": 202375168 }, { "epoch": 0.06, "learning_rate": 0.0009480821697961804, "loss": 0.1045, "theoretical_loss": 4.3766557681280025, "tokens_seen": 202637312 }, { "epoch": 0.06, "learning_rate": 0.000948001925854598, "loss": 0.1059, "theoretical_loss": 4.375953888823233, "tokens_seen": 202899456 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0012575258733704686, "objective/train/docs_used": 80605, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.184474229812622, "objective/train/original_loss": 2.184473991394043, "objective/train/theoretical_loss": 4.375253169290376, "objective/train/tokens_used": 223621600, "objective/train/value_avg": -0.01114654541015625, "objective/train/value_loss": 0.000553421676158905, "objective/train/value_max": -0.0007014274597167969, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.5880496589440656, "objective/train/value_std": 0.0163116455078125, "objective/train/weight_avg": 1.0015053749084473, "objective/train/weighted_lm_loss": 2.1872315406799316, "objective/train/weights_max": 1.1879366636276245, "objective/train/weights_min": 0.3710477352142334, "theoretical_loss": 4.375253169290376, "tokens_seen": 203161600 }, { "epoch": 0.06, "learning_rate": 0.0009479216819130155, "loss": 0.1046, "theoretical_loss": 4.375253169290376, "tokens_seen": 203161600 }, { "epoch": 0.06, "learning_rate": 0.0009478414379714331, "loss": 0.1057, "theoretical_loss": 4.374553606120274, "tokens_seen": 203423744 }, { "epoch": 0.06, "learning_rate": 0.0009477611940298508, "loss": 0.1046, "theoretical_loss": 4.373855195918162, "tokens_seen": 203685888 }, { "epoch": 0.06, "learning_rate": 0.0009476809500882684, "loss": 0.1076, "theoretical_loss": 4.3731579353036, "tokens_seen": 203948032 }, { "epoch": 0.06, "learning_rate": 0.0009476007061466859, "loss": 0.1036, "theoretical_loss": 4.372461820910382, "tokens_seen": 204210176 }, { "epoch": 0.06, "learning_rate": 0.0009475204622051036, "loss": 0.1063, "theoretical_loss": 4.371766849386468, "tokens_seen": 204472320 }, { "epoch": 0.06, "learning_rate": 0.0009474402182635211, "loss": 0.1066, "theoretical_loss": 4.3710730173939005, "tokens_seen": 204734464 }, { "epoch": 0.06, "learning_rate": 0.0009473599743219387, "loss": 0.1061, "theoretical_loss": 4.370380321608731, "tokens_seen": 204996608 }, { "epoch": 0.06, "learning_rate": 0.0009472797303803563, "loss": 0.1077, "theoretical_loss": 4.369688758720937, "tokens_seen": 205258752 }, { "epoch": 0.06, "learning_rate": 0.0009471994864387738, "loss": 0.1037, "theoretical_loss": 4.368998325434355, "tokens_seen": 205520896 }, { "epoch": 0.06, "learning_rate": 0.0009471192424971916, "loss": 0.1023, "theoretical_loss": 4.3683090184666, "tokens_seen": 205783040 }, { "epoch": 0.06, "learning_rate": 0.0009470389985556091, "loss": 0.1015, "theoretical_loss": 4.367620834548987, "tokens_seen": 206045184 }, { "epoch": 0.06, "learning_rate": 0.0009469587546140267, "loss": 0.104, "theoretical_loss": 4.3669337704264635, "tokens_seen": 206307328 }, { "epoch": 0.06, "learning_rate": 0.0009468785106724443, "loss": 0.1068, "theoretical_loss": 4.366247822857533, "tokens_seen": 206569472 }, { "epoch": 0.06, "learning_rate": 0.0009467982667308619, "loss": 0.1048, "theoretical_loss": 4.365562988614176, "tokens_seen": 206831616 }, { "epoch": 0.06, "learning_rate": 0.0009467180227892794, "loss": 0.1034, "theoretical_loss": 4.364879264481787, "tokens_seen": 207093760 }, { "epoch": 0.06, "learning_rate": 0.000946637778847697, "loss": 0.1035, "theoretical_loss": 4.364196647259092, "tokens_seen": 207355904 }, { "epoch": 0.06, "learning_rate": 0.0009465575349061146, "loss": 0.1061, "theoretical_loss": 4.363515133758084, "tokens_seen": 207618048 }, { "epoch": 0.06, "learning_rate": 0.0009464772909645321, "loss": 0.1075, "theoretical_loss": 4.3628347208039475, "tokens_seen": 207880192 }, { "epoch": 0.06, "learning_rate": 0.0009463970470229499, "loss": 0.1018, "theoretical_loss": 4.362155405234985, "tokens_seen": 208142336 }, { "epoch": 0.06, "learning_rate": 0.0009463168030813674, "loss": 0.109, "theoretical_loss": 4.361477183902554, "tokens_seen": 208404480 }, { "epoch": 0.06, "learning_rate": 0.000946236559139785, "loss": 0.1052, "theoretical_loss": 4.360800053670989, "tokens_seen": 208666624 }, { "epoch": 0.06, "learning_rate": 0.0009461563151982026, "loss": 0.1031, "theoretical_loss": 4.360124011417536, "tokens_seen": 208928768 }, { "epoch": 0.06, "learning_rate": 0.0009460760712566201, "loss": 0.1047, "theoretical_loss": 4.359449054032282, "tokens_seen": 209190912 }, { "epoch": 0.06, "learning_rate": 0.0009459958273150377, "loss": 0.1042, "theoretical_loss": 4.358775178418089, "tokens_seen": 209453056 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.0007151683676056564, "objective/train/docs_used": 82917, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0514161586761475, "objective/train/original_loss": 2.0514163970947266, "objective/train/theoretical_loss": 4.358102381490517, "objective/train/tokens_used": 230175200, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.0004249107150826603, "objective/train/value_max": -0.0008263587951660156, "objective/train/value_min": -0.296142578125, "objective/train/value_reward_corr": 0.48272459031421056, "objective/train/value_std": 0.01027679443359375, "objective/train/weight_avg": 1.000907063484192, "objective/train/weighted_lm_loss": 2.0531537532806396, "objective/train/weights_max": 1.2134668827056885, "objective/train/weights_min": 0.370398610830307, "theoretical_loss": 4.358102381490517, "tokens_seen": 209715200 }, { "epoch": 0.06, "learning_rate": 0.0009459155833734553, "loss": 0.1048, "theoretical_loss": 4.358102381490517, "tokens_seen": 209715200 }, { "epoch": 0.06, "learning_rate": 0.0009458353394318729, "loss": 0.1051, "theoretical_loss": 4.3574306601777675, "tokens_seen": 209977344 }, { "epoch": 0.06, "learning_rate": 0.0009457550954902905, "loss": 0.1009, "theoretical_loss": 4.356760011420608, "tokens_seen": 210239488 }, { "epoch": 0.06, "learning_rate": 0.0009456748515487081, "loss": 0.1085, "theoretical_loss": 4.3560904321723095, "tokens_seen": 210501632 }, { "epoch": 0.06, "learning_rate": 0.0009455946076071257, "loss": 0.1056, "theoretical_loss": 4.355421919398576, "tokens_seen": 210763776 }, { "epoch": 0.06, "learning_rate": 0.0009455143636655433, "loss": 0.1054, "theoretical_loss": 4.354754470077481, "tokens_seen": 211025920 }, { "epoch": 0.06, "learning_rate": 0.0009454341197239609, "loss": 0.1029, "theoretical_loss": 4.354088081199402, "tokens_seen": 211288064 }, { "epoch": 0.06, "learning_rate": 0.0009453538757823784, "loss": 0.1038, "theoretical_loss": 4.3534227497669535, "tokens_seen": 211550208 }, { "epoch": 0.06, "learning_rate": 0.0009452736318407961, "loss": 0.1031, "theoretical_loss": 4.352758472794923, "tokens_seen": 211812352 }, { "epoch": 0.06, "learning_rate": 0.0009451933878992136, "loss": 0.1055, "theoretical_loss": 4.352095247310208, "tokens_seen": 212074496 }, { "epoch": 0.06, "learning_rate": 0.0009451131439576312, "loss": 0.1042, "theoretical_loss": 4.351433070351748, "tokens_seen": 212336640 }, { "epoch": 0.06, "learning_rate": 0.0009450329000160488, "loss": 0.1056, "theoretical_loss": 4.350771938970466, "tokens_seen": 212598784 }, { "epoch": 0.06, "learning_rate": 0.0009449526560744663, "loss": 0.1022, "theoretical_loss": 4.350111850229202, "tokens_seen": 212860928 }, { "epoch": 0.06, "learning_rate": 0.000944872412132884, "loss": 0.101, "theoretical_loss": 4.34945280120265, "tokens_seen": 213123072 }, { "epoch": 0.06, "learning_rate": 0.0009447921681913016, "loss": 0.1025, "theoretical_loss": 4.348794788977298, "tokens_seen": 213385216 }, { "epoch": 0.06, "learning_rate": 0.0009447119242497192, "loss": 0.1035, "theoretical_loss": 4.348137810651366, "tokens_seen": 213647360 }, { "epoch": 0.06, "learning_rate": 0.0009446316803081368, "loss": 0.1016, "theoretical_loss": 4.347481863334738, "tokens_seen": 213909504 }, { "epoch": 0.06, "learning_rate": 0.0009445514363665544, "loss": 0.104, "theoretical_loss": 4.346826944148912, "tokens_seen": 214171648 }, { "epoch": 0.06, "learning_rate": 0.0009444711924249719, "loss": 0.1048, "theoretical_loss": 4.3461730502269305, "tokens_seen": 214433792 }, { "epoch": 0.07, "learning_rate": 0.0009443909484833896, "loss": 0.1036, "theoretical_loss": 4.345520178713323, "tokens_seen": 214695936 }, { "epoch": 0.07, "learning_rate": 0.0009443107045418071, "loss": 0.1078, "theoretical_loss": 4.344868326764045, "tokens_seen": 214958080 }, { "epoch": 0.07, "learning_rate": 0.0009442304606002246, "loss": 0.1053, "theoretical_loss": 4.344217491546422, "tokens_seen": 215220224 }, { "epoch": 0.07, "learning_rate": 0.0009441502166586424, "loss": 0.1035, "theoretical_loss": 4.343567670239084, "tokens_seen": 215482368 }, { "epoch": 0.07, "learning_rate": 0.0009440699727170599, "loss": 0.1054, "theoretical_loss": 4.342918860031914, "tokens_seen": 215744512 }, { "epoch": 0.07, "learning_rate": 0.0009439897287754775, "loss": 0.1003, "theoretical_loss": 4.342271058125983, "tokens_seen": 216006656 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0016028911340981722, "objective/train/docs_used": 85191, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.0327014923095703, "objective/train/original_loss": 2.032701253890991, "objective/train/theoretical_loss": 4.341624261733497, "objective/train/tokens_used": 236728800, "objective/train/value_avg": -0.00833892822265625, "objective/train/value_loss": 0.00031526113161817193, "objective/train/value_max": -0.0006666183471679688, "objective/train/value_min": -0.25830078125, "objective/train/value_reward_corr": 0.44630488005091173, "objective/train/value_std": 0.008331298828125, "objective/train/weight_avg": 1.0017452239990234, "objective/train/weighted_lm_loss": 2.0359926223754883, "objective/train/weights_max": 1.1291202306747437, "objective/train/weights_min": 0.36889123916625977, "theoretical_loss": 4.341624261733497, "tokens_seen": 216268800 }, { "epoch": 0.07, "learning_rate": 0.0009439094848338951, "loss": 0.1044, "theoretical_loss": 4.341624261733497, "tokens_seen": 216268800 }, { "epoch": 0.07, "learning_rate": 0.0009438292408923127, "loss": 0.1056, "theoretical_loss": 4.340978468077735, "tokens_seen": 216530944 }, { "epoch": 0.07, "learning_rate": 0.0009437489969507302, "loss": 0.1036, "theoretical_loss": 4.340333674392992, "tokens_seen": 216793088 }, { "epoch": 0.07, "learning_rate": 0.0009436687530091478, "loss": 0.105, "theoretical_loss": 4.339689877924531, "tokens_seen": 217055232 }, { "epoch": 0.07, "learning_rate": 0.0009435885090675654, "loss": 0.1036, "theoretical_loss": 4.3390470759285105, "tokens_seen": 217317376 }, { "epoch": 0.07, "learning_rate": 0.0009435082651259829, "loss": 0.1036, "theoretical_loss": 4.338405265671941, "tokens_seen": 217579520 }, { "epoch": 0.07, "learning_rate": 0.0009434280211844006, "loss": 0.099, "theoretical_loss": 4.337764444432625, "tokens_seen": 217841664 }, { "epoch": 0.07, "learning_rate": 0.0009433477772428182, "loss": 0.1036, "theoretical_loss": 4.337124609499101, "tokens_seen": 218103808 }, { "epoch": 0.07, "learning_rate": 0.0009432675333012359, "loss": 0.1, "theoretical_loss": 4.336485758170589, "tokens_seen": 218365952 }, { "epoch": 0.07, "learning_rate": 0.0009431872893596534, "loss": 0.1058, "theoretical_loss": 4.335847887756934, "tokens_seen": 218628096 }, { "epoch": 0.07, "learning_rate": 0.0009431070454180709, "loss": 0.1014, "theoretical_loss": 4.335210995578553, "tokens_seen": 218890240 }, { "epoch": 0.07, "learning_rate": 0.0009430268014764886, "loss": 0.1035, "theoretical_loss": 4.334575078966383, "tokens_seen": 219152384 }, { "epoch": 0.07, "learning_rate": 0.0009429465575349061, "loss": 0.1044, "theoretical_loss": 4.333940135261823, "tokens_seen": 219414528 }, { "epoch": 0.07, "learning_rate": 0.0009428663135933237, "loss": 0.1014, "theoretical_loss": 4.333306161816684, "tokens_seen": 219676672 }, { "epoch": 0.07, "learning_rate": 0.0009427860696517413, "loss": 0.1056, "theoretical_loss": 4.332673155993131, "tokens_seen": 219938816 }, { "epoch": 0.07, "learning_rate": 0.000942705825710159, "loss": 0.1012, "theoretical_loss": 4.332041115163636, "tokens_seen": 220200960 }, { "epoch": 0.07, "learning_rate": 0.0009426255817685765, "loss": 0.1045, "theoretical_loss": 4.331410036710925, "tokens_seen": 220463104 }, { "epoch": 0.07, "learning_rate": 0.0009425453378269941, "loss": 0.1026, "theoretical_loss": 4.330779918027919, "tokens_seen": 220725248 }, { "epoch": 0.07, "learning_rate": 0.0009424650938854117, "loss": 0.1044, "theoretical_loss": 4.330150756517692, "tokens_seen": 220987392 }, { "epoch": 0.07, "learning_rate": 0.0009423848499438292, "loss": 0.1022, "theoretical_loss": 4.3295225495934115, "tokens_seen": 221249536 }, { "epoch": 0.07, "learning_rate": 0.0009423046060022469, "loss": 0.0974, "theoretical_loss": 4.328895294678292, "tokens_seen": 221511680 }, { "epoch": 0.07, "learning_rate": 0.0009422243620606644, "loss": 0.1027, "theoretical_loss": 4.32826898920554, "tokens_seen": 221773824 }, { "epoch": 0.07, "learning_rate": 0.0009421441181190821, "loss": 0.1052, "theoretical_loss": 4.3276436306183115, "tokens_seen": 222035968 }, { "epoch": 0.07, "learning_rate": 0.0009420638741774996, "loss": 0.0996, "theoretical_loss": 4.327019216369651, "tokens_seen": 222298112 }, { "epoch": 0.07, "learning_rate": 0.0009419836302359171, "loss": 0.101, "theoretical_loss": 4.32639574392245, "tokens_seen": 222560256 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.002008789451792836, "objective/train/docs_used": 87601, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8882607221603394, "objective/train/original_loss": 1.8882604837417603, "objective/train/theoretical_loss": 4.325773210749392, "objective/train/tokens_used": 243282400, "objective/train/value_avg": -0.01070404052734375, "objective/train/value_loss": 0.0003292278270237148, "objective/train/value_max": -0.0006799697875976562, "objective/train/value_min": -0.82373046875, "objective/train/value_reward_corr": 0.602646911223948, "objective/train/value_std": 0.01348876953125, "objective/train/weight_avg": 1.002161979675293, "objective/train/weighted_lm_loss": 1.892277717590332, "objective/train/weights_max": 1.4122933149337769, "objective/train/weights_min": 0.370164155960083, "theoretical_loss": 4.325773210749392, "tokens_seen": 222822400 }, { "epoch": 0.07, "learning_rate": 0.0009419033862943349, "loss": 0.0975, "theoretical_loss": 4.325773210749392, "tokens_seen": 222822400 }, { "epoch": 0.07, "learning_rate": 0.0009418231423527524, "loss": 0.1018, "theoretical_loss": 4.325151614332908, "tokens_seen": 223084544 }, { "epoch": 0.07, "learning_rate": 0.00094174289841117, "loss": 0.1032, "theoretical_loss": 4.3245309521651265, "tokens_seen": 223346688 }, { "epoch": 0.07, "learning_rate": 0.0009416626544695876, "loss": 0.103, "theoretical_loss": 4.323911221747817, "tokens_seen": 223608832 }, { "epoch": 0.07, "learning_rate": 0.0009415824105280052, "loss": 0.1049, "theoretical_loss": 4.323292420592356, "tokens_seen": 223870976 }, { "epoch": 0.07, "learning_rate": 0.0009415021665864227, "loss": 0.1024, "theoretical_loss": 4.322674546219666, "tokens_seen": 224133120 }, { "epoch": 0.07, "learning_rate": 0.0009414219226448404, "loss": 0.1029, "theoretical_loss": 4.322057596160174, "tokens_seen": 224395264 }, { "epoch": 0.07, "learning_rate": 0.0009413416787032579, "loss": 0.0989, "theoretical_loss": 4.321441567953762, "tokens_seen": 224657408 }, { "epoch": 0.07, "learning_rate": 0.0009412614347616754, "loss": 0.1013, "theoretical_loss": 4.320826459149725, "tokens_seen": 224919552 }, { "epoch": 0.07, "learning_rate": 0.0009411811908200932, "loss": 0.1075, "theoretical_loss": 4.3202122673067125, "tokens_seen": 225181696 }, { "epoch": 0.07, "learning_rate": 0.0009411009468785107, "loss": 0.1004, "theoretical_loss": 4.319598989992695, "tokens_seen": 225443840 }, { "epoch": 0.07, "learning_rate": 0.0009410207029369283, "loss": 0.1046, "theoretical_loss": 4.318986624784908, "tokens_seen": 225705984 }, { "epoch": 0.07, "learning_rate": 0.0009409404589953459, "loss": 0.103, "theoretical_loss": 4.318375169269813, "tokens_seen": 225968128 }, { "epoch": 0.07, "learning_rate": 0.0009408602150537635, "loss": 0.1013, "theoretical_loss": 4.317764621043046, "tokens_seen": 226230272 }, { "epoch": 0.07, "learning_rate": 0.0009407799711121811, "loss": 0.0995, "theoretical_loss": 4.317154977709375, "tokens_seen": 226492416 }, { "epoch": 0.07, "learning_rate": 0.0009406997271705986, "loss": 0.0999, "theoretical_loss": 4.3165462368826555, "tokens_seen": 226754560 }, { "epoch": 0.07, "learning_rate": 0.0009406194832290162, "loss": 0.0995, "theoretical_loss": 4.315938396185782, "tokens_seen": 227016704 }, { "epoch": 0.07, "learning_rate": 0.0009405392392874338, "loss": 0.1, "theoretical_loss": 4.315331453250648, "tokens_seen": 227278848 }, { "epoch": 0.07, "learning_rate": 0.0009404589953458514, "loss": 0.0992, "theoretical_loss": 4.314725405718099, "tokens_seen": 227540992 }, { "epoch": 0.07, "learning_rate": 0.000940378751404269, "loss": 0.0997, "theoretical_loss": 4.314120251237887, "tokens_seen": 227803136 }, { "epoch": 0.07, "learning_rate": 0.0009402985074626867, "loss": 0.1027, "theoretical_loss": 4.31351598746863, "tokens_seen": 228065280 }, { "epoch": 0.07, "learning_rate": 0.0009402182635211042, "loss": 0.1014, "theoretical_loss": 4.312912612077767, "tokens_seen": 228327424 }, { "epoch": 0.07, "learning_rate": 0.0009401380195795217, "loss": 0.1017, "theoretical_loss": 4.312310122741512, "tokens_seen": 228589568 }, { "epoch": 0.07, "learning_rate": 0.0009400577756379394, "loss": 0.1026, "theoretical_loss": 4.311708517144817, "tokens_seen": 228851712 }, { "epoch": 0.07, "learning_rate": 0.0009399775316963569, "loss": 0.102, "theoretical_loss": 4.311107792981323, "tokens_seen": 229113856 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0002358820493100211, "objective/train/docs_used": 90061, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.085197687149048, "objective/train/original_loss": 2.085197925567627, "objective/train/theoretical_loss": 4.310507947953321, "objective/train/tokens_used": 249836000, "objective/train/value_avg": -0.0097808837890625, "objective/train/value_loss": 0.0005574537208303809, "objective/train/value_max": -0.0007238388061523438, "objective/train/value_min": -0.52783203125, "objective/train/value_reward_corr": 0.7022453186963163, "objective/train/value_std": 0.01198577880859375, "objective/train/weight_avg": 1.0004955530166626, "objective/train/weighted_lm_loss": 2.0866754055023193, "objective/train/weights_max": 1.2493910789489746, "objective/train/weights_min": 0.37043818831443787, "theoretical_loss": 4.310507947953321, "tokens_seen": 229376000 }, { "epoch": 0.07, "learning_rate": 0.0009398972877547745, "loss": 0.103, "theoretical_loss": 4.310507947953321, "tokens_seen": 229376000 }, { "epoch": 0.07, "learning_rate": 0.0009398170438131921, "loss": 0.1039, "theoretical_loss": 4.309908979771709, "tokens_seen": 229638144 }, { "epoch": 0.07, "learning_rate": 0.0009397367998716097, "loss": 0.1039, "theoretical_loss": 4.3093108861559495, "tokens_seen": 229900288 }, { "epoch": 0.07, "learning_rate": 0.0009396565559300272, "loss": 0.1018, "theoretical_loss": 4.308713664834029, "tokens_seen": 230162432 }, { "epoch": 0.07, "learning_rate": 0.0009395763119884449, "loss": 0.0956, "theoretical_loss": 4.308117313542413, "tokens_seen": 230424576 }, { "epoch": 0.07, "learning_rate": 0.0009394960680468625, "loss": 0.102, "theoretical_loss": 4.30752183002601, "tokens_seen": 230686720 }, { "epoch": 0.07, "learning_rate": 0.0009394158241052801, "loss": 0.0988, "theoretical_loss": 4.3069272120381275, "tokens_seen": 230948864 }, { "epoch": 0.07, "learning_rate": 0.0009393355801636977, "loss": 0.102, "theoretical_loss": 4.30633345734043, "tokens_seen": 231211008 }, { "epoch": 0.07, "learning_rate": 0.0009392553362221152, "loss": 0.0999, "theoretical_loss": 4.3057405637029, "tokens_seen": 231473152 }, { "epoch": 0.07, "learning_rate": 0.0009391750922805329, "loss": 0.1013, "theoretical_loss": 4.305148528903798, "tokens_seen": 231735296 }, { "epoch": 0.07, "learning_rate": 0.0009390948483389504, "loss": 0.1018, "theoretical_loss": 4.304557350729623, "tokens_seen": 231997440 }, { "epoch": 0.07, "learning_rate": 0.0009390146043973679, "loss": 0.0998, "theoretical_loss": 4.303967026975072, "tokens_seen": 232259584 }, { "epoch": 0.07, "learning_rate": 0.0009389343604557857, "loss": 0.1022, "theoretical_loss": 4.303377555442998, "tokens_seen": 232521728 }, { "epoch": 0.07, "learning_rate": 0.0009388541165142032, "loss": 0.0975, "theoretical_loss": 4.302788933944375, "tokens_seen": 232783872 }, { "epoch": 0.07, "learning_rate": 0.0009387738725726208, "loss": 0.1006, "theoretical_loss": 4.302201160298255, "tokens_seen": 233046016 }, { "epoch": 0.07, "learning_rate": 0.0009386936286310384, "loss": 0.101, "theoretical_loss": 4.301614232331733, "tokens_seen": 233308160 }, { "epoch": 0.07, "learning_rate": 0.000938613384689456, "loss": 0.1004, "theoretical_loss": 4.301028147879904, "tokens_seen": 233570304 }, { "epoch": 0.07, "learning_rate": 0.0009385331407478735, "loss": 0.0979, "theoretical_loss": 4.300442904785831, "tokens_seen": 233832448 }, { "epoch": 0.07, "learning_rate": 0.0009384528968062911, "loss": 0.1012, "theoretical_loss": 4.299858500900495, "tokens_seen": 234094592 }, { "epoch": 0.07, "learning_rate": 0.0009383726528647087, "loss": 0.1021, "theoretical_loss": 4.2992749340827725, "tokens_seen": 234356736 }, { "epoch": 0.07, "learning_rate": 0.0009382924089231262, "loss": 0.1026, "theoretical_loss": 4.298692202199386, "tokens_seen": 234618880 }, { "epoch": 0.07, "learning_rate": 0.000938212164981544, "loss": 0.1007, "theoretical_loss": 4.298110303124871, "tokens_seen": 234881024 }, { "epoch": 0.07, "learning_rate": 0.0009381319210399615, "loss": 0.102, "theoretical_loss": 4.29752923474154, "tokens_seen": 235143168 }, { "epoch": 0.07, "learning_rate": 0.0009380516770983792, "loss": 0.1003, "theoretical_loss": 4.29694899493944, "tokens_seen": 235405312 }, { "epoch": 0.07, "learning_rate": 0.0009379714331567967, "loss": 0.1013, "theoretical_loss": 4.2963695816163225, "tokens_seen": 235667456 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.001282443176023662, "objective/train/docs_used": 92471, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.023468494415283, "objective/train/original_loss": 2.023468494415283, "objective/train/theoretical_loss": 4.295790992677603, "objective/train/tokens_used": 256389600, "objective/train/value_avg": -0.00916290283203125, "objective/train/value_loss": 0.00042869814205914736, "objective/train/value_max": -0.0003845691680908203, "objective/train/value_min": -0.7470703125, "objective/train/value_reward_corr": 0.596518820655821, "objective/train/value_std": 0.0174102783203125, "objective/train/weight_avg": 1.0014816522598267, "objective/train/weighted_lm_loss": 2.02492618560791, "objective/train/weights_max": 1.7296583652496338, "objective/train/weights_min": 0.3692615330219269, "theoretical_loss": 4.295790992677603, "tokens_seen": 235929600 }, { "epoch": 0.07, "learning_rate": 0.0009378911892152143, "loss": 0.0992, "theoretical_loss": 4.295790992677603, "tokens_seen": 235929600 }, { "epoch": 0.07, "learning_rate": 0.0009378109452736319, "loss": 0.1008, "theoretical_loss": 4.2952132260363225, "tokens_seen": 236191744 }, { "epoch": 0.07, "learning_rate": 0.0009377307013320494, "loss": 0.0992, "theoretical_loss": 4.294636279613117, "tokens_seen": 236453888 }, { "epoch": 0.07, "learning_rate": 0.000937650457390467, "loss": 0.1033, "theoretical_loss": 4.294060151336178, "tokens_seen": 236716032 }, { "epoch": 0.07, "learning_rate": 0.0009375702134488846, "loss": 0.1017, "theoretical_loss": 4.293484839141217, "tokens_seen": 236978176 }, { "epoch": 0.07, "learning_rate": 0.0009374899695073022, "loss": 0.0994, "theoretical_loss": 4.29291034097143, "tokens_seen": 237240320 }, { "epoch": 0.07, "learning_rate": 0.0009374097255657198, "loss": 0.0964, "theoretical_loss": 4.2923366547774595, "tokens_seen": 237502464 }, { "epoch": 0.07, "learning_rate": 0.0009373294816241375, "loss": 0.0991, "theoretical_loss": 4.2917637785173675, "tokens_seen": 237764608 }, { "epoch": 0.07, "learning_rate": 0.000937249237682555, "loss": 0.1, "theoretical_loss": 4.291191710156591, "tokens_seen": 238026752 }, { "epoch": 0.07, "learning_rate": 0.0009371689937409725, "loss": 0.0996, "theoretical_loss": 4.290620447667912, "tokens_seen": 238288896 }, { "epoch": 0.07, "learning_rate": 0.0009370887497993902, "loss": 0.0973, "theoretical_loss": 4.290049989031424, "tokens_seen": 238551040 }, { "epoch": 0.07, "learning_rate": 0.0009370085058578077, "loss": 0.1026, "theoretical_loss": 4.289480332234493, "tokens_seen": 238813184 }, { "epoch": 0.07, "learning_rate": 0.0009369282619162254, "loss": 0.1009, "theoretical_loss": 4.288911475271731, "tokens_seen": 239075328 }, { "epoch": 0.07, "learning_rate": 0.0009368480179746429, "loss": 0.0999, "theoretical_loss": 4.288343416144952, "tokens_seen": 239337472 }, { "epoch": 0.07, "learning_rate": 0.0009367677740330605, "loss": 0.1019, "theoretical_loss": 4.287776152863146, "tokens_seen": 239599616 }, { "epoch": 0.07, "learning_rate": 0.0009366875300914782, "loss": 0.1016, "theoretical_loss": 4.287209683442444, "tokens_seen": 239861760 }, { "epoch": 0.07, "learning_rate": 0.0009366072861498957, "loss": 0.1001, "theoretical_loss": 4.286644005906081, "tokens_seen": 240123904 }, { "epoch": 0.07, "learning_rate": 0.0009365270422083133, "loss": 0.1011, "theoretical_loss": 4.286079118284368, "tokens_seen": 240386048 }, { "epoch": 0.07, "learning_rate": 0.0009364467982667309, "loss": 0.1036, "theoretical_loss": 4.285515018614655, "tokens_seen": 240648192 }, { "epoch": 0.07, "learning_rate": 0.0009363665543251485, "loss": 0.0961, "theoretical_loss": 4.2849517049412995, "tokens_seen": 240910336 }, { "epoch": 0.07, "learning_rate": 0.000936286310383566, "loss": 0.0975, "theoretical_loss": 4.284389175315636, "tokens_seen": 241172480 }, { "epoch": 0.07, "learning_rate": 0.0009362060664419837, "loss": 0.0993, "theoretical_loss": 4.283827427795939, "tokens_seen": 241434624 }, { "epoch": 0.07, "learning_rate": 0.0009361258225004012, "loss": 0.0984, "theoretical_loss": 4.283266460447394, "tokens_seen": 241696768 }, { "epoch": 0.07, "learning_rate": 0.0009360455785588187, "loss": 0.0963, "theoretical_loss": 4.282706271342066, "tokens_seen": 241958912 }, { "epoch": 0.07, "learning_rate": 0.0009359653346172365, "loss": 0.1022, "theoretical_loss": 4.282146858558866, "tokens_seen": 242221056 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0023586128372699022, "objective/train/docs_used": 94878, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9628684520721436, "objective/train/original_loss": 1.9628684520721436, "objective/train/theoretical_loss": 4.281588220183519, "objective/train/tokens_used": 262943200, "objective/train/value_avg": -0.010406494140625, "objective/train/value_loss": 0.00042588188080117106, "objective/train/value_max": -0.0003249645233154297, "objective/train/value_min": -0.403564453125, "objective/train/value_reward_corr": 0.6426931088980384, "objective/train/value_std": 0.01442718505859375, "objective/train/weight_avg": 1.0025511980056763, "objective/train/weighted_lm_loss": 1.9680471420288086, "objective/train/weights_max": 1.2811800241470337, "objective/train/weights_min": 0.3709741532802582, "theoretical_loss": 4.281588220183519, "tokens_seen": 242483200 }, { "epoch": 0.07, "learning_rate": 0.000935885090675654, "loss": 0.0976, "theoretical_loss": 4.281588220183519, "tokens_seen": 242483200 }, { "epoch": 0.07, "learning_rate": 0.0009358048467340716, "loss": 0.0992, "theoretical_loss": 4.281030354308533, "tokens_seen": 242745344 }, { "epoch": 0.07, "learning_rate": 0.0009357246027924892, "loss": 0.0984, "theoretical_loss": 4.280473259033169, "tokens_seen": 243007488 }, { "epoch": 0.07, "learning_rate": 0.0009356443588509068, "loss": 0.0976, "theoretical_loss": 4.27991693246341, "tokens_seen": 243269632 }, { "epoch": 0.07, "learning_rate": 0.0009355641149093244, "loss": 0.0983, "theoretical_loss": 4.279361372711923, "tokens_seen": 243531776 }, { "epoch": 0.07, "learning_rate": 0.0009354838709677419, "loss": 0.1021, "theoretical_loss": 4.278806577898042, "tokens_seen": 243793920 }, { "epoch": 0.07, "learning_rate": 0.0009354036270261595, "loss": 0.0976, "theoretical_loss": 4.278252546147724, "tokens_seen": 244056064 }, { "epoch": 0.07, "learning_rate": 0.0009353233830845771, "loss": 0.0992, "theoretical_loss": 4.277699275593523, "tokens_seen": 244318208 }, { "epoch": 0.07, "learning_rate": 0.0009352431391429948, "loss": 0.0946, "theoretical_loss": 4.277146764374566, "tokens_seen": 244580352 }, { "epoch": 0.07, "learning_rate": 0.0009351628952014123, "loss": 0.0996, "theoretical_loss": 4.276595010636514, "tokens_seen": 244842496 }, { "epoch": 0.07, "learning_rate": 0.00093508265125983, "loss": 0.0991, "theoretical_loss": 4.276044012531534, "tokens_seen": 245104640 }, { "epoch": 0.07, "learning_rate": 0.0009350024073182475, "loss": 0.1007, "theoretical_loss": 4.275493768218274, "tokens_seen": 245366784 }, { "epoch": 0.07, "learning_rate": 0.0009349221633766651, "loss": 0.0973, "theoretical_loss": 4.274944275861828, "tokens_seen": 245628928 }, { "epoch": 0.07, "learning_rate": 0.0009348419194350827, "loss": 0.0975, "theoretical_loss": 4.274395533633712, "tokens_seen": 245891072 }, { "epoch": 0.07, "learning_rate": 0.0009347616754935002, "loss": 0.0955, "theoretical_loss": 4.273847539711825, "tokens_seen": 246153216 }, { "epoch": 0.07, "learning_rate": 0.0009346814315519178, "loss": 0.1006, "theoretical_loss": 4.273300292280435, "tokens_seen": 246415360 }, { "epoch": 0.07, "learning_rate": 0.0009346011876103354, "loss": 0.0985, "theoretical_loss": 4.272753789530134, "tokens_seen": 246677504 }, { "epoch": 0.07, "learning_rate": 0.000934520943668753, "loss": 0.0955, "theoretical_loss": 4.272208029657822, "tokens_seen": 246939648 }, { "epoch": 0.07, "learning_rate": 0.0009344406997271707, "loss": 0.0968, "theoretical_loss": 4.271663010866669, "tokens_seen": 247201792 }, { "epoch": 0.07, "learning_rate": 0.0009343604557855883, "loss": 0.0999, "theoretical_loss": 4.2711187313660925, "tokens_seen": 247463936 }, { "epoch": 0.08, "learning_rate": 0.0009342802118440058, "loss": 0.0981, "theoretical_loss": 4.270575189371727, "tokens_seen": 247726080 }, { "epoch": 0.08, "learning_rate": 0.0009341999679024234, "loss": 0.0988, "theoretical_loss": 4.270032383105398, "tokens_seen": 247988224 }, { "epoch": 0.08, "learning_rate": 0.000934119723960841, "loss": 0.097, "theoretical_loss": 4.269490310795089, "tokens_seen": 248250368 }, { "epoch": 0.08, "learning_rate": 0.0009340394800192585, "loss": 0.1021, "theoretical_loss": 4.268948970674917, "tokens_seen": 248512512 }, { "epoch": 0.08, "learning_rate": 0.0009339592360776762, "loss": 0.101, "theoretical_loss": 4.268408360985109, "tokens_seen": 248774656 }, { "epoch": 0.08, "objective/train/advantage_avg": 5.6569744629086927e-05, "objective/train/docs_used": 97104, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9197639226913452, "objective/train/original_loss": 1.9197638034820557, "objective/train/theoretical_loss": 4.267868479971966, "objective/train/tokens_used": 269496800, "objective/train/value_avg": -0.00948333740234375, "objective/train/value_loss": 0.0004118235665373504, "objective/train/value_max": -0.0005464553833007812, "objective/train/value_min": -0.8017578125, "objective/train/value_reward_corr": 0.6645390257593566, "objective/train/value_std": 0.015777587890625, "objective/train/weight_avg": 1.0002477169036865, "objective/train/weighted_lm_loss": 1.9196513891220093, "objective/train/weights_max": 1.9581830501556396, "objective/train/weights_min": 0.38205578923225403, "theoretical_loss": 4.267868479971966, "tokens_seen": 249036800 }, { "epoch": 0.08, "learning_rate": 0.0009338789921360937, "loss": 0.0993, "theoretical_loss": 4.267868479971966, "tokens_seen": 249036800 }, { "epoch": 0.08, "learning_rate": 0.0009337987481945113, "loss": 0.0985, "theoretical_loss": 4.267329325887841, "tokens_seen": 249298944 }, { "epoch": 0.08, "learning_rate": 0.000933718504252929, "loss": 0.0921, "theoretical_loss": 4.266790896991109, "tokens_seen": 249561088 }, { "epoch": 0.08, "learning_rate": 0.0009336382603113465, "loss": 0.1, "theoretical_loss": 4.266253191546146, "tokens_seen": 249823232 }, { "epoch": 0.08, "learning_rate": 0.0009335580163697641, "loss": 0.0989, "theoretical_loss": 4.265716207823292, "tokens_seen": 250085376 }, { "epoch": 0.08, "learning_rate": 0.0009334777724281817, "loss": 0.099, "theoretical_loss": 4.2651799440988345, "tokens_seen": 250347520 }, { "epoch": 0.08, "learning_rate": 0.0009333975284865993, "loss": 0.0971, "theoretical_loss": 4.2646443986549745, "tokens_seen": 250609664 }, { "epoch": 0.08, "learning_rate": 0.0009333172845450168, "loss": 0.0986, "theoretical_loss": 4.264109569779803, "tokens_seen": 250871808 }, { "epoch": 0.08, "learning_rate": 0.0009332370406034345, "loss": 0.1009, "theoretical_loss": 4.263575455767277, "tokens_seen": 251133952 }, { "epoch": 0.08, "learning_rate": 0.000933156796661852, "loss": 0.0992, "theoretical_loss": 4.263042054917186, "tokens_seen": 251396096 }, { "epoch": 0.08, "learning_rate": 0.0009330765527202696, "loss": 0.0989, "theoretical_loss": 4.262509365535134, "tokens_seen": 251658240 }, { "epoch": 0.08, "learning_rate": 0.0009329963087786873, "loss": 0.0963, "theoretical_loss": 4.261977385932512, "tokens_seen": 251920384 }, { "epoch": 0.08, "learning_rate": 0.0009329160648371048, "loss": 0.0979, "theoretical_loss": 4.261446114426466, "tokens_seen": 252182528 }, { "epoch": 0.08, "learning_rate": 0.0009328358208955225, "loss": 0.097, "theoretical_loss": 4.260915549339879, "tokens_seen": 252444672 }, { "epoch": 0.08, "learning_rate": 0.00093275557695394, "loss": 0.0967, "theoretical_loss": 4.2603856890013425, "tokens_seen": 252706816 }, { "epoch": 0.08, "learning_rate": 0.0009326753330123576, "loss": 0.0988, "theoretical_loss": 4.25985653174513, "tokens_seen": 252968960 }, { "epoch": 0.08, "learning_rate": 0.0009325950890707752, "loss": 0.0945, "theoretical_loss": 4.259328075911173, "tokens_seen": 253231104 }, { "epoch": 0.08, "learning_rate": 0.0009325148451291927, "loss": 0.0985, "theoretical_loss": 4.258800319845038, "tokens_seen": 253493248 }, { "epoch": 0.08, "learning_rate": 0.0009324346011876103, "loss": 0.0971, "theoretical_loss": 4.258273261897896, "tokens_seen": 253755392 }, { "epoch": 0.08, "learning_rate": 0.0009323543572460279, "loss": 0.0967, "theoretical_loss": 4.257746900426506, "tokens_seen": 254017536 }, { "epoch": 0.08, "learning_rate": 0.0009322741133044456, "loss": 0.097, "theoretical_loss": 4.25722123379318, "tokens_seen": 254279680 }, { "epoch": 0.08, "learning_rate": 0.0009321938693628631, "loss": 0.1006, "theoretical_loss": 4.256696260365768, "tokens_seen": 254541824 }, { "epoch": 0.08, "learning_rate": 0.0009321136254212808, "loss": 0.0994, "theoretical_loss": 4.256171978517629, "tokens_seen": 254803968 }, { "epoch": 0.08, "learning_rate": 0.0009320333814796983, "loss": 0.0948, "theoretical_loss": 4.255648386627607, "tokens_seen": 255066112 }, { "epoch": 0.08, "learning_rate": 0.000931953137538116, "loss": 0.0982, "theoretical_loss": 4.255125483080007, "tokens_seen": 255328256 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0025481332559138536, "objective/train/docs_used": 99403, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.921761393547058, "objective/train/original_loss": 1.9217615127563477, "objective/train/theoretical_loss": 4.254603266264572, "objective/train/tokens_used": 276050400, "objective/train/value_avg": -0.008270263671875, "objective/train/value_loss": 0.000349750422174111, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.50537109375, "objective/train/value_reward_corr": 0.5029395423614991, "objective/train/value_std": 0.01042938232421875, "objective/train/weight_avg": 1.0027012825012207, "objective/train/weighted_lm_loss": 1.9272537231445312, "objective/train/weights_max": 1.5571717023849487, "objective/train/weights_min": 0.36996087431907654, "theoretical_loss": 4.254603266264572, "tokens_seen": 255590400 }, { "epoch": 0.08, "learning_rate": 0.0009318728935965335, "loss": 0.0981, "theoretical_loss": 4.254603266264572, "tokens_seen": 255590400 }, { "epoch": 0.08, "learning_rate": 0.000931792649654951, "loss": 0.0957, "theoretical_loss": 4.254081734576458, "tokens_seen": 255852544 }, { "epoch": 0.08, "learning_rate": 0.0009317124057133687, "loss": 0.0963, "theoretical_loss": 4.253560886416212, "tokens_seen": 256114688 }, { "epoch": 0.08, "learning_rate": 0.0009316321617717862, "loss": 0.0968, "theoretical_loss": 4.253040720189746, "tokens_seen": 256376832 }, { "epoch": 0.08, "learning_rate": 0.0009315519178302038, "loss": 0.0969, "theoretical_loss": 4.252521234308315, "tokens_seen": 256638976 }, { "epoch": 0.08, "learning_rate": 0.0009314716738886215, "loss": 0.0961, "theoretical_loss": 4.2520024271884935, "tokens_seen": 256901120 }, { "epoch": 0.08, "learning_rate": 0.0009313914299470391, "loss": 0.0978, "theoretical_loss": 4.251484297252151, "tokens_seen": 257163264 }, { "epoch": 0.08, "learning_rate": 0.0009313111860054566, "loss": 0.1015, "theoretical_loss": 4.250966842926434, "tokens_seen": 257425408 }, { "epoch": 0.08, "learning_rate": 0.0009312309420638742, "loss": 0.0991, "theoretical_loss": 4.250450062643734, "tokens_seen": 257687552 }, { "epoch": 0.08, "learning_rate": 0.0009311506981222918, "loss": 0.095, "theoretical_loss": 4.249933954841672, "tokens_seen": 257949696 }, { "epoch": 0.08, "learning_rate": 0.0009310704541807093, "loss": 0.1023, "theoretical_loss": 4.2494185179630755, "tokens_seen": 258211840 }, { "epoch": 0.08, "learning_rate": 0.000930990210239127, "loss": 0.0993, "theoretical_loss": 4.24890375045595, "tokens_seen": 258473984 }, { "epoch": 0.08, "learning_rate": 0.0009309099662975445, "loss": 0.0972, "theoretical_loss": 4.248389650773463, "tokens_seen": 258736128 }, { "epoch": 0.08, "learning_rate": 0.0009308297223559621, "loss": 0.0985, "theoretical_loss": 4.24787621737392, "tokens_seen": 258998272 }, { "epoch": 0.08, "learning_rate": 0.0009307494784143798, "loss": 0.0969, "theoretical_loss": 4.247363448720739, "tokens_seen": 259260416 }, { "epoch": 0.08, "learning_rate": 0.0009306692344727973, "loss": 0.0985, "theoretical_loss": 4.246851343282432, "tokens_seen": 259522560 }, { "epoch": 0.08, "learning_rate": 0.000930588990531215, "loss": 0.1008, "theoretical_loss": 4.246339899532582, "tokens_seen": 259784704 }, { "epoch": 0.08, "learning_rate": 0.0009305087465896325, "loss": 0.0969, "theoretical_loss": 4.245829115949818, "tokens_seen": 260046848 }, { "epoch": 0.08, "learning_rate": 0.0009304285026480501, "loss": 0.1007, "theoretical_loss": 4.245318991017802, "tokens_seen": 260308992 }, { "epoch": 0.08, "learning_rate": 0.0009303482587064677, "loss": 0.098, "theoretical_loss": 4.244809523225195, "tokens_seen": 260571136 }, { "epoch": 0.08, "learning_rate": 0.0009302680147648853, "loss": 0.0997, "theoretical_loss": 4.244300711065646, "tokens_seen": 260833280 }, { "epoch": 0.08, "learning_rate": 0.0009301877708233028, "loss": 0.0933, "theoretical_loss": 4.243792553037767, "tokens_seen": 261095424 }, { "epoch": 0.08, "learning_rate": 0.0009301075268817204, "loss": 0.0978, "theoretical_loss": 4.243285047645106, "tokens_seen": 261357568 }, { "epoch": 0.08, "learning_rate": 0.000930027282940138, "loss": 0.1023, "theoretical_loss": 4.242778193396136, "tokens_seen": 261619712 }, { "epoch": 0.08, "learning_rate": 0.0009299470389985556, "loss": 0.0989, "theoretical_loss": 4.242271988804228, "tokens_seen": 261881856 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.004064733162522316, "objective/train/docs_used": 101934, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 2.00811505317688, "objective/train/original_loss": 2.008115291595459, "objective/train/theoretical_loss": 4.241766432387629, "objective/train/tokens_used": 282604000, "objective/train/value_avg": -0.00791168212890625, "objective/train/value_loss": 0.00014644188922829926, "objective/train/value_max": -0.0006613731384277344, "objective/train/value_min": -0.193603515625, "objective/train/value_reward_corr": 0.33414470602713064, "objective/train/value_std": 0.006023406982421875, "objective/train/weight_avg": 1.0041327476501465, "objective/train/weighted_lm_loss": 2.0166854858398438, "objective/train/weights_max": 1.1917407512664795, "objective/train/weights_min": 0.37122613191604614, "theoretical_loss": 4.241766432387629, "tokens_seen": 262144000 }, { "epoch": 0.08, "learning_rate": 0.0009298667950569733, "loss": 0.0953, "theoretical_loss": 4.241766432387629, "tokens_seen": 262144000 }, { "epoch": 0.08, "learning_rate": 0.0009297865511153908, "loss": 0.0968, "theoretical_loss": 4.241261522669445, "tokens_seen": 262406144 }, { "epoch": 0.08, "learning_rate": 0.0009297063071738084, "loss": 0.0992, "theoretical_loss": 4.240757258177617, "tokens_seen": 262668288 }, { "epoch": 0.08, "learning_rate": 0.000929626063232226, "loss": 0.0957, "theoretical_loss": 4.240253637444903, "tokens_seen": 262930432 }, { "epoch": 0.08, "learning_rate": 0.0009295458192906435, "loss": 0.0951, "theoretical_loss": 4.239750659008854, "tokens_seen": 263192576 }, { "epoch": 0.08, "learning_rate": 0.0009294655753490611, "loss": 0.0968, "theoretical_loss": 4.2392483214117975, "tokens_seen": 263454720 }, { "epoch": 0.08, "learning_rate": 0.0009293853314074787, "loss": 0.0992, "theoretical_loss": 4.238746623200815, "tokens_seen": 263716864 }, { "epoch": 0.08, "learning_rate": 0.0009293050874658963, "loss": 0.0982, "theoretical_loss": 4.238245562927722, "tokens_seen": 263979008 }, { "epoch": 0.08, "learning_rate": 0.000929224843524314, "loss": 0.0987, "theoretical_loss": 4.237745139149047, "tokens_seen": 264241152 }, { "epoch": 0.08, "learning_rate": 0.0009291445995827316, "loss": 0.0959, "theoretical_loss": 4.237245350426015, "tokens_seen": 264503296 }, { "epoch": 0.08, "learning_rate": 0.0009290643556411491, "loss": 0.0935, "theoretical_loss": 4.236746195324523, "tokens_seen": 264765440 }, { "epoch": 0.08, "learning_rate": 0.0009289841116995667, "loss": 0.1002, "theoretical_loss": 4.2362476724151215, "tokens_seen": 265027584 }, { "epoch": 0.08, "learning_rate": 0.0009289038677579843, "loss": 0.0973, "theoretical_loss": 4.235749780272998, "tokens_seen": 265289728 }, { "epoch": 0.08, "learning_rate": 0.0009288236238164018, "loss": 0.1, "theoretical_loss": 4.235252517477956, "tokens_seen": 265551872 }, { "epoch": 0.08, "learning_rate": 0.0009287433798748195, "loss": 0.0943, "theoretical_loss": 4.23475588261439, "tokens_seen": 265814016 }, { "epoch": 0.08, "learning_rate": 0.000928663135933237, "loss": 0.0966, "theoretical_loss": 4.234259874271275, "tokens_seen": 266076160 }, { "epoch": 0.08, "learning_rate": 0.0009285828919916546, "loss": 0.0969, "theoretical_loss": 4.23376449104214, "tokens_seen": 266338304 }, { "epoch": 0.08, "learning_rate": 0.0009285026480500723, "loss": 0.1002, "theoretical_loss": 4.233269731525055, "tokens_seen": 266600448 }, { "epoch": 0.08, "learning_rate": 0.0009284224041084899, "loss": 0.0978, "theoretical_loss": 4.232775594322605, "tokens_seen": 266862592 }, { "epoch": 0.08, "learning_rate": 0.0009283421601669074, "loss": 0.0976, "theoretical_loss": 4.232282078041876, "tokens_seen": 267124736 }, { "epoch": 0.08, "learning_rate": 0.000928261916225325, "loss": 0.0978, "theoretical_loss": 4.231789181294436, "tokens_seen": 267386880 }, { "epoch": 0.08, "learning_rate": 0.0009281816722837426, "loss": 0.1, "theoretical_loss": 4.231296902696314, "tokens_seen": 267649024 }, { "epoch": 0.08, "learning_rate": 0.0009281014283421601, "loss": 0.0956, "theoretical_loss": 4.230805240867982, "tokens_seen": 267911168 }, { "epoch": 0.08, "learning_rate": 0.0009280211844005778, "loss": 0.097, "theoretical_loss": 4.230314194434336, "tokens_seen": 268173312 }, { "epoch": 0.08, "learning_rate": 0.0009279409404589953, "loss": 0.0974, "theoretical_loss": 4.229823762024681, "tokens_seen": 268435456 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0018670569406822324, "objective/train/docs_used": 104173, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8005046844482422, "objective/train/original_loss": 1.8005049228668213, "objective/train/theoretical_loss": 4.2293339422727065, "objective/train/tokens_used": 289157600, "objective/train/value_avg": -0.00882720947265625, "objective/train/value_loss": 0.0002679454628378153, "objective/train/value_max": -0.00035691261291503906, "objective/train/value_min": -0.283203125, "objective/train/value_reward_corr": 0.5715762562194157, "objective/train/value_std": 0.01021575927734375, "objective/train/weight_avg": 1.0019898414611816, "objective/train/weighted_lm_loss": 1.803560495376587, "objective/train/weights_max": 1.1879379749298096, "objective/train/weights_min": 0.3708355128765106, "theoretical_loss": 4.2293339422727065, "tokens_seen": 268697600 }, { "epoch": 0.08, "learning_rate": 0.000927860696517413, "loss": 0.0972, "theoretical_loss": 4.2293339422727065, "tokens_seen": 268697600 }, { "epoch": 0.08, "learning_rate": 0.0009277804525758306, "loss": 0.0986, "theoretical_loss": 4.228844733816474, "tokens_seen": 268959744 }, { "epoch": 0.08, "learning_rate": 0.0009277002086342481, "loss": 0.0948, "theoretical_loss": 4.228356135298394, "tokens_seen": 269221888 }, { "epoch": 0.08, "learning_rate": 0.0009276199646926658, "loss": 0.0938, "theoretical_loss": 4.227868145365211, "tokens_seen": 269484032 }, { "epoch": 0.08, "learning_rate": 0.0009275397207510833, "loss": 0.0966, "theoretical_loss": 4.227380762667987, "tokens_seen": 269746176 }, { "epoch": 0.08, "learning_rate": 0.0009274594768095009, "loss": 0.0951, "theoretical_loss": 4.226893985862076, "tokens_seen": 270008320 }, { "epoch": 0.08, "learning_rate": 0.0009273792328679185, "loss": 0.0985, "theoretical_loss": 4.226407813607116, "tokens_seen": 270270464 }, { "epoch": 0.08, "learning_rate": 0.0009272989889263361, "loss": 0.0967, "theoretical_loss": 4.2259222445670055, "tokens_seen": 270532608 }, { "epoch": 0.08, "learning_rate": 0.0009272187449847536, "loss": 0.0935, "theoretical_loss": 4.225437277409885, "tokens_seen": 270794752 }, { "epoch": 0.08, "learning_rate": 0.0009271385010431712, "loss": 0.0953, "theoretical_loss": 4.224952910808122, "tokens_seen": 271056896 }, { "epoch": 0.08, "learning_rate": 0.0009270582571015889, "loss": 0.0973, "theoretical_loss": 4.224469143438294, "tokens_seen": 271319040 }, { "epoch": 0.08, "learning_rate": 0.0009269780131600064, "loss": 0.0998, "theoretical_loss": 4.223985973981171, "tokens_seen": 271581184 }, { "epoch": 0.08, "learning_rate": 0.0009268977692184241, "loss": 0.0996, "theoretical_loss": 4.223503401121693, "tokens_seen": 271843328 }, { "epoch": 0.08, "learning_rate": 0.0009268175252768416, "loss": 0.0941, "theoretical_loss": 4.223021423548962, "tokens_seen": 272105472 }, { "epoch": 0.08, "learning_rate": 0.0009267372813352593, "loss": 0.0962, "theoretical_loss": 4.222540039956215, "tokens_seen": 272367616 }, { "epoch": 0.08, "learning_rate": 0.0009266570373936768, "loss": 0.0953, "theoretical_loss": 4.222059249040814, "tokens_seen": 272629760 }, { "epoch": 0.08, "learning_rate": 0.0009265767934520943, "loss": 0.0989, "theoretical_loss": 4.2215790495042285, "tokens_seen": 272891904 }, { "epoch": 0.08, "learning_rate": 0.000926496549510512, "loss": 0.0982, "theoretical_loss": 4.221099440052014, "tokens_seen": 273154048 }, { "epoch": 0.08, "learning_rate": 0.0009264163055689295, "loss": 0.0956, "theoretical_loss": 4.220620419393799, "tokens_seen": 273416192 }, { "epoch": 0.08, "learning_rate": 0.0009263360616273471, "loss": 0.0979, "theoretical_loss": 4.220141986243268, "tokens_seen": 273678336 }, { "epoch": 0.08, "learning_rate": 0.0009262558176857648, "loss": 0.098, "theoretical_loss": 4.219664139318145, "tokens_seen": 273940480 }, { "epoch": 0.08, "learning_rate": 0.0009261755737441824, "loss": 0.0972, "theoretical_loss": 4.219186877340174, "tokens_seen": 274202624 }, { "epoch": 0.08, "learning_rate": 0.0009260953298025999, "loss": 0.0943, "theoretical_loss": 4.218710199035108, "tokens_seen": 274464768 }, { "epoch": 0.08, "learning_rate": 0.0009260150858610175, "loss": 0.0941, "theoretical_loss": 4.218234103132686, "tokens_seen": 274726912 }, { "epoch": 0.08, "learning_rate": 0.0009259348419194351, "loss": 0.097, "theoretical_loss": 4.217758588366623, "tokens_seen": 274989056 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0027189042884856462, "objective/train/docs_used": 106703, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8442187309265137, "objective/train/original_loss": 1.8442187309265137, "objective/train/theoretical_loss": 4.217283653474588, "objective/train/tokens_used": 295711200, "objective/train/value_avg": -0.011016845703125, "objective/train/value_loss": 0.0005074041546322405, "objective/train/value_max": -0.0006022453308105469, "objective/train/value_min": -0.90625, "objective/train/value_reward_corr": 0.6262922111240474, "objective/train/value_std": 0.0198211669921875, "objective/train/weight_avg": 1.0029503107070923, "objective/train/weighted_lm_loss": 1.849584937095642, "objective/train/weights_max": 1.8512089252471924, "objective/train/weights_min": 0.36976757645606995, "theoretical_loss": 4.217283653474588, "tokens_seen": 275251200 }, { "epoch": 0.08, "learning_rate": 0.0009258545979778526, "loss": 0.0936, "theoretical_loss": 4.217283653474588, "tokens_seen": 275251200 }, { "epoch": 0.08, "learning_rate": 0.0009257743540362703, "loss": 0.0962, "theoretical_loss": 4.216809297198195, "tokens_seen": 275513344 }, { "epoch": 0.08, "learning_rate": 0.0009256941100946878, "loss": 0.0968, "theoretical_loss": 4.21633551828298, "tokens_seen": 275775488 }, { "epoch": 0.08, "learning_rate": 0.0009256138661531054, "loss": 0.0981, "theoretical_loss": 4.215862315478388, "tokens_seen": 276037632 }, { "epoch": 0.08, "learning_rate": 0.0009255336222115231, "loss": 0.0992, "theoretical_loss": 4.2153896875377574, "tokens_seen": 276299776 }, { "epoch": 0.08, "learning_rate": 0.0009254533782699407, "loss": 0.0979, "theoretical_loss": 4.214917633218304, "tokens_seen": 276561920 }, { "epoch": 0.08, "learning_rate": 0.0009253731343283583, "loss": 0.098, "theoretical_loss": 4.214446151281106, "tokens_seen": 276824064 }, { "epoch": 0.08, "learning_rate": 0.0009252928903867758, "loss": 0.0945, "theoretical_loss": 4.213975240491084, "tokens_seen": 277086208 }, { "epoch": 0.08, "learning_rate": 0.0009252126464451934, "loss": 0.0964, "theoretical_loss": 4.213504899616995, "tokens_seen": 277348352 }, { "epoch": 0.08, "learning_rate": 0.000925132402503611, "loss": 0.0969, "theoretical_loss": 4.213035127431402, "tokens_seen": 277610496 }, { "epoch": 0.08, "learning_rate": 0.0009250521585620286, "loss": 0.0967, "theoretical_loss": 4.212565922710677, "tokens_seen": 277872640 }, { "epoch": 0.08, "learning_rate": 0.0009249719146204461, "loss": 0.0964, "theoretical_loss": 4.21209728423497, "tokens_seen": 278134784 }, { "epoch": 0.08, "learning_rate": 0.0009248916706788639, "loss": 0.0984, "theoretical_loss": 4.2116292107882, "tokens_seen": 278396928 }, { "epoch": 0.08, "learning_rate": 0.0009248114267372814, "loss": 0.0973, "theoretical_loss": 4.211161701158042, "tokens_seen": 278659072 }, { "epoch": 0.08, "learning_rate": 0.0009247311827956989, "loss": 0.0985, "theoretical_loss": 4.2106947541359085, "tokens_seen": 278921216 }, { "epoch": 0.08, "learning_rate": 0.0009246509388541166, "loss": 0.0962, "theoretical_loss": 4.210228368516935, "tokens_seen": 279183360 }, { "epoch": 0.08, "learning_rate": 0.0009245706949125341, "loss": 0.1018, "theoretical_loss": 4.209762543099966, "tokens_seen": 279445504 }, { "epoch": 0.08, "learning_rate": 0.0009244904509709517, "loss": 0.0971, "theoretical_loss": 4.209297276687541, "tokens_seen": 279707648 }, { "epoch": 0.08, "learning_rate": 0.0009244102070293693, "loss": 0.1, "theoretical_loss": 4.2088325680858745, "tokens_seen": 279969792 }, { "epoch": 0.08, "learning_rate": 0.0009243299630877869, "loss": 0.0954, "theoretical_loss": 4.208368416104849, "tokens_seen": 280231936 }, { "epoch": 0.09, "learning_rate": 0.0009242497191462045, "loss": 0.0986, "theoretical_loss": 4.207904819557995, "tokens_seen": 280494080 }, { "epoch": 0.09, "learning_rate": 0.000924169475204622, "loss": 0.0965, "theoretical_loss": 4.207441777262477, "tokens_seen": 280756224 }, { "epoch": 0.09, "learning_rate": 0.0009240892312630397, "loss": 0.0958, "theoretical_loss": 4.206979288039081, "tokens_seen": 281018368 }, { "epoch": 0.09, "learning_rate": 0.0009240089873214573, "loss": 0.0943, "theoretical_loss": 4.206517350712199, "tokens_seen": 281280512 }, { "epoch": 0.09, "learning_rate": 0.0009239287433798749, "loss": 0.0964, "theoretical_loss": 4.206055964109813, "tokens_seen": 281542656 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0017802802613005042, "objective/train/docs_used": 109222, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.907019853591919, "objective/train/original_loss": 1.907020092010498, "objective/train/theoretical_loss": 4.205595127063485, "objective/train/tokens_used": 302264800, "objective/train/value_avg": -0.007091522216796875, "objective/train/value_loss": 0.00020236380805727094, "objective/train/value_max": -0.00042891502380371094, "objective/train/value_min": -0.2548828125, "objective/train/value_reward_corr": 0.46679314179268283, "objective/train/value_std": 0.0074310302734375, "objective/train/weight_avg": 1.0018771886825562, "objective/train/weighted_lm_loss": 1.9096391201019287, "objective/train/weights_max": 1.1684050559997559, "objective/train/weights_min": 0.38661831617355347, "theoretical_loss": 4.205595127063485, "tokens_seen": 281804800 }, { "epoch": 0.09, "learning_rate": 0.0009238484994382924, "loss": 0.0951, "theoretical_loss": 4.205595127063485, "tokens_seen": 281804800 }, { "epoch": 0.09, "learning_rate": 0.0009237682554967101, "loss": 0.0938, "theoretical_loss": 4.205134838408337, "tokens_seen": 282066944 }, { "epoch": 0.09, "learning_rate": 0.0009236880115551276, "loss": 0.0959, "theoretical_loss": 4.20467509698304, "tokens_seen": 282329088 }, { "epoch": 0.09, "learning_rate": 0.0009236077676135451, "loss": 0.0954, "theoretical_loss": 4.204215901629803, "tokens_seen": 282591232 }, { "epoch": 0.09, "learning_rate": 0.0009235275236719628, "loss": 0.0962, "theoretical_loss": 4.203757251194353, "tokens_seen": 282853376 }, { "epoch": 0.09, "learning_rate": 0.0009234472797303803, "loss": 0.0952, "theoretical_loss": 4.203299144525923, "tokens_seen": 283115520 }, { "epoch": 0.09, "learning_rate": 0.000923367035788798, "loss": 0.096, "theoretical_loss": 4.202841580477241, "tokens_seen": 283377664 }, { "epoch": 0.09, "learning_rate": 0.0009232867918472156, "loss": 0.0989, "theoretical_loss": 4.202384557904513, "tokens_seen": 283639808 }, { "epoch": 0.09, "learning_rate": 0.0009232065479056332, "loss": 0.0955, "theoretical_loss": 4.201928075667411, "tokens_seen": 283901952 }, { "epoch": 0.09, "learning_rate": 0.0009231263039640507, "loss": 0.0949, "theoretical_loss": 4.201472132629057, "tokens_seen": 284164096 }, { "epoch": 0.09, "learning_rate": 0.0009230460600224683, "loss": 0.1007, "theoretical_loss": 4.201016727656012, "tokens_seen": 284426240 }, { "epoch": 0.09, "learning_rate": 0.0009229658160808859, "loss": 0.0983, "theoretical_loss": 4.2005618596182615, "tokens_seen": 284688384 }, { "epoch": 0.09, "learning_rate": 0.0009228855721393035, "loss": 0.0985, "theoretical_loss": 4.200107527389202, "tokens_seen": 284950528 }, { "epoch": 0.09, "learning_rate": 0.0009228053281977211, "loss": 0.0965, "theoretical_loss": 4.199653729845626, "tokens_seen": 285212672 }, { "epoch": 0.09, "learning_rate": 0.0009227250842561386, "loss": 0.0965, "theoretical_loss": 4.199200465867714, "tokens_seen": 285474816 }, { "epoch": 0.09, "learning_rate": 0.0009226448403145564, "loss": 0.0955, "theoretical_loss": 4.198747734339013, "tokens_seen": 285736960 }, { "epoch": 0.09, "learning_rate": 0.0009225645963729739, "loss": 0.0947, "theoretical_loss": 4.198295534146429, "tokens_seen": 285999104 }, { "epoch": 0.09, "learning_rate": 0.0009224843524313914, "loss": 0.095, "theoretical_loss": 4.197843864180214, "tokens_seen": 286261248 }, { "epoch": 0.09, "learning_rate": 0.0009224041084898091, "loss": 0.0949, "theoretical_loss": 4.197392723333951, "tokens_seen": 286523392 }, { "epoch": 0.09, "learning_rate": 0.0009223238645482266, "loss": 0.0975, "theoretical_loss": 4.196942110504538, "tokens_seen": 286785536 }, { "epoch": 0.09, "learning_rate": 0.0009222436206066442, "loss": 0.0954, "theoretical_loss": 4.196492024592183, "tokens_seen": 287047680 }, { "epoch": 0.09, "learning_rate": 0.0009221633766650618, "loss": 0.095, "theoretical_loss": 4.196042464500382, "tokens_seen": 287309824 }, { "epoch": 0.09, "learning_rate": 0.0009220831327234794, "loss": 0.0947, "theoretical_loss": 4.195593429135916, "tokens_seen": 287571968 }, { "epoch": 0.09, "learning_rate": 0.0009220028887818969, "loss": 0.1014, "theoretical_loss": 4.195144917408828, "tokens_seen": 287834112 }, { "epoch": 0.09, "learning_rate": 0.0009219226448403146, "loss": 0.0949, "theoretical_loss": 4.194696928232417, "tokens_seen": 288096256 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.002935412572696805, "objective/train/docs_used": 111591, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8163905143737793, "objective/train/original_loss": 1.8163906335830688, "objective/train/theoretical_loss": 4.194249460523222, "objective/train/tokens_used": 308818400, "objective/train/value_avg": -0.00843048095703125, "objective/train/value_loss": 0.0001841627963585779, "objective/train/value_max": -0.000507354736328125, "objective/train/value_min": -0.318603515625, "objective/train/value_reward_corr": 0.49277345208210327, "objective/train/value_std": 0.0080413818359375, "objective/train/weight_avg": 1.0030207633972168, "objective/train/weighted_lm_loss": 1.8225624561309814, "objective/train/weights_max": 1.174675464630127, "objective/train/weights_min": 0.3693629503250122, "theoretical_loss": 4.194249460523222, "tokens_seen": 288358400 }, { "epoch": 0.09, "learning_rate": 0.0009218424008987322, "loss": 0.0934, "theoretical_loss": 4.194249460523222, "tokens_seen": 288358400 }, { "epoch": 0.09, "learning_rate": 0.0009217621569571497, "loss": 0.0965, "theoretical_loss": 4.193802513201015, "tokens_seen": 288620544 }, { "epoch": 0.09, "learning_rate": 0.0009216819130155674, "loss": 0.0999, "theoretical_loss": 4.193356085188778, "tokens_seen": 288882688 }, { "epoch": 0.09, "learning_rate": 0.0009216016690739849, "loss": 0.0984, "theoretical_loss": 4.1929101754127025, "tokens_seen": 289144832 }, { "epoch": 0.09, "learning_rate": 0.0009215214251324026, "loss": 0.0957, "theoretical_loss": 4.192464782802167, "tokens_seen": 289406976 }, { "epoch": 0.09, "learning_rate": 0.0009214411811908201, "loss": 0.0957, "theoretical_loss": 4.192019906289733, "tokens_seen": 289669120 }, { "epoch": 0.09, "learning_rate": 0.0009213609372492377, "loss": 0.0985, "theoretical_loss": 4.1915755448111245, "tokens_seen": 289931264 }, { "epoch": 0.09, "learning_rate": 0.0009212806933076553, "loss": 0.0972, "theoretical_loss": 4.191131697305222, "tokens_seen": 290193408 }, { "epoch": 0.09, "learning_rate": 0.0009212004493660728, "loss": 0.0962, "theoretical_loss": 4.1906883627140505, "tokens_seen": 290455552 }, { "epoch": 0.09, "learning_rate": 0.0009211202054244905, "loss": 0.0931, "theoretical_loss": 4.19024553998276, "tokens_seen": 290717696 }, { "epoch": 0.09, "learning_rate": 0.0009210399614829081, "loss": 0.0957, "theoretical_loss": 4.189803228059623, "tokens_seen": 290979840 }, { "epoch": 0.09, "learning_rate": 0.0009209597175413257, "loss": 0.096, "theoretical_loss": 4.189361425896016, "tokens_seen": 291241984 }, { "epoch": 0.09, "learning_rate": 0.0009208794735997432, "loss": 0.0929, "theoretical_loss": 4.188920132446411, "tokens_seen": 291504128 }, { "epoch": 0.09, "learning_rate": 0.0009207992296581609, "loss": 0.0937, "theoretical_loss": 4.188479346668359, "tokens_seen": 291766272 }, { "epoch": 0.09, "learning_rate": 0.0009207189857165784, "loss": 0.0941, "theoretical_loss": 4.188039067522484, "tokens_seen": 292028416 }, { "epoch": 0.09, "learning_rate": 0.0009206387417749959, "loss": 0.0953, "theoretical_loss": 4.18759929397247, "tokens_seen": 292290560 }, { "epoch": 0.09, "learning_rate": 0.0009205584978334136, "loss": 0.0958, "theoretical_loss": 4.187160024985044, "tokens_seen": 292552704 }, { "epoch": 0.09, "learning_rate": 0.0009204782538918311, "loss": 0.0973, "theoretical_loss": 4.1867212595299685, "tokens_seen": 292814848 }, { "epoch": 0.09, "learning_rate": 0.0009203980099502489, "loss": 0.0958, "theoretical_loss": 4.186282996580034, "tokens_seen": 293076992 }, { "epoch": 0.09, "learning_rate": 0.0009203177660086664, "loss": 0.0983, "theoretical_loss": 4.185845235111037, "tokens_seen": 293339136 }, { "epoch": 0.09, "learning_rate": 0.000920237522067084, "loss": 0.0962, "theoretical_loss": 4.185407974101779, "tokens_seen": 293601280 }, { "epoch": 0.09, "learning_rate": 0.0009201572781255016, "loss": 0.0948, "theoretical_loss": 4.184971212534048, "tokens_seen": 293863424 }, { "epoch": 0.09, "learning_rate": 0.0009200770341839191, "loss": 0.0959, "theoretical_loss": 4.184534949392611, "tokens_seen": 294125568 }, { "epoch": 0.09, "learning_rate": 0.0009199967902423367, "loss": 0.095, "theoretical_loss": 4.184099183665199, "tokens_seen": 294387712 }, { "epoch": 0.09, "learning_rate": 0.0009199165463007543, "loss": 0.0982, "theoretical_loss": 4.1836639143425, "tokens_seen": 294649856 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.001675541396252811, "objective/train/docs_used": 114046, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.922358751296997, "objective/train/original_loss": 1.922358751296997, "objective/train/theoretical_loss": 4.1832291404181445, "objective/train/tokens_used": 315372000, "objective/train/value_avg": -0.00826263427734375, "objective/train/value_loss": 0.0002560181892476976, "objective/train/value_max": -0.00048041343688964844, "objective/train/value_min": -0.51025390625, "objective/train/value_reward_corr": 0.5477898481987934, "objective/train/value_std": 0.008514404296875, "objective/train/weight_avg": 1.001792550086975, "objective/train/weighted_lm_loss": 1.9252777099609375, "objective/train/weights_max": 1.2260377407073975, "objective/train/weights_min": 0.37242892384529114, "theoretical_loss": 4.1832291404181445, "tokens_seen": 294912000 }, { "epoch": 0.09, "learning_rate": 0.0009198363023591719, "loss": 0.0955, "theoretical_loss": 4.1832291404181445, "tokens_seen": 294912000 }, { "epoch": 0.09, "learning_rate": 0.0009197560584175894, "loss": 0.0958, "theoretical_loss": 4.182794860888696, "tokens_seen": 295174144 }, { "epoch": 0.09, "learning_rate": 0.0009196758144760072, "loss": 0.0975, "theoretical_loss": 4.18236107475364, "tokens_seen": 295436288 }, { "epoch": 0.09, "learning_rate": 0.0009195955705344247, "loss": 0.0973, "theoretical_loss": 4.18192778101537, "tokens_seen": 295698432 }, { "epoch": 0.09, "learning_rate": 0.0009195153265928422, "loss": 0.0942, "theoretical_loss": 4.181494978679181, "tokens_seen": 295960576 }, { "epoch": 0.09, "learning_rate": 0.0009194350826512599, "loss": 0.0951, "theoretical_loss": 4.181062666753256, "tokens_seen": 296222720 }, { "epoch": 0.09, "learning_rate": 0.0009193548387096774, "loss": 0.0946, "theoretical_loss": 4.180630844248653, "tokens_seen": 296484864 }, { "epoch": 0.09, "learning_rate": 0.000919274594768095, "loss": 0.0976, "theoretical_loss": 4.180199510179299, "tokens_seen": 296747008 }, { "epoch": 0.09, "learning_rate": 0.0009191943508265126, "loss": 0.0947, "theoretical_loss": 4.179768663561975, "tokens_seen": 297009152 }, { "epoch": 0.09, "learning_rate": 0.0009191141068849302, "loss": 0.0924, "theoretical_loss": 4.1793383034163085, "tokens_seen": 297271296 }, { "epoch": 0.09, "learning_rate": 0.0009190338629433478, "loss": 0.0939, "theoretical_loss": 4.178908428764759, "tokens_seen": 297533440 }, { "epoch": 0.09, "learning_rate": 0.0009189536190017654, "loss": 0.0945, "theoretical_loss": 4.17847903863261, "tokens_seen": 297795584 }, { "epoch": 0.09, "learning_rate": 0.000918873375060183, "loss": 0.095, "theoretical_loss": 4.178050132047958, "tokens_seen": 298057728 }, { "epoch": 0.09, "learning_rate": 0.0009187931311186006, "loss": 0.0937, "theoretical_loss": 4.177621708041703, "tokens_seen": 298319872 }, { "epoch": 0.09, "learning_rate": 0.0009187128871770182, "loss": 0.0964, "theoretical_loss": 4.177193765647534, "tokens_seen": 298582016 }, { "epoch": 0.09, "learning_rate": 0.0009186326432354357, "loss": 0.0973, "theoretical_loss": 4.176766303901922, "tokens_seen": 298844160 }, { "epoch": 0.09, "learning_rate": 0.0009185523992938534, "loss": 0.0954, "theoretical_loss": 4.17633932184411, "tokens_seen": 299106304 }, { "epoch": 0.09, "learning_rate": 0.0009184721553522709, "loss": 0.0951, "theoretical_loss": 4.1759128185161005, "tokens_seen": 299368448 }, { "epoch": 0.09, "learning_rate": 0.0009183919114106885, "loss": 0.0917, "theoretical_loss": 4.175486792962646, "tokens_seen": 299630592 }, { "epoch": 0.09, "learning_rate": 0.0009183116674691061, "loss": 0.0952, "theoretical_loss": 4.175061244231237, "tokens_seen": 299892736 }, { "epoch": 0.09, "learning_rate": 0.0009182314235275236, "loss": 0.097, "theoretical_loss": 4.174636171372097, "tokens_seen": 300154880 }, { "epoch": 0.09, "learning_rate": 0.0009181511795859412, "loss": 0.0914, "theoretical_loss": 4.174211573438166, "tokens_seen": 300417024 }, { "epoch": 0.09, "learning_rate": 0.0009180709356443589, "loss": 0.0931, "theoretical_loss": 4.173787449485094, "tokens_seen": 300679168 }, { "epoch": 0.09, "learning_rate": 0.0009179906917027765, "loss": 0.0979, "theoretical_loss": 4.17336379857123, "tokens_seen": 300941312 }, { "epoch": 0.09, "learning_rate": 0.0009179104477611941, "loss": 0.0941, "theoretical_loss": 4.172940619757611, "tokens_seen": 301203456 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.002791388425976038, "objective/train/docs_used": 116418, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9814350605010986, "objective/train/original_loss": 1.9814350605010986, "objective/train/theoretical_loss": 4.172517912107954, "objective/train/tokens_used": 321925600, "objective/train/value_avg": -0.01168060302734375, "objective/train/value_loss": 0.0004382512124720961, "objective/train/value_max": -0.0003905296325683594, "objective/train/value_min": -0.64453125, "objective/train/value_reward_corr": 0.6855180405018573, "objective/train/value_std": 0.0270538330078125, "objective/train/weight_avg": 1.0029999017715454, "objective/train/weighted_lm_loss": 1.9875683784484863, "objective/train/weights_max": 1.4244129657745361, "objective/train/weights_min": 0.3713734447956085, "theoretical_loss": 4.172517912107954, "tokens_seen": 301465600 }, { "epoch": 0.09, "learning_rate": 0.0009178302038196117, "loss": 0.0932, "theoretical_loss": 4.172517912107954, "tokens_seen": 301465600 }, { "epoch": 0.09, "learning_rate": 0.0009177499598780292, "loss": 0.0923, "theoretical_loss": 4.172095674688645, "tokens_seen": 301727744 }, { "epoch": 0.09, "learning_rate": 0.0009176697159364468, "loss": 0.0951, "theoretical_loss": 4.171673906568729, "tokens_seen": 301989888 }, { "epoch": 0.09, "learning_rate": 0.0009175894719948644, "loss": 0.0967, "theoretical_loss": 4.171252606819899, "tokens_seen": 302252032 }, { "epoch": 0.09, "learning_rate": 0.0009175092280532819, "loss": 0.0948, "theoretical_loss": 4.170831774516489, "tokens_seen": 302514176 }, { "epoch": 0.09, "learning_rate": 0.0009174289841116997, "loss": 0.0958, "theoretical_loss": 4.170411408735461, "tokens_seen": 302776320 }, { "epoch": 0.09, "learning_rate": 0.0009173487401701172, "loss": 0.0958, "theoretical_loss": 4.169991508556398, "tokens_seen": 303038464 }, { "epoch": 0.09, "learning_rate": 0.0009172684962285348, "loss": 0.0944, "theoretical_loss": 4.169572073061493, "tokens_seen": 303300608 }, { "epoch": 0.09, "learning_rate": 0.0009171882522869524, "loss": 0.0977, "theoretical_loss": 4.16915310133554, "tokens_seen": 303562752 }, { "epoch": 0.09, "learning_rate": 0.0009171080083453699, "loss": 0.0962, "theoretical_loss": 4.1687345924659205, "tokens_seen": 303824896 }, { "epoch": 0.09, "learning_rate": 0.0009170277644037875, "loss": 0.0937, "theoretical_loss": 4.168316545542602, "tokens_seen": 304087040 }, { "epoch": 0.09, "learning_rate": 0.0009169475204622051, "loss": 0.0935, "theoretical_loss": 4.167898959658121, "tokens_seen": 304349184 }, { "epoch": 0.09, "learning_rate": 0.0009168672765206227, "loss": 0.0903, "theoretical_loss": 4.167481833907576, "tokens_seen": 304611328 }, { "epoch": 0.09, "learning_rate": 0.0009167870325790402, "loss": 0.098, "theoretical_loss": 4.16706516738862, "tokens_seen": 304873472 }, { "epoch": 0.09, "learning_rate": 0.000916706788637458, "loss": 0.0937, "theoretical_loss": 4.166648959201449, "tokens_seen": 305135616 }, { "epoch": 0.09, "learning_rate": 0.0009166265446958755, "loss": 0.097, "theoretical_loss": 4.166233208448794, "tokens_seen": 305397760 }, { "epoch": 0.09, "learning_rate": 0.0009165463007542931, "loss": 0.0962, "theoretical_loss": 4.165817914235908, "tokens_seen": 305659904 }, { "epoch": 0.09, "learning_rate": 0.0009164660568127107, "loss": 0.0944, "theoretical_loss": 4.165403075670562, "tokens_seen": 305922048 }, { "epoch": 0.09, "learning_rate": 0.0009163858128711282, "loss": 0.0938, "theoretical_loss": 4.164988691863032, "tokens_seen": 306184192 }, { "epoch": 0.09, "learning_rate": 0.0009163055689295459, "loss": 0.0937, "theoretical_loss": 4.164574761926092, "tokens_seen": 306446336 }, { "epoch": 0.09, "learning_rate": 0.0009162253249879634, "loss": 0.0943, "theoretical_loss": 4.164161284975005, "tokens_seen": 306708480 }, { "epoch": 0.09, "learning_rate": 0.000916145081046381, "loss": 0.0938, "theoretical_loss": 4.1637482601275115, "tokens_seen": 306970624 }, { "epoch": 0.09, "learning_rate": 0.0009160648371047986, "loss": 0.0979, "theoretical_loss": 4.163335686503822, "tokens_seen": 307232768 }, { "epoch": 0.09, "learning_rate": 0.0009159845931632162, "loss": 0.0956, "theoretical_loss": 4.162923563226607, "tokens_seen": 307494912 }, { "epoch": 0.09, "learning_rate": 0.0009159043492216338, "loss": 0.093, "theoretical_loss": 4.1625118894209905, "tokens_seen": 307757056 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0017242436297237873, "objective/train/docs_used": 118729, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8616645336151123, "objective/train/original_loss": 1.8616645336151123, "objective/train/theoretical_loss": 4.16210066421454, "objective/train/tokens_used": 328479200, "objective/train/value_avg": -0.00921630859375, "objective/train/value_loss": 0.0002815025800373405, "objective/train/value_max": -0.0003936290740966797, "objective/train/value_min": -0.79638671875, "objective/train/value_reward_corr": 0.6599847619871043, "objective/train/value_std": 0.0160675048828125, "objective/train/weight_avg": 1.0018616914749146, "objective/train/weighted_lm_loss": 1.86510169506073, "objective/train/weights_max": 2.101844549179077, "objective/train/weights_min": 0.36873507499694824, "theoretical_loss": 4.16210066421454, "tokens_seen": 308019200 }, { "epoch": 0.09, "learning_rate": 0.0009158241052800514, "loss": 0.0932, "theoretical_loss": 4.16210066421454, "tokens_seen": 308019200 }, { "epoch": 0.09, "learning_rate": 0.000915743861338469, "loss": 0.0945, "theoretical_loss": 4.161689886737255, "tokens_seen": 308281344 }, { "epoch": 0.09, "learning_rate": 0.0009156636173968865, "loss": 0.0963, "theoretical_loss": 4.161279556121562, "tokens_seen": 308543488 }, { "epoch": 0.09, "learning_rate": 0.0009155833734553042, "loss": 0.0952, "theoretical_loss": 4.160869671502302, "tokens_seen": 308805632 }, { "epoch": 0.09, "learning_rate": 0.0009155031295137217, "loss": 0.0959, "theoretical_loss": 4.160460232016725, "tokens_seen": 309067776 }, { "epoch": 0.09, "learning_rate": 0.0009154228855721394, "loss": 0.0964, "theoretical_loss": 4.16005123680448, "tokens_seen": 309329920 }, { "epoch": 0.09, "learning_rate": 0.0009153426416305569, "loss": 0.0931, "theoretical_loss": 4.159642685007606, "tokens_seen": 309592064 }, { "epoch": 0.09, "learning_rate": 0.0009152623976889744, "loss": 0.0924, "theoretical_loss": 4.1592345757705225, "tokens_seen": 309854208 }, { "epoch": 0.09, "learning_rate": 0.0009151821537473922, "loss": 0.0917, "theoretical_loss": 4.158826908240022, "tokens_seen": 310116352 }, { "epoch": 0.09, "learning_rate": 0.0009151019098058097, "loss": 0.0928, "theoretical_loss": 4.158419681565265, "tokens_seen": 310378496 }, { "epoch": 0.09, "learning_rate": 0.0009150216658642273, "loss": 0.0943, "theoretical_loss": 4.1580128948977615, "tokens_seen": 310640640 }, { "epoch": 0.09, "learning_rate": 0.0009149414219226449, "loss": 0.0938, "theoretical_loss": 4.157606547391374, "tokens_seen": 310902784 }, { "epoch": 0.09, "learning_rate": 0.0009148611779810625, "loss": 0.0973, "theoretical_loss": 4.157200638202301, "tokens_seen": 311164928 }, { "epoch": 0.09, "learning_rate": 0.00091478093403948, "loss": 0.0901, "theoretical_loss": 4.156795166489074, "tokens_seen": 311427072 }, { "epoch": 0.09, "learning_rate": 0.0009147006900978976, "loss": 0.0948, "theoretical_loss": 4.156390131412543, "tokens_seen": 311689216 }, { "epoch": 0.09, "learning_rate": 0.0009146204461563152, "loss": 0.0936, "theoretical_loss": 4.155985532135875, "tokens_seen": 311951360 }, { "epoch": 0.09, "learning_rate": 0.0009145402022147327, "loss": 0.0966, "theoretical_loss": 4.1555813678245395, "tokens_seen": 312213504 }, { "epoch": 0.09, "learning_rate": 0.0009144599582731505, "loss": 0.0935, "theoretical_loss": 4.155177637646306, "tokens_seen": 312475648 }, { "epoch": 0.09, "learning_rate": 0.000914379714331568, "loss": 0.0914, "theoretical_loss": 4.154774340771228, "tokens_seen": 312737792 }, { "epoch": 0.09, "learning_rate": 0.0009142994703899856, "loss": 0.095, "theoretical_loss": 4.154371476371646, "tokens_seen": 312999936 }, { "epoch": 0.09, "learning_rate": 0.0009142192264484032, "loss": 0.0931, "theoretical_loss": 4.153969043622169, "tokens_seen": 313262080 }, { "epoch": 0.1, "learning_rate": 0.0009141389825068207, "loss": 0.0963, "theoretical_loss": 4.15356704169967, "tokens_seen": 313524224 }, { "epoch": 0.1, "learning_rate": 0.0009140587385652384, "loss": 0.0968, "theoretical_loss": 4.153165469783279, "tokens_seen": 313786368 }, { "epoch": 0.1, "learning_rate": 0.0009139784946236559, "loss": 0.0969, "theoretical_loss": 4.152764327054376, "tokens_seen": 314048512 }, { "epoch": 0.1, "learning_rate": 0.0009138982506820735, "loss": 0.0947, "theoretical_loss": 4.152363612696579, "tokens_seen": 314310656 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0024639612529426813, "objective/train/docs_used": 121169, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8197975158691406, "objective/train/original_loss": 1.8197975158691406, "objective/train/theoretical_loss": 4.151963325895737, "objective/train/tokens_used": 335032800, "objective/train/value_avg": -0.0099029541015625, "objective/train/value_loss": 0.0005963764269836247, "objective/train/value_max": -0.0004029273986816406, "objective/train/value_min": -0.27783203125, "objective/train/value_reward_corr": 0.45480866686294624, "objective/train/value_std": 0.01166534423828125, "objective/train/weight_avg": 1.0026694536209106, "objective/train/weighted_lm_loss": 1.8254142999649048, "objective/train/weights_max": 1.2214922904968262, "objective/train/weights_min": 0.020853379741311073, "theoretical_loss": 4.151963325895737, "tokens_seen": 314572800 }, { "epoch": 0.1, "learning_rate": 0.0009138180067404911, "loss": 0.0933, "theoretical_loss": 4.151963325895737, "tokens_seen": 314572800 }, { "epoch": 0.1, "learning_rate": 0.0009137377627989088, "loss": 0.0944, "theoretical_loss": 4.151563465839927, "tokens_seen": 314834944 }, { "epoch": 0.1, "learning_rate": 0.0009136575188573263, "loss": 0.0949, "theoretical_loss": 4.151164031719437, "tokens_seen": 315097088 }, { "epoch": 0.1, "learning_rate": 0.0009135772749157439, "loss": 0.0956, "theoretical_loss": 4.15076502272677, "tokens_seen": 315359232 }, { "epoch": 0.1, "learning_rate": 0.0009134970309741615, "loss": 0.0934, "theoretical_loss": 4.150366438056622, "tokens_seen": 315621376 }, { "epoch": 0.1, "learning_rate": 0.000913416787032579, "loss": 0.092, "theoretical_loss": 4.149968276905888, "tokens_seen": 315883520 }, { "epoch": 0.1, "learning_rate": 0.0009133365430909967, "loss": 0.0986, "theoretical_loss": 4.149570538473644, "tokens_seen": 316145664 }, { "epoch": 0.1, "learning_rate": 0.0009132562991494142, "loss": 0.0909, "theoretical_loss": 4.149173221961146, "tokens_seen": 316407808 }, { "epoch": 0.1, "learning_rate": 0.0009131760552078318, "loss": 0.0936, "theoretical_loss": 4.1487763265718165, "tokens_seen": 316669952 }, { "epoch": 0.1, "learning_rate": 0.0009130958112662494, "loss": 0.094, "theoretical_loss": 4.148379851511241, "tokens_seen": 316932096 }, { "epoch": 0.1, "learning_rate": 0.0009130155673246669, "loss": 0.094, "theoretical_loss": 4.147983795987161, "tokens_seen": 317194240 }, { "epoch": 0.1, "learning_rate": 0.0009129353233830846, "loss": 0.0934, "theoretical_loss": 4.14758815920946, "tokens_seen": 317456384 }, { "epoch": 0.1, "learning_rate": 0.0009128550794415022, "loss": 0.0914, "theoretical_loss": 4.147192940390165, "tokens_seen": 317718528 }, { "epoch": 0.1, "learning_rate": 0.0009127748354999198, "loss": 0.0966, "theoretical_loss": 4.146798138743433, "tokens_seen": 317980672 }, { "epoch": 0.1, "learning_rate": 0.0009126945915583374, "loss": 0.09, "theoretical_loss": 4.146403753485544, "tokens_seen": 318242816 }, { "epoch": 0.1, "learning_rate": 0.000912614347616755, "loss": 0.0927, "theoretical_loss": 4.146009783834892, "tokens_seen": 318504960 }, { "epoch": 0.1, "learning_rate": 0.0009125341036751725, "loss": 0.095, "theoretical_loss": 4.145616229011987, "tokens_seen": 318767104 }, { "epoch": 0.1, "learning_rate": 0.0009124538597335902, "loss": 0.0958, "theoretical_loss": 4.145223088239432, "tokens_seen": 319029248 }, { "epoch": 0.1, "learning_rate": 0.0009123736157920077, "loss": 0.0922, "theoretical_loss": 4.14483036074193, "tokens_seen": 319291392 }, { "epoch": 0.1, "learning_rate": 0.0009122933718504252, "loss": 0.0958, "theoretical_loss": 4.14443804574627, "tokens_seen": 319553536 }, { "epoch": 0.1, "learning_rate": 0.000912213127908843, "loss": 0.0925, "theoretical_loss": 4.144046142481317, "tokens_seen": 319815680 }, { "epoch": 0.1, "learning_rate": 0.0009121328839672605, "loss": 0.0965, "theoretical_loss": 4.143654650178012, "tokens_seen": 320077824 }, { "epoch": 0.1, "learning_rate": 0.0009120526400256781, "loss": 0.0966, "theoretical_loss": 4.143263568069358, "tokens_seen": 320339968 }, { "epoch": 0.1, "learning_rate": 0.0009119723960840957, "loss": 0.0925, "theoretical_loss": 4.142872895390417, "tokens_seen": 320602112 }, { "epoch": 0.1, "learning_rate": 0.0009118921521425133, "loss": 0.0938, "theoretical_loss": 4.142482631378303, "tokens_seen": 320864256 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.00016702228458598256, "objective/train/docs_used": 123582, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8835115432739258, "objective/train/original_loss": 1.8835114240646362, "objective/train/theoretical_loss": 4.142092775272169, "objective/train/tokens_used": 341586400, "objective/train/value_avg": -0.009979248046875, "objective/train/value_loss": 0.000347665831213817, "objective/train/value_max": -0.0004355907440185547, "objective/train/value_min": -0.2398681640625, "objective/train/value_reward_corr": 0.6063551562843142, "objective/train/value_std": 0.012237548828125, "objective/train/weight_avg": 1.0003257989883423, "objective/train/weighted_lm_loss": 1.8840646743774414, "objective/train/weights_max": 1.1990368366241455, "objective/train/weights_min": 0.3737753629684448, "theoretical_loss": 4.142092775272169, "tokens_seen": 321126400 }, { "epoch": 0.1, "learning_rate": 0.0009118119082009308, "loss": 0.0913, "theoretical_loss": 4.142092775272169, "tokens_seen": 321126400 }, { "epoch": 0.1, "learning_rate": 0.0009117316642593484, "loss": 0.0893, "theoretical_loss": 4.141703326313209, "tokens_seen": 321388544 }, { "epoch": 0.1, "learning_rate": 0.000911651420317766, "loss": 0.0931, "theoretical_loss": 4.141314283744643, "tokens_seen": 321650688 }, { "epoch": 0.1, "learning_rate": 0.0009115711763761835, "loss": 0.0925, "theoretical_loss": 4.140925646811714, "tokens_seen": 321912832 }, { "epoch": 0.1, "learning_rate": 0.0009114909324346013, "loss": 0.0944, "theoretical_loss": 4.1405374147616785, "tokens_seen": 322174976 }, { "epoch": 0.1, "learning_rate": 0.0009114106884930188, "loss": 0.0928, "theoretical_loss": 4.140149586843803, "tokens_seen": 322437120 }, { "epoch": 0.1, "learning_rate": 0.0009113304445514365, "loss": 0.0937, "theoretical_loss": 4.139762162309355, "tokens_seen": 322699264 }, { "epoch": 0.1, "learning_rate": 0.000911250200609854, "loss": 0.0929, "theoretical_loss": 4.139375140411592, "tokens_seen": 322961408 }, { "epoch": 0.1, "learning_rate": 0.0009111699566682715, "loss": 0.091, "theoretical_loss": 4.138988520405764, "tokens_seen": 323223552 }, { "epoch": 0.1, "learning_rate": 0.0009110897127266892, "loss": 0.0919, "theoretical_loss": 4.138602301549097, "tokens_seen": 323485696 }, { "epoch": 0.1, "learning_rate": 0.0009110094687851067, "loss": 0.0917, "theoretical_loss": 4.138216483100795, "tokens_seen": 323747840 }, { "epoch": 0.1, "learning_rate": 0.0009109292248435243, "loss": 0.0897, "theoretical_loss": 4.137831064322021, "tokens_seen": 324009984 }, { "epoch": 0.1, "learning_rate": 0.0009108489809019419, "loss": 0.0901, "theoretical_loss": 4.1374460444759045, "tokens_seen": 324272128 }, { "epoch": 0.1, "learning_rate": 0.0009107687369603596, "loss": 0.092, "theoretical_loss": 4.137061422827525, "tokens_seen": 324534272 }, { "epoch": 0.1, "learning_rate": 0.0009106884930187771, "loss": 0.0937, "theoretical_loss": 4.136677198643908, "tokens_seen": 324796416 }, { "epoch": 0.1, "learning_rate": 0.0009106082490771947, "loss": 0.0912, "theoretical_loss": 4.13629337119402, "tokens_seen": 325058560 }, { "epoch": 0.1, "learning_rate": 0.0009105280051356123, "loss": 0.0919, "theoretical_loss": 4.135909939748757, "tokens_seen": 325320704 }, { "epoch": 0.1, "learning_rate": 0.0009104477611940298, "loss": 0.0915, "theoretical_loss": 4.135526903580946, "tokens_seen": 325582848 }, { "epoch": 0.1, "learning_rate": 0.0009103675172524475, "loss": 0.0933, "theoretical_loss": 4.135144261965327, "tokens_seen": 325844992 }, { "epoch": 0.1, "learning_rate": 0.000910287273310865, "loss": 0.0921, "theoretical_loss": 4.134762014178559, "tokens_seen": 326107136 }, { "epoch": 0.1, "learning_rate": 0.0009102070293692827, "loss": 0.092, "theoretical_loss": 4.134380159499204, "tokens_seen": 326369280 }, { "epoch": 0.1, "learning_rate": 0.0009101267854277002, "loss": 0.0928, "theoretical_loss": 4.1339986972077245, "tokens_seen": 326631424 }, { "epoch": 0.1, "learning_rate": 0.0009100465414861177, "loss": 0.0881, "theoretical_loss": 4.133617626586475, "tokens_seen": 326893568 }, { "epoch": 0.1, "learning_rate": 0.0009099662975445355, "loss": 0.0911, "theoretical_loss": 4.133236946919698, "tokens_seen": 327155712 }, { "epoch": 0.1, "learning_rate": 0.000909886053602953, "loss": 0.0969, "theoretical_loss": 4.132856657493516, "tokens_seen": 327417856 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0012691118754446507, "objective/train/docs_used": 126013, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.943372130393982, "objective/train/original_loss": 1.9433720111846924, "objective/train/theoretical_loss": 4.132476757595925, "objective/train/tokens_used": 348140000, "objective/train/value_avg": -0.00861358642578125, "objective/train/value_loss": 0.0002645694767124951, "objective/train/value_max": -0.00031757354736328125, "objective/train/value_min": -0.280517578125, "objective/train/value_reward_corr": 0.6476616622246956, "objective/train/value_std": 0.01280975341796875, "objective/train/weight_avg": 1.0013933181762695, "objective/train/weighted_lm_loss": 1.9461336135864258, "objective/train/weights_max": 1.204296588897705, "objective/train/weights_min": 0.3697463870048523, "theoretical_loss": 4.132476757595925, "tokens_seen": 327680000 }, { "epoch": 0.1, "learning_rate": 0.0009098058096613706, "loss": 0.095, "theoretical_loss": 4.132476757595925, "tokens_seen": 327680000 }, { "epoch": 0.1, "learning_rate": 0.0009097255657197882, "loss": 0.0945, "theoretical_loss": 4.132097246516788, "tokens_seen": 327942144 }, { "epoch": 0.1, "learning_rate": 0.0009096453217782058, "loss": 0.094, "theoretical_loss": 4.131718123547829, "tokens_seen": 328204288 }, { "epoch": 0.1, "learning_rate": 0.0009095650778366233, "loss": 0.0928, "theoretical_loss": 4.131339387982628, "tokens_seen": 328466432 }, { "epoch": 0.1, "learning_rate": 0.000909484833895041, "loss": 0.091, "theoretical_loss": 4.1309610391166105, "tokens_seen": 328728576 }, { "epoch": 0.1, "learning_rate": 0.0009094045899534585, "loss": 0.09, "theoretical_loss": 4.1305830762470475, "tokens_seen": 328990720 }, { "epoch": 0.1, "learning_rate": 0.000909324346011876, "loss": 0.0925, "theoretical_loss": 4.1302054986730425, "tokens_seen": 329252864 }, { "epoch": 0.1, "learning_rate": 0.0009092441020702938, "loss": 0.0918, "theoretical_loss": 4.129828305695531, "tokens_seen": 329515008 }, { "epoch": 0.1, "learning_rate": 0.0009091638581287113, "loss": 0.0915, "theoretical_loss": 4.129451496617269, "tokens_seen": 329777152 }, { "epoch": 0.1, "learning_rate": 0.0009090836141871289, "loss": 0.0936, "theoretical_loss": 4.129075070742831, "tokens_seen": 330039296 }, { "epoch": 0.1, "learning_rate": 0.0009090033702455465, "loss": 0.0929, "theoretical_loss": 4.128699027378604, "tokens_seen": 330301440 }, { "epoch": 0.1, "learning_rate": 0.0009089231263039641, "loss": 0.0921, "theoretical_loss": 4.128323365832777, "tokens_seen": 330563584 }, { "epoch": 0.1, "learning_rate": 0.0009088428823623817, "loss": 0.0936, "theoretical_loss": 4.127948085415338, "tokens_seen": 330825728 }, { "epoch": 0.1, "learning_rate": 0.0009087626384207992, "loss": 0.0937, "theoretical_loss": 4.127573185438068, "tokens_seen": 331087872 }, { "epoch": 0.1, "learning_rate": 0.0009086823944792168, "loss": 0.0959, "theoretical_loss": 4.127198665214536, "tokens_seen": 331350016 }, { "epoch": 0.1, "learning_rate": 0.0009086021505376344, "loss": 0.0897, "theoretical_loss": 4.126824524060088, "tokens_seen": 331612160 }, { "epoch": 0.1, "learning_rate": 0.000908521906596052, "loss": 0.0925, "theoretical_loss": 4.126450761291847, "tokens_seen": 331874304 }, { "epoch": 0.1, "learning_rate": 0.0009084416626544696, "loss": 0.0948, "theoretical_loss": 4.126077376228702, "tokens_seen": 332136448 }, { "epoch": 0.1, "learning_rate": 0.0009083614187128873, "loss": 0.0874, "theoretical_loss": 4.1257043681913075, "tokens_seen": 332398592 }, { "epoch": 0.1, "learning_rate": 0.0009082811747713048, "loss": 0.0917, "theoretical_loss": 4.125331736502073, "tokens_seen": 332660736 }, { "epoch": 0.1, "learning_rate": 0.0009082009308297223, "loss": 0.0937, "theoretical_loss": 4.124959480485156, "tokens_seen": 332922880 }, { "epoch": 0.1, "learning_rate": 0.00090812068688814, "loss": 0.0899, "theoretical_loss": 4.124587599466462, "tokens_seen": 333185024 }, { "epoch": 0.1, "learning_rate": 0.0009080404429465575, "loss": 0.0936, "theoretical_loss": 4.124216092773635, "tokens_seen": 333447168 }, { "epoch": 0.1, "learning_rate": 0.0009079601990049751, "loss": 0.0895, "theoretical_loss": 4.123844959736049, "tokens_seen": 333709312 }, { "epoch": 0.1, "learning_rate": 0.0009078799550633927, "loss": 0.091, "theoretical_loss": 4.123474199684807, "tokens_seen": 333971456 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.002065310487523675, "objective/train/docs_used": 128373, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.9022668600082397, "objective/train/original_loss": 1.9022668600082397, "objective/train/theoretical_loss": 4.123103811952736, "objective/train/tokens_used": 354693600, "objective/train/value_avg": -0.00934600830078125, "objective/train/value_loss": 0.00017411113367415965, "objective/train/value_max": -0.000469207763671875, "objective/train/value_min": -0.1983642578125, "objective/train/value_reward_corr": 0.5511531255621919, "objective/train/value_std": 0.00977325439453125, "objective/train/weight_avg": 1.0021498203277588, "objective/train/weighted_lm_loss": 1.9055767059326172, "objective/train/weights_max": 1.1557458639144897, "objective/train/weights_min": 0.36877235770225525, "theoretical_loss": 4.123103811952736, "tokens_seen": 334233600 }, { "epoch": 0.1, "learning_rate": 0.0009077997111218103, "loss": 0.0956, "theoretical_loss": 4.123103811952736, "tokens_seen": 334233600 }, { "epoch": 0.1, "learning_rate": 0.000907719467180228, "loss": 0.089, "theoretical_loss": 4.122733795874372, "tokens_seen": 334495744 }, { "epoch": 0.1, "learning_rate": 0.0009076392232386455, "loss": 0.0941, "theoretical_loss": 4.122364150785966, "tokens_seen": 334757888 }, { "epoch": 0.1, "learning_rate": 0.0009075589792970631, "loss": 0.0942, "theoretical_loss": 4.1219948760254725, "tokens_seen": 335020032 }, { "epoch": 0.1, "learning_rate": 0.0009074787353554807, "loss": 0.093, "theoretical_loss": 4.121625970932542, "tokens_seen": 335282176 }, { "epoch": 0.1, "learning_rate": 0.0009073984914138983, "loss": 0.0926, "theoretical_loss": 4.121257434848519, "tokens_seen": 335544320 }, { "epoch": 0.1, "learning_rate": 0.0009073182474723158, "loss": 0.0921, "theoretical_loss": 4.120889267116435, "tokens_seen": 335806464 }, { "epoch": 0.1, "learning_rate": 0.0009072380035307335, "loss": 0.0923, "theoretical_loss": 4.1205214670810015, "tokens_seen": 336068608 }, { "epoch": 0.1, "learning_rate": 0.000907157759589151, "loss": 0.0923, "theoretical_loss": 4.120154034088609, "tokens_seen": 336330752 }, { "epoch": 0.1, "learning_rate": 0.0009070775156475685, "loss": 0.0922, "theoretical_loss": 4.119786967487314, "tokens_seen": 336592896 }, { "epoch": 0.1, "learning_rate": 0.0009069972717059863, "loss": 0.0935, "theoretical_loss": 4.11942026662684, "tokens_seen": 336855040 }, { "epoch": 0.1, "learning_rate": 0.0009069170277644038, "loss": 0.0896, "theoretical_loss": 4.11905393085857, "tokens_seen": 337117184 }, { "epoch": 0.1, "learning_rate": 0.0009068367838228214, "loss": 0.0926, "theoretical_loss": 4.118687959535539, "tokens_seen": 337379328 }, { "epoch": 0.1, "learning_rate": 0.000906756539881239, "loss": 0.0906, "theoretical_loss": 4.118322352012429, "tokens_seen": 337641472 }, { "epoch": 0.1, "learning_rate": 0.0009066762959396566, "loss": 0.0902, "theoretical_loss": 4.117957107645569, "tokens_seen": 337903616 }, { "epoch": 0.1, "learning_rate": 0.0009065960519980741, "loss": 0.0925, "theoretical_loss": 4.1175922257929205, "tokens_seen": 338165760 }, { "epoch": 0.1, "learning_rate": 0.0009065158080564918, "loss": 0.0945, "theoretical_loss": 4.117227705814078, "tokens_seen": 338427904 }, { "epoch": 0.1, "learning_rate": 0.0009064355641149093, "loss": 0.0971, "theoretical_loss": 4.116863547070264, "tokens_seen": 338690048 }, { "epoch": 0.1, "learning_rate": 0.0009063553201733269, "loss": 0.0927, "theoretical_loss": 4.116499748924319, "tokens_seen": 338952192 }, { "epoch": 0.1, "learning_rate": 0.0009062750762317446, "loss": 0.093, "theoretical_loss": 4.116136310740702, "tokens_seen": 339214336 }, { "epoch": 0.1, "learning_rate": 0.0009061948322901621, "loss": 0.0871, "theoretical_loss": 4.115773231885479, "tokens_seen": 339476480 }, { "epoch": 0.1, "learning_rate": 0.0009061145883485798, "loss": 0.0904, "theoretical_loss": 4.115410511726323, "tokens_seen": 339738624 }, { "epoch": 0.1, "learning_rate": 0.0009060343444069973, "loss": 0.0921, "theoretical_loss": 4.115048149632507, "tokens_seen": 340000768 }, { "epoch": 0.1, "learning_rate": 0.0009059541004654149, "loss": 0.0933, "theoretical_loss": 4.114686144974897, "tokens_seen": 340262912 }, { "epoch": 0.1, "learning_rate": 0.0009058738565238325, "loss": 0.0923, "theoretical_loss": 4.114324497125947, "tokens_seen": 340525056 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0013270352501422167, "objective/train/docs_used": 130755, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.773648977279663, "objective/train/original_loss": 1.7736492156982422, "objective/train/theoretical_loss": 4.113963205459697, "objective/train/tokens_used": 361247200, "objective/train/value_avg": -0.0078582763671875, "objective/train/value_loss": 0.0002814758918248117, "objective/train/value_max": -0.0004305839538574219, "objective/train/value_min": -0.75634765625, "objective/train/value_reward_corr": 0.6420233629882061, "objective/train/value_std": 0.01309967041015625, "objective/train/weight_avg": 1.0014532804489136, "objective/train/weighted_lm_loss": 1.775871753692627, "objective/train/weights_max": 1.4132821559906006, "objective/train/weights_min": 0.38253408670425415, "theoretical_loss": 4.113963205459697, "tokens_seen": 340787200 }, { "epoch": 0.1, "learning_rate": 0.00090579361258225, "loss": 0.0906, "theoretical_loss": 4.113963205459697, "tokens_seen": 340787200 }, { "epoch": 0.1, "learning_rate": 0.0009057133686406676, "loss": 0.091, "theoretical_loss": 4.113602269351765, "tokens_seen": 341049344 }, { "epoch": 0.1, "learning_rate": 0.0009056331246990852, "loss": 0.0903, "theoretical_loss": 4.113241688179341, "tokens_seen": 341311488 }, { "epoch": 0.1, "learning_rate": 0.0009055528807575029, "loss": 0.093, "theoretical_loss": 4.1128814613211855, "tokens_seen": 341573632 }, { "epoch": 0.1, "learning_rate": 0.0009054726368159204, "loss": 0.0896, "theoretical_loss": 4.1125215881576205, "tokens_seen": 341835776 }, { "epoch": 0.1, "learning_rate": 0.0009053923928743381, "loss": 0.0899, "theoretical_loss": 4.112162068070525, "tokens_seen": 342097920 }, { "epoch": 0.1, "learning_rate": 0.0009053121489327556, "loss": 0.0911, "theoretical_loss": 4.111802900443333, "tokens_seen": 342360064 }, { "epoch": 0.1, "learning_rate": 0.0009052319049911731, "loss": 0.0891, "theoretical_loss": 4.111444084661026, "tokens_seen": 342622208 }, { "epoch": 0.1, "learning_rate": 0.0009051516610495908, "loss": 0.0888, "theoretical_loss": 4.111085620110127, "tokens_seen": 342884352 }, { "epoch": 0.1, "learning_rate": 0.0009050714171080083, "loss": 0.0914, "theoretical_loss": 4.110727506178697, "tokens_seen": 343146496 }, { "epoch": 0.1, "learning_rate": 0.000904991173166426, "loss": 0.0929, "theoretical_loss": 4.110369742256329, "tokens_seen": 343408640 }, { "epoch": 0.1, "learning_rate": 0.0009049109292248435, "loss": 0.09, "theoretical_loss": 4.110012327734145, "tokens_seen": 343670784 }, { "epoch": 0.1, "learning_rate": 0.0009048306852832611, "loss": 0.091, "theoretical_loss": 4.1096552620047895, "tokens_seen": 343932928 }, { "epoch": 0.1, "learning_rate": 0.0009047504413416788, "loss": 0.0921, "theoretical_loss": 4.109298544462423, "tokens_seen": 344195072 }, { "epoch": 0.1, "learning_rate": 0.0009046701974000963, "loss": 0.0867, "theoretical_loss": 4.108942174502721, "tokens_seen": 344457216 }, { "epoch": 0.1, "learning_rate": 0.0009045899534585139, "loss": 0.0907, "theoretical_loss": 4.108586151522863, "tokens_seen": 344719360 }, { "epoch": 0.1, "learning_rate": 0.0009045097095169315, "loss": 0.0932, "theoretical_loss": 4.1082304749215375, "tokens_seen": 344981504 }, { "epoch": 0.1, "learning_rate": 0.0009044294655753491, "loss": 0.091, "theoretical_loss": 4.107875144098925, "tokens_seen": 345243648 }, { "epoch": 0.1, "learning_rate": 0.0009043492216337666, "loss": 0.0949, "theoretical_loss": 4.107520158456703, "tokens_seen": 345505792 }, { "epoch": 0.1, "learning_rate": 0.0009042689776921843, "loss": 0.0901, "theoretical_loss": 4.107165517398034, "tokens_seen": 345767936 }, { "epoch": 0.1, "learning_rate": 0.0009041887337506018, "loss": 0.0926, "theoretical_loss": 4.106811220327568, "tokens_seen": 346030080 }, { "epoch": 0.1, "learning_rate": 0.0009041084898090193, "loss": 0.0906, "theoretical_loss": 4.10645726665143, "tokens_seen": 346292224 }, { "epoch": 0.11, "learning_rate": 0.0009040282458674371, "loss": 0.0934, "theoretical_loss": 4.10610365577722, "tokens_seen": 346554368 }, { "epoch": 0.11, "learning_rate": 0.0009039480019258546, "loss": 0.0913, "theoretical_loss": 4.105750387114009, "tokens_seen": 346816512 }, { "epoch": 0.11, "learning_rate": 0.0009038677579842723, "loss": 0.0905, "theoretical_loss": 4.105397460072329, "tokens_seen": 347078656 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.002003602683544159, "objective/train/docs_used": 133174, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.733391523361206, "objective/train/original_loss": 1.7333914041519165, "objective/train/theoretical_loss": 4.105044874064174, "objective/train/tokens_used": 367800800, "objective/train/value_avg": -0.0088043212890625, "objective/train/value_loss": 0.00023858436907175928, "objective/train/value_max": -0.0003998279571533203, "objective/train/value_min": -0.9033203125, "objective/train/value_reward_corr": 0.6106213393331679, "objective/train/value_std": 0.0132904052734375, "objective/train/weight_avg": 1.002118468284607, "objective/train/weighted_lm_loss": 1.736554741859436, "objective/train/weights_max": 2.2593677043914795, "objective/train/weights_min": 0.37059226632118225, "theoretical_loss": 4.105044874064174, "tokens_seen": 347340800 }, { "epoch": 0.11, "learning_rate": 0.0009037875140426898, "loss": 0.0871, "theoretical_loss": 4.105044874064174, "tokens_seen": 347340800 }, { "epoch": 0.11, "learning_rate": 0.0009037072701011074, "loss": 0.0909, "theoretical_loss": 4.104692628502993, "tokens_seen": 347602944 }, { "epoch": 0.11, "learning_rate": 0.000903627026159525, "loss": 0.0895, "theoretical_loss": 4.104340722803683, "tokens_seen": 347865088 }, { "epoch": 0.11, "learning_rate": 0.0009035467822179425, "loss": 0.0903, "theoretical_loss": 4.103989156382589, "tokens_seen": 348127232 }, { "epoch": 0.11, "learning_rate": 0.0009034665382763601, "loss": 0.0873, "theoretical_loss": 4.103637928657495, "tokens_seen": 348389376 }, { "epoch": 0.11, "learning_rate": 0.0009033862943347777, "loss": 0.0953, "theoretical_loss": 4.103287039047622, "tokens_seen": 348651520 }, { "epoch": 0.11, "learning_rate": 0.0009033060503931954, "loss": 0.0901, "theoretical_loss": 4.102936486973624, "tokens_seen": 348913664 }, { "epoch": 0.11, "learning_rate": 0.0009032258064516129, "loss": 0.0896, "theoretical_loss": 4.102586271857579, "tokens_seen": 349175808 }, { "epoch": 0.11, "learning_rate": 0.0009031455625100306, "loss": 0.0882, "theoretical_loss": 4.102236393122989, "tokens_seen": 349437952 }, { "epoch": 0.11, "learning_rate": 0.0009030653185684481, "loss": 0.0915, "theoretical_loss": 4.101886850194775, "tokens_seen": 349700096 }, { "epoch": 0.11, "learning_rate": 0.0009029850746268657, "loss": 0.0882, "theoretical_loss": 4.10153764249927, "tokens_seen": 349962240 }, { "epoch": 0.11, "learning_rate": 0.0009029048306852833, "loss": 0.0884, "theoretical_loss": 4.1011887694642155, "tokens_seen": 350224384 }, { "epoch": 0.11, "learning_rate": 0.0009028245867437008, "loss": 0.0897, "theoretical_loss": 4.100840230518759, "tokens_seen": 350486528 }, { "epoch": 0.11, "learning_rate": 0.0009027443428021184, "loss": 0.0918, "theoretical_loss": 4.100492025093445, "tokens_seen": 350748672 }, { "epoch": 0.11, "learning_rate": 0.000902664098860536, "loss": 0.0919, "theoretical_loss": 4.100144152620215, "tokens_seen": 351010816 }, { "epoch": 0.11, "learning_rate": 0.0009025838549189537, "loss": 0.0904, "theoretical_loss": 4.099796612532403, "tokens_seen": 351272960 }, { "epoch": 0.11, "learning_rate": 0.0009025036109773713, "loss": 0.0898, "theoretical_loss": 4.0994494042647265, "tokens_seen": 351535104 }, { "epoch": 0.11, "learning_rate": 0.0009024233670357889, "loss": 0.0904, "theoretical_loss": 4.099102527253285, "tokens_seen": 351797248 }, { "epoch": 0.11, "learning_rate": 0.0009023431230942064, "loss": 0.0923, "theoretical_loss": 4.098755980935557, "tokens_seen": 352059392 }, { "epoch": 0.11, "learning_rate": 0.000902262879152624, "loss": 0.0893, "theoretical_loss": 4.0984097647503965, "tokens_seen": 352321536 }, { "epoch": 0.11, "learning_rate": 0.0009021826352110416, "loss": 0.0903, "theoretical_loss": 4.09806387813802, "tokens_seen": 352583680 }, { "epoch": 0.11, "learning_rate": 0.0009021023912694591, "loss": 0.0875, "theoretical_loss": 4.0977183205400145, "tokens_seen": 352845824 }, { "epoch": 0.11, "learning_rate": 0.0009020221473278768, "loss": 0.0897, "theoretical_loss": 4.097373091399324, "tokens_seen": 353107968 }, { "epoch": 0.11, "learning_rate": 0.0009019419033862943, "loss": 0.0896, "theoretical_loss": 4.097028190160249, "tokens_seen": 353370112 }, { "epoch": 0.11, "learning_rate": 0.000901861659444712, "loss": 0.0909, "theoretical_loss": 4.096683616268442, "tokens_seen": 353632256 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.003045022254809737, "objective/train/docs_used": 135614, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.740461826324463, "objective/train/original_loss": 1.7404619455337524, "objective/train/theoretical_loss": 4.096339369170902, "objective/train/tokens_used": 374354400, "objective/train/value_avg": -0.01047515869140625, "objective/train/value_loss": 0.0004944366519339383, "objective/train/value_max": -0.00038909912109375, "objective/train/value_min": -0.673828125, "objective/train/value_reward_corr": 0.5444227902078449, "objective/train/value_std": 0.01522064208984375, "objective/train/weight_avg": 1.00326406955719, "objective/train/weighted_lm_loss": 1.7446165084838867, "objective/train/weights_max": 1.2945656776428223, "objective/train/weights_min": 0.3746718466281891, "theoretical_loss": 4.096339369170902, "tokens_seen": 353894400 }, { "epoch": 0.11, "learning_rate": 0.0009017814155031296, "loss": 0.0898, "theoretical_loss": 4.096339369170902, "tokens_seen": 353894400 }, { "epoch": 0.11, "learning_rate": 0.0009017011715615471, "loss": 0.0909, "theoretical_loss": 4.095995448315972, "tokens_seen": 354156544 }, { "epoch": 0.11, "learning_rate": 0.0009016209276199647, "loss": 0.0899, "theoretical_loss": 4.095651853153331, "tokens_seen": 354418688 }, { "epoch": 0.11, "learning_rate": 0.0009015406836783823, "loss": 0.0912, "theoretical_loss": 4.095308583133997, "tokens_seen": 354680832 }, { "epoch": 0.11, "learning_rate": 0.0009014604397367999, "loss": 0.0915, "theoretical_loss": 4.094965637710314, "tokens_seen": 354942976 }, { "epoch": 0.11, "learning_rate": 0.0009013801957952175, "loss": 0.0903, "theoretical_loss": 4.094623016335954, "tokens_seen": 355205120 }, { "epoch": 0.11, "learning_rate": 0.0009012999518536351, "loss": 0.0899, "theoretical_loss": 4.094280718465911, "tokens_seen": 355467264 }, { "epoch": 0.11, "learning_rate": 0.0009012197079120526, "loss": 0.09, "theoretical_loss": 4.093938743556496, "tokens_seen": 355729408 }, { "epoch": 0.11, "learning_rate": 0.0009011394639704702, "loss": 0.0889, "theoretical_loss": 4.093597091065333, "tokens_seen": 355991552 }, { "epoch": 0.11, "learning_rate": 0.0009010592200288879, "loss": 0.089, "theoretical_loss": 4.093255760451357, "tokens_seen": 356253696 }, { "epoch": 0.11, "learning_rate": 0.0009009789760873054, "loss": 0.0883, "theoretical_loss": 4.092914751174808, "tokens_seen": 356515840 }, { "epoch": 0.11, "learning_rate": 0.0009008987321457231, "loss": 0.0898, "theoretical_loss": 4.092574062697225, "tokens_seen": 356777984 }, { "epoch": 0.11, "learning_rate": 0.0009008184882041406, "loss": 0.0881, "theoretical_loss": 4.092233694481447, "tokens_seen": 357040128 }, { "epoch": 0.11, "learning_rate": 0.0009007382442625582, "loss": 0.0927, "theoretical_loss": 4.091893645991604, "tokens_seen": 357302272 }, { "epoch": 0.11, "learning_rate": 0.0009006580003209758, "loss": 0.0913, "theoretical_loss": 4.091553916693115, "tokens_seen": 357564416 }, { "epoch": 0.11, "learning_rate": 0.0009005777563793933, "loss": 0.0859, "theoretical_loss": 4.091214506052687, "tokens_seen": 357826560 }, { "epoch": 0.11, "learning_rate": 0.0009004975124378109, "loss": 0.0841, "theoretical_loss": 4.090875413538302, "tokens_seen": 358088704 }, { "epoch": 0.11, "learning_rate": 0.0009004172684962285, "loss": 0.0921, "theoretical_loss": 4.090536638619224, "tokens_seen": 358350848 }, { "epoch": 0.11, "learning_rate": 0.0009003370245546462, "loss": 0.0872, "theoretical_loss": 4.090198180765989, "tokens_seen": 358612992 }, { "epoch": 0.11, "learning_rate": 0.0009002567806130637, "loss": 0.0893, "theoretical_loss": 4.0898600394504, "tokens_seen": 358875136 }, { "epoch": 0.11, "learning_rate": 0.0009001765366714814, "loss": 0.0888, "theoretical_loss": 4.089522214145525, "tokens_seen": 359137280 }, { "epoch": 0.11, "learning_rate": 0.0009000962927298989, "loss": 0.0911, "theoretical_loss": 4.089184704325695, "tokens_seen": 359399424 }, { "epoch": 0.11, "learning_rate": 0.0009000160487883166, "loss": 0.0886, "theoretical_loss": 4.088847509466497, "tokens_seen": 359661568 }, { "epoch": 0.11, "learning_rate": 0.0008999358048467341, "loss": 0.0885, "theoretical_loss": 4.088510629044771, "tokens_seen": 359923712 }, { "epoch": 0.11, "learning_rate": 0.0008998555609051516, "loss": 0.0885, "theoretical_loss": 4.088174062538605, "tokens_seen": 360185856 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0014361967332661152, "objective/train/docs_used": 137426, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.912741780281067, "objective/train/original_loss": 1.9127418994903564, "objective/train/theoretical_loss": 4.087837809427334, "objective/train/tokens_used": 380908000, "objective/train/value_avg": -0.0081939697265625, "objective/train/value_loss": 0.00017770560225471854, "objective/train/value_max": -0.0002913475036621094, "objective/train/value_min": -0.2374267578125, "objective/train/value_reward_corr": 0.6243051078626306, "objective/train/value_std": 0.0115814208984375, "objective/train/weight_avg": 1.0015201568603516, "objective/train/weighted_lm_loss": 1.9144210815429688, "objective/train/weights_max": 1.1172159910202026, "objective/train/weights_min": 0.36891940236091614, "theoretical_loss": 4.087837809427334, "tokens_seen": 360448000 }, { "epoch": 0.11, "learning_rate": 0.0008997753169635693, "loss": 0.093, "theoretical_loss": 4.087837809427334, "tokens_seen": 360448000 }, { "epoch": 0.11, "learning_rate": 0.0008996950730219868, "loss": 0.088, "theoretical_loss": 4.087501869191536, "tokens_seen": 360710144 }, { "epoch": 0.11, "learning_rate": 0.0008996148290804045, "loss": 0.0877, "theoretical_loss": 4.087166241313023, "tokens_seen": 360972288 }, { "epoch": 0.11, "learning_rate": 0.0008995345851388221, "loss": 0.0925, "theoretical_loss": 4.086830925274842, "tokens_seen": 361234432 }, { "epoch": 0.11, "learning_rate": 0.0008994543411972397, "loss": 0.0887, "theoretical_loss": 4.08649592056127, "tokens_seen": 361496576 }, { "epoch": 0.11, "learning_rate": 0.0008993740972556572, "loss": 0.0926, "theoretical_loss": 4.086161226657811, "tokens_seen": 361758720 }, { "epoch": 0.11, "learning_rate": 0.0008992938533140748, "loss": 0.0857, "theoretical_loss": 4.085826843051191, "tokens_seen": 362020864 }, { "epoch": 0.11, "learning_rate": 0.0008992136093724924, "loss": 0.0915, "theoretical_loss": 4.0854927692293534, "tokens_seen": 362283008 }, { "epoch": 0.11, "learning_rate": 0.0008991333654309099, "loss": 0.0891, "theoretical_loss": 4.085159004681458, "tokens_seen": 362545152 }, { "epoch": 0.11, "learning_rate": 0.0008990531214893276, "loss": 0.0904, "theoretical_loss": 4.084825548897873, "tokens_seen": 362807296 }, { "epoch": 0.11, "learning_rate": 0.0008989728775477451, "loss": 0.0932, "theoretical_loss": 4.084492401370177, "tokens_seen": 363069440 }, { "epoch": 0.11, "learning_rate": 0.0008988926336061629, "loss": 0.0911, "theoretical_loss": 4.08415956159115, "tokens_seen": 363331584 }, { "epoch": 0.11, "learning_rate": 0.0008988123896645804, "loss": 0.0894, "theoretical_loss": 4.083827029054773, "tokens_seen": 363593728 }, { "epoch": 0.11, "learning_rate": 0.0008987321457229979, "loss": 0.0912, "theoretical_loss": 4.0834948032562215, "tokens_seen": 363855872 }, { "epoch": 0.11, "learning_rate": 0.0008986519017814156, "loss": 0.0909, "theoretical_loss": 4.083162883691864, "tokens_seen": 364118016 }, { "epoch": 0.11, "learning_rate": 0.0008985716578398331, "loss": 0.0913, "theoretical_loss": 4.082831269859261, "tokens_seen": 364380160 }, { "epoch": 0.11, "learning_rate": 0.0008984914138982507, "loss": 0.0901, "theoretical_loss": 4.0824999612571515, "tokens_seen": 364642304 }, { "epoch": 0.11, "learning_rate": 0.0008984111699566683, "loss": 0.0889, "theoretical_loss": 4.082168957385462, "tokens_seen": 364904448 }, { "epoch": 0.11, "learning_rate": 0.0008983309260150859, "loss": 0.0917, "theoretical_loss": 4.081838257745293, "tokens_seen": 365166592 }, { "epoch": 0.11, "learning_rate": 0.0008982506820735034, "loss": 0.0873, "theoretical_loss": 4.081507861838922, "tokens_seen": 365428736 }, { "epoch": 0.11, "learning_rate": 0.000898170438131921, "loss": 0.0901, "theoretical_loss": 4.081177769169795, "tokens_seen": 365690880 }, { "epoch": 0.11, "learning_rate": 0.0008980901941903387, "loss": 0.0872, "theoretical_loss": 4.080847979242526, "tokens_seen": 365953024 }, { "epoch": 0.11, "learning_rate": 0.0008980099502487562, "loss": 0.0898, "theoretical_loss": 4.0805184915628905, "tokens_seen": 366215168 }, { "epoch": 0.11, "learning_rate": 0.0008979297063071739, "loss": 0.0891, "theoretical_loss": 4.080189305637827, "tokens_seen": 366477312 }, { "epoch": 0.11, "learning_rate": 0.0008978494623655914, "loss": 0.0893, "theoretical_loss": 4.079860420975429, "tokens_seen": 366739456 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0024263346567749977, "objective/train/docs_used": 139849, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8241596221923828, "objective/train/original_loss": 1.8241596221923828, "objective/train/theoretical_loss": 4.07953183708494, "objective/train/tokens_used": 387461600, "objective/train/value_avg": -0.0073089599609375, "objective/train/value_loss": 0.00016963874804787338, "objective/train/value_max": -0.00034999847412109375, "objective/train/value_min": -0.385498046875, "objective/train/value_reward_corr": 0.4735220244507201, "objective/train/value_std": 0.008209228515625, "objective/train/weight_avg": 1.002504587173462, "objective/train/weighted_lm_loss": 1.8287756443023682, "objective/train/weights_max": 1.386350154876709, "objective/train/weights_min": 0.3713734447956085, "theoretical_loss": 4.07953183708494, "tokens_seen": 367001600 }, { "epoch": 0.11, "learning_rate": 0.000897769218424009, "loss": 0.0945, "theoretical_loss": 4.07953183708494, "tokens_seen": 367001600 }, { "epoch": 0.11, "learning_rate": 0.0008976889744824266, "loss": 0.0911, "theoretical_loss": 4.079203553476759, "tokens_seen": 367263744 }, { "epoch": 0.11, "learning_rate": 0.0008976087305408441, "loss": 0.0914, "theoretical_loss": 4.078875569662424, "tokens_seen": 367525888 }, { "epoch": 0.11, "learning_rate": 0.0008975284865992618, "loss": 0.0883, "theoretical_loss": 4.07854788515462, "tokens_seen": 367788032 }, { "epoch": 0.11, "learning_rate": 0.0008974482426576793, "loss": 0.0893, "theoretical_loss": 4.078220499467168, "tokens_seen": 368050176 }, { "epoch": 0.11, "learning_rate": 0.000897367998716097, "loss": 0.0896, "theoretical_loss": 4.077893412115025, "tokens_seen": 368312320 }, { "epoch": 0.11, "learning_rate": 0.0008972877547745146, "loss": 0.0872, "theoretical_loss": 4.077566622614281, "tokens_seen": 368574464 }, { "epoch": 0.11, "learning_rate": 0.0008972075108329322, "loss": 0.089, "theoretical_loss": 4.077240130482153, "tokens_seen": 368836608 }, { "epoch": 0.11, "learning_rate": 0.0008971272668913497, "loss": 0.0912, "theoretical_loss": 4.076913935236982, "tokens_seen": 369098752 }, { "epoch": 0.11, "learning_rate": 0.0008970470229497674, "loss": 0.089, "theoretical_loss": 4.076588036398235, "tokens_seen": 369360896 }, { "epoch": 0.11, "learning_rate": 0.0008969667790081849, "loss": 0.0887, "theoretical_loss": 4.076262433486491, "tokens_seen": 369623040 }, { "epoch": 0.11, "learning_rate": 0.0008968865350666024, "loss": 0.0909, "theoretical_loss": 4.075937126023448, "tokens_seen": 369885184 }, { "epoch": 0.11, "learning_rate": 0.0008968062911250201, "loss": 0.0886, "theoretical_loss": 4.075612113531915, "tokens_seen": 370147328 }, { "epoch": 0.11, "learning_rate": 0.0008967260471834376, "loss": 0.0882, "theoretical_loss": 4.075287395535807, "tokens_seen": 370409472 }, { "epoch": 0.11, "learning_rate": 0.0008966458032418553, "loss": 0.0894, "theoretical_loss": 4.074962971560145, "tokens_seen": 370671616 }, { "epoch": 0.11, "learning_rate": 0.0008965655593002729, "loss": 0.0882, "theoretical_loss": 4.074638841131052, "tokens_seen": 370933760 }, { "epoch": 0.11, "learning_rate": 0.0008964853153586905, "loss": 0.09, "theoretical_loss": 4.074315003775746, "tokens_seen": 371195904 }, { "epoch": 0.11, "learning_rate": 0.000896405071417108, "loss": 0.0913, "theoretical_loss": 4.073991459022544, "tokens_seen": 371458048 }, { "epoch": 0.11, "learning_rate": 0.0008963248274755256, "loss": 0.0868, "theoretical_loss": 4.073668206400851, "tokens_seen": 371720192 }, { "epoch": 0.11, "learning_rate": 0.0008962445835339432, "loss": 0.0952, "theoretical_loss": 4.0733452454411605, "tokens_seen": 371982336 }, { "epoch": 0.11, "learning_rate": 0.0008961643395923608, "loss": 0.0901, "theoretical_loss": 4.0730225756750515, "tokens_seen": 372244480 }, { "epoch": 0.11, "learning_rate": 0.0008960840956507784, "loss": 0.0874, "theoretical_loss": 4.072700196635185, "tokens_seen": 372506624 }, { "epoch": 0.11, "learning_rate": 0.0008960038517091959, "loss": 0.0904, "theoretical_loss": 4.072378107855299, "tokens_seen": 372768768 }, { "epoch": 0.11, "learning_rate": 0.0008959236077676137, "loss": 0.0864, "theoretical_loss": 4.0720563088702075, "tokens_seen": 373030912 }, { "epoch": 0.11, "learning_rate": 0.0008958433638260312, "loss": 0.0887, "theoretical_loss": 4.071734799215796, "tokens_seen": 373293056 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0020079510286450386, "objective/train/docs_used": 142333, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8369685411453247, "objective/train/original_loss": 1.8369685411453247, "objective/train/theoretical_loss": 4.071413578429017, "objective/train/tokens_used": 394015200, "objective/train/value_avg": -0.01097869873046875, "objective/train/value_loss": 0.000343000196153298, "objective/train/value_max": -0.00045299530029296875, "objective/train/value_min": -0.6923828125, "objective/train/value_reward_corr": 0.672175762308715, "objective/train/value_std": 0.0175018310546875, "objective/train/weight_avg": 1.0021660327911377, "objective/train/weighted_lm_loss": 1.8394285440444946, "objective/train/weights_max": 1.6616524457931519, "objective/train/weights_min": 0.37235790491104126, "theoretical_loss": 4.071413578429017, "tokens_seen": 373555200 }, { "epoch": 0.11, "learning_rate": 0.0008957631198844487, "loss": 0.0874, "theoretical_loss": 4.071413578429017, "tokens_seen": 373555200 }, { "epoch": 0.11, "learning_rate": 0.0008956828759428664, "loss": 0.0895, "theoretical_loss": 4.071092646047892, "tokens_seen": 373817344 }, { "epoch": 0.11, "learning_rate": 0.0008956026320012839, "loss": 0.088, "theoretical_loss": 4.0707720016115, "tokens_seen": 374079488 }, { "epoch": 0.11, "learning_rate": 0.0008955223880597015, "loss": 0.0872, "theoretical_loss": 4.0704516446599825, "tokens_seen": 374341632 }, { "epoch": 0.11, "learning_rate": 0.0008954421441181191, "loss": 0.0915, "theoretical_loss": 4.070131574734536, "tokens_seen": 374603776 }, { "epoch": 0.11, "learning_rate": 0.0008953619001765367, "loss": 0.0894, "theoretical_loss": 4.069811791377409, "tokens_seen": 374865920 }, { "epoch": 0.11, "learning_rate": 0.0008952816562349542, "loss": 0.0883, "theoretical_loss": 4.0694922941319, "tokens_seen": 375128064 }, { "epoch": 0.11, "learning_rate": 0.0008952014122933718, "loss": 0.091, "theoretical_loss": 4.069173082542351, "tokens_seen": 375390208 }, { "epoch": 0.11, "learning_rate": 0.0008951211683517895, "loss": 0.0935, "theoretical_loss": 4.068854156154154, "tokens_seen": 375652352 }, { "epoch": 0.11, "learning_rate": 0.000895040924410207, "loss": 0.0902, "theoretical_loss": 4.068535514513734, "tokens_seen": 375914496 }, { "epoch": 0.11, "learning_rate": 0.0008949606804686247, "loss": 0.0913, "theoretical_loss": 4.068217157168556, "tokens_seen": 376176640 }, { "epoch": 0.11, "learning_rate": 0.0008948804365270422, "loss": 0.0892, "theoretical_loss": 4.06789908366712, "tokens_seen": 376438784 }, { "epoch": 0.11, "learning_rate": 0.0008948001925854599, "loss": 0.0916, "theoretical_loss": 4.067581293558955, "tokens_seen": 376700928 }, { "epoch": 0.11, "learning_rate": 0.0008947199486438774, "loss": 0.0913, "theoretical_loss": 4.0672637863946175, "tokens_seen": 376963072 }, { "epoch": 0.11, "learning_rate": 0.0008946397047022949, "loss": 0.0882, "theoretical_loss": 4.0669465617256915, "tokens_seen": 377225216 }, { "epoch": 0.11, "learning_rate": 0.0008945594607607126, "loss": 0.0874, "theoretical_loss": 4.06662961910478, "tokens_seen": 377487360 }, { "epoch": 0.11, "learning_rate": 0.0008944792168191301, "loss": 0.0868, "theoretical_loss": 4.066312958085503, "tokens_seen": 377749504 }, { "epoch": 0.11, "learning_rate": 0.0008943989728775478, "loss": 0.0875, "theoretical_loss": 4.065996578222502, "tokens_seen": 378011648 }, { "epoch": 0.11, "learning_rate": 0.0008943187289359654, "loss": 0.0902, "theoretical_loss": 4.065680479071426, "tokens_seen": 378273792 }, { "epoch": 0.11, "learning_rate": 0.000894238484994383, "loss": 0.0869, "theoretical_loss": 4.0653646601889335, "tokens_seen": 378535936 }, { "epoch": 0.11, "learning_rate": 0.0008941582410528005, "loss": 0.0885, "theoretical_loss": 4.065049121132693, "tokens_seen": 378798080 }, { "epoch": 0.11, "learning_rate": 0.0008940779971112181, "loss": 0.0877, "theoretical_loss": 4.0647338614613755, "tokens_seen": 379060224 }, { "epoch": 0.11, "learning_rate": 0.0008939977531696357, "loss": 0.0859, "theoretical_loss": 4.0644188807346495, "tokens_seen": 379322368 }, { "epoch": 0.12, "learning_rate": 0.0008939175092280532, "loss": 0.0903, "theoretical_loss": 4.064104178513186, "tokens_seen": 379584512 }, { "epoch": 0.12, "learning_rate": 0.0008938372652864709, "loss": 0.0884, "theoretical_loss": 4.0637897543586465, "tokens_seen": 379846656 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0015821303240954876, "objective/train/docs_used": 144698, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6598097085952759, "objective/train/original_loss": 1.6598095893859863, "objective/train/theoretical_loss": 4.063475607833687, "objective/train/tokens_used": 400568800, "objective/train/value_avg": -0.007183074951171875, "objective/train/value_loss": 0.00021680242207366973, "objective/train/value_max": -0.0001881122589111328, "objective/train/value_min": -0.3359375, "objective/train/value_reward_corr": 0.5725408863624046, "objective/train/value_std": 0.00899505615234375, "objective/train/weight_avg": 1.0016818046569824, "objective/train/weighted_lm_loss": 1.662369728088379, "objective/train/weights_max": 1.1277165412902832, "objective/train/weights_min": 0.373598575592041, "theoretical_loss": 4.063475607833687, "tokens_seen": 380108800 }, { "epoch": 0.12, "learning_rate": 0.0008937570213448884, "loss": 0.0865, "theoretical_loss": 4.063475607833687, "tokens_seen": 380108800 }, { "epoch": 0.12, "learning_rate": 0.0008936767774033062, "loss": 0.089, "theoretical_loss": 4.063161738501951, "tokens_seen": 380370944 }, { "epoch": 0.12, "learning_rate": 0.0008935965334617237, "loss": 0.0886, "theoretical_loss": 4.0628481459280685, "tokens_seen": 380633088 }, { "epoch": 0.12, "learning_rate": 0.0008935162895201413, "loss": 0.0867, "theoretical_loss": 4.062534829677653, "tokens_seen": 380895232 }, { "epoch": 0.12, "learning_rate": 0.0008934360455785589, "loss": 0.0888, "theoretical_loss": 4.062221789317297, "tokens_seen": 381157376 }, { "epoch": 0.12, "learning_rate": 0.0008933558016369764, "loss": 0.087, "theoretical_loss": 4.061909024414572, "tokens_seen": 381419520 }, { "epoch": 0.12, "learning_rate": 0.000893275557695394, "loss": 0.0875, "theoretical_loss": 4.061596534538021, "tokens_seen": 381681664 }, { "epoch": 0.12, "learning_rate": 0.0008931953137538116, "loss": 0.0909, "theoretical_loss": 4.061284319257162, "tokens_seen": 381943808 }, { "epoch": 0.12, "learning_rate": 0.0008931150698122292, "loss": 0.0901, "theoretical_loss": 4.060972378142479, "tokens_seen": 382205952 }, { "epoch": 0.12, "learning_rate": 0.0008930348258706467, "loss": 0.0896, "theoretical_loss": 4.060660710765423, "tokens_seen": 382468096 }, { "epoch": 0.12, "learning_rate": 0.0008929545819290645, "loss": 0.0909, "theoretical_loss": 4.060349316698408, "tokens_seen": 382730240 }, { "epoch": 0.12, "learning_rate": 0.000892874337987482, "loss": 0.0874, "theoretical_loss": 4.060038195514808, "tokens_seen": 382992384 }, { "epoch": 0.12, "learning_rate": 0.0008927940940458995, "loss": 0.0866, "theoretical_loss": 4.059727346788955, "tokens_seen": 383254528 }, { "epoch": 0.12, "learning_rate": 0.0008927138501043172, "loss": 0.087, "theoretical_loss": 4.059416770096134, "tokens_seen": 383516672 }, { "epoch": 0.12, "learning_rate": 0.0008926336061627347, "loss": 0.0885, "theoretical_loss": 4.059106465012583, "tokens_seen": 383778816 }, { "epoch": 0.12, "learning_rate": 0.0008925533622211523, "loss": 0.0878, "theoretical_loss": 4.058796431115489, "tokens_seen": 384040960 }, { "epoch": 0.12, "learning_rate": 0.0008924731182795699, "loss": 0.0894, "theoretical_loss": 4.058486667982986, "tokens_seen": 384303104 }, { "epoch": 0.12, "learning_rate": 0.0008923928743379875, "loss": 0.0873, "theoretical_loss": 4.058177175194148, "tokens_seen": 384565248 }, { "epoch": 0.12, "learning_rate": 0.0008923126303964051, "loss": 0.0883, "theoretical_loss": 4.057867952328994, "tokens_seen": 384827392 }, { "epoch": 0.12, "learning_rate": 0.0008922323864548226, "loss": 0.0897, "theoretical_loss": 4.057558998968479, "tokens_seen": 385089536 }, { "epoch": 0.12, "learning_rate": 0.0008921521425132403, "loss": 0.0903, "theoretical_loss": 4.0572503146944925, "tokens_seen": 385351680 }, { "epoch": 0.12, "learning_rate": 0.0008920718985716579, "loss": 0.0884, "theoretical_loss": 4.056941899089858, "tokens_seen": 385613824 }, { "epoch": 0.12, "learning_rate": 0.0008919916546300755, "loss": 0.0881, "theoretical_loss": 4.056633751738328, "tokens_seen": 385875968 }, { "epoch": 0.12, "learning_rate": 0.000891911410688493, "loss": 0.0874, "theoretical_loss": 4.0563258722245825, "tokens_seen": 386138112 }, { "epoch": 0.12, "learning_rate": 0.0008918311667469107, "loss": 0.0868, "theoretical_loss": 4.056018260134226, "tokens_seen": 386400256 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.001992823090404272, "objective/train/docs_used": 146981, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.8857860565185547, "objective/train/original_loss": 1.8857860565185547, "objective/train/theoretical_loss": 4.055710915053783, "objective/train/tokens_used": 407122400, "objective/train/value_avg": -0.007904052734375, "objective/train/value_loss": 0.00022203872504178435, "objective/train/value_max": -0.00026726722717285156, "objective/train/value_min": -0.2406005859375, "objective/train/value_reward_corr": 0.5523747667992205, "objective/train/value_std": 0.01018524169921875, "objective/train/weight_avg": 1.0020960569381714, "objective/train/weighted_lm_loss": 1.8894469738006592, "objective/train/weights_max": 1.1491678953170776, "objective/train/weights_min": 0.3828728199005127, "theoretical_loss": 4.055710915053783, "tokens_seen": 386662400 }, { "epoch": 0.12, "learning_rate": 0.0008917509228053282, "loss": 0.088, "theoretical_loss": 4.055710915053783, "tokens_seen": 386662400 }, { "epoch": 0.12, "learning_rate": 0.0008916706788637457, "loss": 0.0869, "theoretical_loss": 4.055403836570701, "tokens_seen": 386924544 }, { "epoch": 0.12, "learning_rate": 0.0008915904349221634, "loss": 0.0911, "theoretical_loss": 4.05509702427334, "tokens_seen": 387186688 }, { "epoch": 0.12, "learning_rate": 0.0008915101909805809, "loss": 0.0864, "theoretical_loss": 4.054790477750974, "tokens_seen": 387448832 }, { "epoch": 0.12, "learning_rate": 0.0008914299470389986, "loss": 0.0875, "theoretical_loss": 4.054484196593791, "tokens_seen": 387710976 }, { "epoch": 0.12, "learning_rate": 0.0008913497030974162, "loss": 0.0888, "theoretical_loss": 4.054178180392885, "tokens_seen": 387973120 }, { "epoch": 0.12, "learning_rate": 0.0008912694591558338, "loss": 0.0832, "theoretical_loss": 4.053872428740256, "tokens_seen": 388235264 }, { "epoch": 0.12, "learning_rate": 0.0008911892152142514, "loss": 0.0889, "theoretical_loss": 4.053566941228809, "tokens_seen": 388497408 }, { "epoch": 0.12, "learning_rate": 0.0008911089712726689, "loss": 0.0889, "theoretical_loss": 4.053261717452346, "tokens_seen": 388759552 }, { "epoch": 0.12, "learning_rate": 0.0008910287273310865, "loss": 0.0861, "theoretical_loss": 4.052956757005573, "tokens_seen": 389021696 }, { "epoch": 0.12, "learning_rate": 0.0008909484833895041, "loss": 0.086, "theoretical_loss": 4.0526520594840845, "tokens_seen": 389283840 }, { "epoch": 0.12, "learning_rate": 0.0008908682394479217, "loss": 0.0902, "theoretical_loss": 4.052347624484373, "tokens_seen": 389545984 }, { "epoch": 0.12, "learning_rate": 0.0008907879955063392, "loss": 0.087, "theoretical_loss": 4.052043451603818, "tokens_seen": 389808128 }, { "epoch": 0.12, "learning_rate": 0.000890707751564757, "loss": 0.0893, "theoretical_loss": 4.051739540440688, "tokens_seen": 390070272 }, { "epoch": 0.12, "learning_rate": 0.0008906275076231745, "loss": 0.086, "theoretical_loss": 4.0514358905941386, "tokens_seen": 390332416 }, { "epoch": 0.12, "learning_rate": 0.0008905472636815921, "loss": 0.0877, "theoretical_loss": 4.051132501664204, "tokens_seen": 390594560 }, { "epoch": 0.12, "learning_rate": 0.0008904670197400097, "loss": 0.0895, "theoretical_loss": 4.050829373251803, "tokens_seen": 390856704 }, { "epoch": 0.12, "learning_rate": 0.0008903867757984272, "loss": 0.0898, "theoretical_loss": 4.050526504958727, "tokens_seen": 391118848 }, { "epoch": 0.12, "learning_rate": 0.0008903065318568448, "loss": 0.0846, "theoretical_loss": 4.050223896387647, "tokens_seen": 391380992 }, { "epoch": 0.12, "learning_rate": 0.0008902262879152624, "loss": 0.0873, "theoretical_loss": 4.0499215471421035, "tokens_seen": 391643136 }, { "epoch": 0.12, "learning_rate": 0.00089014604397368, "loss": 0.0884, "theoretical_loss": 4.049619456826508, "tokens_seen": 391905280 }, { "epoch": 0.12, "learning_rate": 0.0008900658000320975, "loss": 0.0869, "theoretical_loss": 4.0493176250461405, "tokens_seen": 392167424 }, { "epoch": 0.12, "learning_rate": 0.0008899855560905153, "loss": 0.0874, "theoretical_loss": 4.049016051407145, "tokens_seen": 392429568 }, { "epoch": 0.12, "learning_rate": 0.0008899053121489328, "loss": 0.0863, "theoretical_loss": 4.048714735516527, "tokens_seen": 392691712 }, { "epoch": 0.12, "learning_rate": 0.0008898250682073504, "loss": 0.0904, "theoretical_loss": 4.048413676982155, "tokens_seen": 392953856 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0020599644631147385, "objective/train/docs_used": 149327, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7652794122695923, "objective/train/original_loss": 1.7652791738510132, "objective/train/theoretical_loss": 4.048112875412752, "objective/train/tokens_used": 413676000, "objective/train/value_avg": -0.0103302001953125, "objective/train/value_loss": 0.00044312665704637766, "objective/train/value_max": -0.00033283233642578125, "objective/train/value_min": -0.95361328125, "objective/train/value_reward_corr": 0.6481849685057344, "objective/train/value_std": 0.0183258056640625, "objective/train/weight_avg": 1.002259373664856, "objective/train/weighted_lm_loss": 1.7693637609481812, "objective/train/weights_max": 1.8321107625961304, "objective/train/weights_min": 0.36900386214256287, "theoretical_loss": 4.048112875412752, "tokens_seen": 393216000 }, { "epoch": 0.12, "learning_rate": 0.000889744824265768, "loss": 0.0873, "theoretical_loss": 4.048112875412752, "tokens_seen": 393216000 }, { "epoch": 0.12, "learning_rate": 0.0008896645803241855, "loss": 0.0889, "theoretical_loss": 4.0478123304179, "tokens_seen": 393478144 }, { "epoch": 0.12, "learning_rate": 0.0008895843363826032, "loss": 0.0903, "theoretical_loss": 4.047512041608029, "tokens_seen": 393740288 }, { "epoch": 0.12, "learning_rate": 0.0008895040924410207, "loss": 0.0887, "theoretical_loss": 4.047212008594424, "tokens_seen": 394002432 }, { "epoch": 0.12, "learning_rate": 0.0008894238484994383, "loss": 0.0872, "theoretical_loss": 4.046912230989214, "tokens_seen": 394264576 }, { "epoch": 0.12, "learning_rate": 0.0008893436045578559, "loss": 0.0895, "theoretical_loss": 4.0466127084053785, "tokens_seen": 394526720 }, { "epoch": 0.12, "learning_rate": 0.0008892633606162734, "loss": 0.0879, "theoretical_loss": 4.046313440456733, "tokens_seen": 394788864 }, { "epoch": 0.12, "learning_rate": 0.0008891831166746911, "loss": 0.0888, "theoretical_loss": 4.0460144267579405, "tokens_seen": 395051008 }, { "epoch": 0.12, "learning_rate": 0.0008891028727331087, "loss": 0.0898, "theoretical_loss": 4.045715666924499, "tokens_seen": 395313152 }, { "epoch": 0.12, "learning_rate": 0.0008890226287915263, "loss": 0.0879, "theoretical_loss": 4.045417160572743, "tokens_seen": 395575296 }, { "epoch": 0.12, "learning_rate": 0.0008889423848499438, "loss": 0.0899, "theoretical_loss": 4.045118907319839, "tokens_seen": 395837440 }, { "epoch": 0.12, "learning_rate": 0.0008888621409083615, "loss": 0.0865, "theoretical_loss": 4.04482090678379, "tokens_seen": 396099584 }, { "epoch": 0.12, "learning_rate": 0.000888781896966779, "loss": 0.0908, "theoretical_loss": 4.044523158583421, "tokens_seen": 396361728 }, { "epoch": 0.12, "learning_rate": 0.0008887016530251965, "loss": 0.0891, "theoretical_loss": 4.044225662338388, "tokens_seen": 396623872 }, { "epoch": 0.12, "learning_rate": 0.0008886214090836142, "loss": 0.0882, "theoretical_loss": 4.04392841766917, "tokens_seen": 396886016 }, { "epoch": 0.12, "learning_rate": 0.0008885411651420317, "loss": 0.0861, "theoretical_loss": 4.043631424197067, "tokens_seen": 397148160 }, { "epoch": 0.12, "learning_rate": 0.0008884609212004495, "loss": 0.0896, "theoretical_loss": 4.0433346815442, "tokens_seen": 397410304 }, { "epoch": 0.12, "learning_rate": 0.000888380677258867, "loss": 0.0912, "theoretical_loss": 4.043038189333508, "tokens_seen": 397672448 }, { "epoch": 0.12, "learning_rate": 0.0008883004333172846, "loss": 0.0917, "theoretical_loss": 4.042741947188741, "tokens_seen": 397934592 }, { "epoch": 0.12, "learning_rate": 0.0008882201893757022, "loss": 0.0874, "theoretical_loss": 4.042445954734466, "tokens_seen": 398196736 }, { "epoch": 0.12, "learning_rate": 0.0008881399454341197, "loss": 0.0925, "theoretical_loss": 4.0421502115960575, "tokens_seen": 398458880 }, { "epoch": 0.12, "learning_rate": 0.0008880597014925373, "loss": 0.0852, "theoretical_loss": 4.0418547173997, "tokens_seen": 398721024 }, { "epoch": 0.12, "learning_rate": 0.0008879794575509549, "loss": 0.0883, "theoretical_loss": 4.041559471772382, "tokens_seen": 398983168 }, { "epoch": 0.12, "learning_rate": 0.0008878992136093725, "loss": 0.0904, "theoretical_loss": 4.041264474341896, "tokens_seen": 399245312 }, { "epoch": 0.12, "learning_rate": 0.00088781896966779, "loss": 0.0888, "theoretical_loss": 4.040969724736838, "tokens_seen": 399507456 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.00207437202334404, "objective/train/docs_used": 151634, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6508839130401611, "objective/train/original_loss": 1.6508839130401611, "objective/train/theoretical_loss": 4.040675222586599, "objective/train/tokens_used": 420229600, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.00020788678375538439, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.88134765625, "objective/train/value_reward_corr": 0.729374600933435, "objective/train/value_std": 0.015289306640625, "objective/train/weight_avg": 1.0021743774414062, "objective/train/weighted_lm_loss": 1.6536670923233032, "objective/train/weights_max": 1.6075432300567627, "objective/train/weights_min": 0.5353213548660278, "theoretical_loss": 4.040675222586599, "tokens_seen": 399769600 }, { "epoch": 0.12, "learning_rate": 0.0008877387257262078, "loss": 0.086, "theoretical_loss": 4.040675222586599, "tokens_seen": 399769600 }, { "epoch": 0.12, "learning_rate": 0.0008876584817846253, "loss": 0.0851, "theoretical_loss": 4.04038096752137, "tokens_seen": 400031744 }, { "epoch": 0.12, "learning_rate": 0.0008875782378430429, "loss": 0.0882, "theoretical_loss": 4.040086959172136, "tokens_seen": 400293888 }, { "epoch": 0.12, "learning_rate": 0.0008874979939014605, "loss": 0.0843, "theoretical_loss": 4.039793197170672, "tokens_seen": 400556032 }, { "epoch": 0.12, "learning_rate": 0.000887417749959878, "loss": 0.0894, "theoretical_loss": 4.039499681149547, "tokens_seen": 400818176 }, { "epoch": 0.12, "learning_rate": 0.0008873375060182957, "loss": 0.0886, "theoretical_loss": 4.039206410742114, "tokens_seen": 401080320 }, { "epoch": 0.12, "learning_rate": 0.0008872572620767132, "loss": 0.0864, "theoretical_loss": 4.038913385582515, "tokens_seen": 401342464 }, { "epoch": 0.12, "learning_rate": 0.0008871770181351308, "loss": 0.0867, "theoretical_loss": 4.038620605305673, "tokens_seen": 401604608 }, { "epoch": 0.12, "learning_rate": 0.0008870967741935484, "loss": 0.0843, "theoretical_loss": 4.038328069547293, "tokens_seen": 401866752 }, { "epoch": 0.12, "learning_rate": 0.000887016530251966, "loss": 0.0877, "theoretical_loss": 4.03803577794386, "tokens_seen": 402128896 }, { "epoch": 0.12, "learning_rate": 0.0008869362863103836, "loss": 0.0886, "theoretical_loss": 4.037743730132635, "tokens_seen": 402391040 }, { "epoch": 0.12, "learning_rate": 0.0008868560423688012, "loss": 0.085, "theoretical_loss": 4.037451925751654, "tokens_seen": 402653184 }, { "epoch": 0.12, "learning_rate": 0.0008867757984272188, "loss": 0.0875, "theoretical_loss": 4.0371603644397265, "tokens_seen": 402915328 }, { "epoch": 0.12, "learning_rate": 0.0008866955544856363, "loss": 0.0896, "theoretical_loss": 4.03686904583643, "tokens_seen": 403177472 }, { "epoch": 0.12, "learning_rate": 0.000886615310544054, "loss": 0.087, "theoretical_loss": 4.036577969582114, "tokens_seen": 403439616 }, { "epoch": 0.12, "learning_rate": 0.0008865350666024715, "loss": 0.0857, "theoretical_loss": 4.03628713531789, "tokens_seen": 403701760 }, { "epoch": 0.12, "learning_rate": 0.0008864548226608891, "loss": 0.089, "theoretical_loss": 4.035996542685638, "tokens_seen": 403963904 }, { "epoch": 0.12, "learning_rate": 0.0008863745787193067, "loss": 0.0859, "theoretical_loss": 4.0357061913279955, "tokens_seen": 404226048 }, { "epoch": 0.12, "learning_rate": 0.0008862943347777242, "loss": 0.0867, "theoretical_loss": 4.035416080888364, "tokens_seen": 404488192 }, { "epoch": 0.12, "learning_rate": 0.0008862140908361419, "loss": 0.0893, "theoretical_loss": 4.035126211010899, "tokens_seen": 404750336 }, { "epoch": 0.12, "learning_rate": 0.0008861338468945595, "loss": 0.0851, "theoretical_loss": 4.034836581340515, "tokens_seen": 405012480 }, { "epoch": 0.12, "learning_rate": 0.0008860536029529771, "loss": 0.0866, "theoretical_loss": 4.034547191522877, "tokens_seen": 405274624 }, { "epoch": 0.12, "learning_rate": 0.0008859733590113947, "loss": 0.0928, "theoretical_loss": 4.034258041204404, "tokens_seen": 405536768 }, { "epoch": 0.12, "learning_rate": 0.0008858931150698123, "loss": 0.0869, "theoretical_loss": 4.033969130032263, "tokens_seen": 405798912 }, { "epoch": 0.12, "learning_rate": 0.0008858128711282298, "loss": 0.0854, "theoretical_loss": 4.033680457654368, "tokens_seen": 406061056 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.001684014918282628, "objective/train/docs_used": 154148, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6580567359924316, "objective/train/original_loss": 1.6580567359924316, "objective/train/theoretical_loss": 4.0333920237193785, "objective/train/tokens_used": 426783200, "objective/train/value_avg": -0.00681304931640625, "objective/train/value_loss": 0.00015735704801045358, "objective/train/value_max": -0.0002269744873046875, "objective/train/value_min": -0.309326171875, "objective/train/value_reward_corr": 0.6036299621134384, "objective/train/value_std": 0.0090484619140625, "objective/train/weight_avg": 1.0017573833465576, "objective/train/weighted_lm_loss": 1.6613595485687256, "objective/train/weights_max": 1.1686903238296509, "objective/train/weights_min": 0.36919885873794556, "theoretical_loss": 4.0333920237193785, "tokens_seen": 406323200 }, { "epoch": 0.12, "learning_rate": 0.0008857326271866474, "loss": 0.0855, "theoretical_loss": 4.0333920237193785, "tokens_seen": 406323200 }, { "epoch": 0.12, "learning_rate": 0.000885652383245065, "loss": 0.0818, "theoretical_loss": 4.0331038278766975, "tokens_seen": 406585344 }, { "epoch": 0.12, "learning_rate": 0.0008855721393034825, "loss": 0.0891, "theoretical_loss": 4.032815869776471, "tokens_seen": 406847488 }, { "epoch": 0.12, "learning_rate": 0.0008854918953619003, "loss": 0.0883, "theoretical_loss": 4.032528149069579, "tokens_seen": 407109632 }, { "epoch": 0.12, "learning_rate": 0.0008854116514203178, "loss": 0.0888, "theoretical_loss": 4.0322406654076435, "tokens_seen": 407371776 }, { "epoch": 0.12, "learning_rate": 0.0008853314074787354, "loss": 0.0886, "theoretical_loss": 4.0319534184430195, "tokens_seen": 407633920 }, { "epoch": 0.12, "learning_rate": 0.000885251163537153, "loss": 0.0864, "theoretical_loss": 4.031666407828795, "tokens_seen": 407896064 }, { "epoch": 0.12, "learning_rate": 0.0008851709195955705, "loss": 0.0863, "theoretical_loss": 4.03137963321879, "tokens_seen": 408158208 }, { "epoch": 0.12, "learning_rate": 0.0008850906756539881, "loss": 0.0886, "theoretical_loss": 4.0310930942675505, "tokens_seen": 408420352 }, { "epoch": 0.12, "learning_rate": 0.0008850104317124057, "loss": 0.0875, "theoretical_loss": 4.030806790630353, "tokens_seen": 408682496 }, { "epoch": 0.12, "learning_rate": 0.0008849301877708233, "loss": 0.0851, "theoretical_loss": 4.030520721963199, "tokens_seen": 408944640 }, { "epoch": 0.12, "learning_rate": 0.0008848499438292408, "loss": 0.0853, "theoretical_loss": 4.030234887922808, "tokens_seen": 409206784 }, { "epoch": 0.12, "learning_rate": 0.0008847696998876586, "loss": 0.0874, "theoretical_loss": 4.029949288166627, "tokens_seen": 409468928 }, { "epoch": 0.12, "learning_rate": 0.0008846894559460761, "loss": 0.0822, "theoretical_loss": 4.0296639223528175, "tokens_seen": 409731072 }, { "epoch": 0.12, "learning_rate": 0.0008846092120044937, "loss": 0.0858, "theoretical_loss": 4.029378790140261, "tokens_seen": 409993216 }, { "epoch": 0.12, "learning_rate": 0.0008845289680629113, "loss": 0.0862, "theoretical_loss": 4.029093891188552, "tokens_seen": 410255360 }, { "epoch": 0.12, "learning_rate": 0.0008844487241213288, "loss": 0.0876, "theoretical_loss": 4.028809225158, "tokens_seen": 410517504 }, { "epoch": 0.12, "learning_rate": 0.0008843684801797465, "loss": 0.0856, "theoretical_loss": 4.028524791709621, "tokens_seen": 410779648 }, { "epoch": 0.12, "learning_rate": 0.000884288236238164, "loss": 0.0863, "theoretical_loss": 4.028240590505148, "tokens_seen": 411041792 }, { "epoch": 0.12, "learning_rate": 0.0008842079922965816, "loss": 0.0859, "theoretical_loss": 4.027956621207015, "tokens_seen": 411303936 }, { "epoch": 0.12, "learning_rate": 0.0008841277483549992, "loss": 0.0858, "theoretical_loss": 4.027672883478364, "tokens_seen": 411566080 }, { "epoch": 0.12, "learning_rate": 0.0008840475044134169, "loss": 0.0892, "theoretical_loss": 4.027389376983041, "tokens_seen": 411828224 }, { "epoch": 0.12, "learning_rate": 0.0008839672604718344, "loss": 0.0893, "theoretical_loss": 4.02710610138559, "tokens_seen": 412090368 }, { "epoch": 0.12, "learning_rate": 0.000883887016530252, "loss": 0.0863, "theoretical_loss": 4.02682305635126, "tokens_seen": 412352512 }, { "epoch": 0.13, "learning_rate": 0.0008838067725886696, "loss": 0.0875, "theoretical_loss": 4.026540241545994, "tokens_seen": 412614656 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.00014230117085389793, "objective/train/docs_used": 156351, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.629351019859314, "objective/train/original_loss": 1.6293511390686035, "objective/train/theoretical_loss": 4.026257656636431, "objective/train/tokens_used": 433336800, "objective/train/value_avg": -0.00789642333984375, "objective/train/value_loss": 0.0005366242257878184, "objective/train/value_max": -0.00022172927856445312, "objective/train/value_min": -0.9091796875, "objective/train/value_reward_corr": 0.6416809694837178, "objective/train/value_std": 0.017822265625, "objective/train/weight_avg": 1.0003734827041626, "objective/train/weighted_lm_loss": 1.628688931465149, "objective/train/weights_max": 1.8231868743896484, "objective/train/weights_min": 0.36822012066841125, "theoretical_loss": 4.026257656636431, "tokens_seen": 412876800 }, { "epoch": 0.13, "learning_rate": 0.0008837265286470871, "loss": 0.0856, "theoretical_loss": 4.026257656636431, "tokens_seen": 412876800 }, { "epoch": 0.13, "learning_rate": 0.0008836462847055048, "loss": 0.0869, "theoretical_loss": 4.025975301289906, "tokens_seen": 413138944 }, { "epoch": 0.13, "learning_rate": 0.0008835660407639223, "loss": 0.0864, "theoretical_loss": 4.025693175174443, "tokens_seen": 413401088 }, { "epoch": 0.13, "learning_rate": 0.00088348579682234, "loss": 0.0836, "theoretical_loss": 4.02541127795876, "tokens_seen": 413663232 }, { "epoch": 0.13, "learning_rate": 0.0008834055528807575, "loss": 0.0861, "theoretical_loss": 4.02512960931226, "tokens_seen": 413925376 }, { "epoch": 0.13, "learning_rate": 0.000883325308939175, "loss": 0.0831, "theoretical_loss": 4.024848168905035, "tokens_seen": 414187520 }, { "epoch": 0.13, "learning_rate": 0.0008832450649975928, "loss": 0.0864, "theoretical_loss": 4.02456695640786, "tokens_seen": 414449664 }, { "epoch": 0.13, "learning_rate": 0.0008831648210560103, "loss": 0.0855, "theoretical_loss": 4.0242859714921915, "tokens_seen": 414711808 }, { "epoch": 0.13, "learning_rate": 0.0008830845771144279, "loss": 0.0886, "theoretical_loss": 4.024005213830171, "tokens_seen": 414973952 }, { "epoch": 0.13, "learning_rate": 0.0008830043331728455, "loss": 0.0833, "theoretical_loss": 4.023724683094615, "tokens_seen": 415236096 }, { "epoch": 0.13, "learning_rate": 0.0008829240892312631, "loss": 0.0833, "theoretical_loss": 4.023444378959019, "tokens_seen": 415498240 }, { "epoch": 0.13, "learning_rate": 0.0008828438452896806, "loss": 0.085, "theoretical_loss": 4.023164301097555, "tokens_seen": 415760384 }, { "epoch": 0.13, "learning_rate": 0.0008827636013480982, "loss": 0.0854, "theoretical_loss": 4.0228844491850655, "tokens_seen": 416022528 }, { "epoch": 0.13, "learning_rate": 0.0008826833574065158, "loss": 0.0873, "theoretical_loss": 4.022604822897068, "tokens_seen": 416284672 }, { "epoch": 0.13, "learning_rate": 0.0008826031134649333, "loss": 0.0872, "theoretical_loss": 4.0223254219097475, "tokens_seen": 416546816 }, { "epoch": 0.13, "learning_rate": 0.0008825228695233511, "loss": 0.0875, "theoretical_loss": 4.022046245899958, "tokens_seen": 416808960 }, { "epoch": 0.13, "learning_rate": 0.0008824426255817686, "loss": 0.0856, "theoretical_loss": 4.021767294545221, "tokens_seen": 417071104 }, { "epoch": 0.13, "learning_rate": 0.0008823623816401862, "loss": 0.0872, "theoretical_loss": 4.021488567523721, "tokens_seen": 417333248 }, { "epoch": 0.13, "learning_rate": 0.0008822821376986038, "loss": 0.087, "theoretical_loss": 4.021210064514305, "tokens_seen": 417595392 }, { "epoch": 0.13, "learning_rate": 0.0008822018937570213, "loss": 0.0833, "theoretical_loss": 4.020931785196484, "tokens_seen": 417857536 }, { "epoch": 0.13, "learning_rate": 0.000882121649815439, "loss": 0.0849, "theoretical_loss": 4.020653729250424, "tokens_seen": 418119680 }, { "epoch": 0.13, "learning_rate": 0.0008820414058738565, "loss": 0.0847, "theoretical_loss": 4.020375896356951, "tokens_seen": 418381824 }, { "epoch": 0.13, "learning_rate": 0.0008819611619322741, "loss": 0.0905, "theoretical_loss": 4.0200982861975465, "tokens_seen": 418643968 }, { "epoch": 0.13, "learning_rate": 0.0008818809179906917, "loss": 0.0879, "theoretical_loss": 4.019820898454345, "tokens_seen": 418906112 }, { "epoch": 0.13, "learning_rate": 0.0008818006740491094, "loss": 0.0838, "theoretical_loss": 4.019543732810134, "tokens_seen": 419168256 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0006775321089662611, "objective/train/docs_used": 158751, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7296289205551147, "objective/train/original_loss": 1.7296291589736938, "objective/train/theoretical_loss": 4.019266788948352, "objective/train/tokens_used": 439890400, "objective/train/value_avg": -0.00728607177734375, "objective/train/value_loss": 0.0003692489117383957, "objective/train/value_max": -0.0002613067626953125, "objective/train/value_min": -0.51171875, "objective/train/value_reward_corr": 0.557088638783727, "objective/train/value_std": 0.01107025146484375, "objective/train/weight_avg": 1.0008379220962524, "objective/train/weighted_lm_loss": 1.7313810586929321, "objective/train/weights_max": 1.540909767150879, "objective/train/weights_min": 0.373028963804245, "theoretical_loss": 4.019266788948352, "tokens_seen": 419430400 }, { "epoch": 0.13, "learning_rate": 0.0008817204301075269, "loss": 0.0871, "theoretical_loss": 4.019266788948352, "tokens_seen": 419430400 }, { "epoch": 0.13, "learning_rate": 0.0008816401861659445, "loss": 0.0896, "theoretical_loss": 4.0189900665530836, "tokens_seen": 419692544 }, { "epoch": 0.13, "learning_rate": 0.0008815599422243621, "loss": 0.0877, "theoretical_loss": 4.0187135653090635, "tokens_seen": 419954688 }, { "epoch": 0.13, "learning_rate": 0.0008814796982827796, "loss": 0.0827, "theoretical_loss": 4.018437284901671, "tokens_seen": 420216832 }, { "epoch": 0.13, "learning_rate": 0.0008813994543411973, "loss": 0.0896, "theoretical_loss": 4.018161225016926, "tokens_seen": 420478976 }, { "epoch": 0.13, "learning_rate": 0.0008813192103996148, "loss": 0.0875, "theoretical_loss": 4.0178853853414935, "tokens_seen": 420741120 }, { "epoch": 0.13, "learning_rate": 0.0008812389664580324, "loss": 0.087, "theoretical_loss": 4.017609765562678, "tokens_seen": 421003264 }, { "epoch": 0.13, "learning_rate": 0.00088115872251645, "loss": 0.0871, "theoretical_loss": 4.017334365368422, "tokens_seen": 421265408 }, { "epoch": 0.13, "learning_rate": 0.0008810784785748677, "loss": 0.0888, "theoretical_loss": 4.017059184447303, "tokens_seen": 421527552 }, { "epoch": 0.13, "learning_rate": 0.0008809982346332853, "loss": 0.0875, "theoretical_loss": 4.0167842224885355, "tokens_seen": 421789696 }, { "epoch": 0.13, "learning_rate": 0.0008809179906917028, "loss": 0.0827, "theoretical_loss": 4.016509479181968, "tokens_seen": 422051840 }, { "epoch": 0.13, "learning_rate": 0.0008808377467501204, "loss": 0.0861, "theoretical_loss": 4.016234954218078, "tokens_seen": 422313984 }, { "epoch": 0.13, "learning_rate": 0.000880757502808538, "loss": 0.0855, "theoretical_loss": 4.015960647287975, "tokens_seen": 422576128 }, { "epoch": 0.13, "learning_rate": 0.0008806772588669556, "loss": 0.0874, "theoretical_loss": 4.015686558083396, "tokens_seen": 422838272 }, { "epoch": 0.13, "learning_rate": 0.0008805970149253731, "loss": 0.0878, "theoretical_loss": 4.015412686296704, "tokens_seen": 423100416 }, { "epoch": 0.13, "learning_rate": 0.0008805167709837908, "loss": 0.0869, "theoretical_loss": 4.0151390316208895, "tokens_seen": 423362560 }, { "epoch": 0.13, "learning_rate": 0.0008804365270422083, "loss": 0.0872, "theoretical_loss": 4.014865593749563, "tokens_seen": 423624704 }, { "epoch": 0.13, "learning_rate": 0.0008803562831006258, "loss": 0.0852, "theoretical_loss": 4.014592372376958, "tokens_seen": 423886848 }, { "epoch": 0.13, "learning_rate": 0.0008802760391590436, "loss": 0.0871, "theoretical_loss": 4.014319367197928, "tokens_seen": 424148992 }, { "epoch": 0.13, "learning_rate": 0.0008801957952174611, "loss": 0.0905, "theoretical_loss": 4.014046577907946, "tokens_seen": 424411136 }, { "epoch": 0.13, "learning_rate": 0.0008801155512758787, "loss": 0.0886, "theoretical_loss": 4.013774004203099, "tokens_seen": 424673280 }, { "epoch": 0.13, "learning_rate": 0.0008800353073342963, "loss": 0.0873, "theoretical_loss": 4.013501645780092, "tokens_seen": 424935424 }, { "epoch": 0.13, "learning_rate": 0.0008799550633927139, "loss": 0.0835, "theoretical_loss": 4.013229502336242, "tokens_seen": 425197568 }, { "epoch": 0.13, "learning_rate": 0.0008798748194511314, "loss": 0.0863, "theoretical_loss": 4.0129575735694765, "tokens_seen": 425459712 }, { "epoch": 0.13, "learning_rate": 0.000879794575509549, "loss": 0.0891, "theoretical_loss": 4.012685859178337, "tokens_seen": 425721856 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.001993941143155098, "objective/train/docs_used": 161088, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6738786697387695, "objective/train/original_loss": 1.6738786697387695, "objective/train/theoretical_loss": 4.012414358861969, "objective/train/tokens_used": 446444000, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.00036461447598412633, "objective/train/value_max": -0.00036263465881347656, "objective/train/value_min": -0.89501953125, "objective/train/value_reward_corr": 0.7342816416076462, "objective/train/value_std": 0.018157958984375, "objective/train/weight_avg": 1.0021560192108154, "objective/train/weighted_lm_loss": 1.676504373550415, "objective/train/weights_max": 1.6657140254974365, "objective/train/weights_min": 0.3717136085033417, "theoretical_loss": 4.012414358861969, "tokens_seen": 425984000 }, { "epoch": 0.13, "learning_rate": 0.0008797143315679666, "loss": 0.0842, "theoretical_loss": 4.012414358861969, "tokens_seen": 425984000 }, { "epoch": 0.13, "learning_rate": 0.0008796340876263842, "loss": 0.0868, "theoretical_loss": 4.01214307232013, "tokens_seen": 426246144 }, { "epoch": 0.13, "learning_rate": 0.0008795538436848019, "loss": 0.0863, "theoretical_loss": 4.011871999253178, "tokens_seen": 426508288 }, { "epoch": 0.13, "learning_rate": 0.0008794735997432194, "loss": 0.0883, "theoretical_loss": 4.011601139362078, "tokens_seen": 426770432 }, { "epoch": 0.13, "learning_rate": 0.0008793933558016371, "loss": 0.0852, "theoretical_loss": 4.011330492348397, "tokens_seen": 427032576 }, { "epoch": 0.13, "learning_rate": 0.0008793131118600546, "loss": 0.0891, "theoretical_loss": 4.0110600579143, "tokens_seen": 427294720 }, { "epoch": 0.13, "learning_rate": 0.0008792328679184721, "loss": 0.0866, "theoretical_loss": 4.010789835762555, "tokens_seen": 427556864 }, { "epoch": 0.13, "learning_rate": 0.0008791526239768898, "loss": 0.0888, "theoretical_loss": 4.010519825596525, "tokens_seen": 427819008 }, { "epoch": 0.13, "learning_rate": 0.0008790723800353073, "loss": 0.086, "theoretical_loss": 4.010250027120169, "tokens_seen": 428081152 }, { "epoch": 0.13, "learning_rate": 0.0008789921360937249, "loss": 0.0852, "theoretical_loss": 4.009980440038043, "tokens_seen": 428343296 }, { "epoch": 0.13, "learning_rate": 0.0008789118921521425, "loss": 0.0889, "theoretical_loss": 4.009711064055291, "tokens_seen": 428605440 }, { "epoch": 0.13, "learning_rate": 0.0008788316482105602, "loss": 0.0852, "theoretical_loss": 4.009441898877652, "tokens_seen": 428867584 }, { "epoch": 0.13, "learning_rate": 0.0008787514042689777, "loss": 0.0852, "theoretical_loss": 4.009172944211455, "tokens_seen": 429129728 }, { "epoch": 0.13, "learning_rate": 0.0008786711603273953, "loss": 0.087, "theoretical_loss": 4.008904199763615, "tokens_seen": 429391872 }, { "epoch": 0.13, "learning_rate": 0.0008785909163858129, "loss": 0.0851, "theoretical_loss": 4.008635665241635, "tokens_seen": 429654016 }, { "epoch": 0.13, "learning_rate": 0.0008785106724442304, "loss": 0.0883, "theoretical_loss": 4.008367340353602, "tokens_seen": 429916160 }, { "epoch": 0.13, "learning_rate": 0.0008784304285026481, "loss": 0.0863, "theoretical_loss": 4.008099224808188, "tokens_seen": 430178304 }, { "epoch": 0.13, "learning_rate": 0.0008783501845610656, "loss": 0.0837, "theoretical_loss": 4.007831318314645, "tokens_seen": 430440448 }, { "epoch": 0.13, "learning_rate": 0.0008782699406194833, "loss": 0.0846, "theoretical_loss": 4.00756362058281, "tokens_seen": 430702592 }, { "epoch": 0.13, "learning_rate": 0.0008781896966779008, "loss": 0.0839, "theoretical_loss": 4.007296131323094, "tokens_seen": 430964736 }, { "epoch": 0.13, "learning_rate": 0.0008781094527363185, "loss": 0.0866, "theoretical_loss": 4.007028850246487, "tokens_seen": 431226880 }, { "epoch": 0.13, "learning_rate": 0.0008780292087947361, "loss": 0.0861, "theoretical_loss": 4.006761777064557, "tokens_seen": 431489024 }, { "epoch": 0.13, "learning_rate": 0.0008779489648531536, "loss": 0.0854, "theoretical_loss": 4.006494911489444, "tokens_seen": 431751168 }, { "epoch": 0.13, "learning_rate": 0.0008778687209115712, "loss": 0.086, "theoretical_loss": 4.006228253233864, "tokens_seen": 432013312 }, { "epoch": 0.13, "learning_rate": 0.0008777884769699888, "loss": 0.088, "theoretical_loss": 4.0059618020111, "tokens_seen": 432275456 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0015352979535236955, "objective/train/docs_used": 163281, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.734284520149231, "objective/train/original_loss": 1.7342844009399414, "objective/train/theoretical_loss": 4.00569555753501, "objective/train/tokens_used": 452997600, "objective/train/value_avg": -0.006992340087890625, "objective/train/value_loss": 0.00020126513845752925, "objective/train/value_max": -0.0002913475036621094, "objective/train/value_min": -0.54833984375, "objective/train/value_reward_corr": 0.5731349995282375, "objective/train/value_std": 0.01129913330078125, "objective/train/weight_avg": 1.0016310214996338, "objective/train/weighted_lm_loss": 1.7371350526809692, "objective/train/weights_max": 1.6854356527328491, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 4.00569555753501, "tokens_seen": 432537600 }, { "epoch": 0.13, "learning_rate": 0.0008777082330284064, "loss": 0.0868, "theoretical_loss": 4.00569555753501, "tokens_seen": 432537600 }, { "epoch": 0.13, "learning_rate": 0.0008776279890868239, "loss": 0.0854, "theoretical_loss": 4.0054295195200185, "tokens_seen": 432799744 }, { "epoch": 0.13, "learning_rate": 0.0008775477451452416, "loss": 0.085, "theoretical_loss": 4.005163687681116, "tokens_seen": 433061888 }, { "epoch": 0.13, "learning_rate": 0.0008774675012036591, "loss": 0.085, "theoretical_loss": 4.00489806173386, "tokens_seen": 433324032 }, { "epoch": 0.13, "learning_rate": 0.0008773872572620766, "loss": 0.0889, "theoretical_loss": 4.004632641394372, "tokens_seen": 433586176 }, { "epoch": 0.13, "learning_rate": 0.0008773070133204944, "loss": 0.0868, "theoretical_loss": 4.0043674263793365, "tokens_seen": 433848320 }, { "epoch": 0.13, "learning_rate": 0.0008772267693789119, "loss": 0.0842, "theoretical_loss": 4.004102416405998, "tokens_seen": 434110464 }, { "epoch": 0.13, "learning_rate": 0.0008771465254373296, "loss": 0.0853, "theoretical_loss": 4.0038376111921625, "tokens_seen": 434372608 }, { "epoch": 0.13, "learning_rate": 0.0008770662814957471, "loss": 0.0872, "theoretical_loss": 4.0035730104561935, "tokens_seen": 434634752 }, { "epoch": 0.13, "learning_rate": 0.0008769860375541647, "loss": 0.0888, "theoretical_loss": 4.003308613917012, "tokens_seen": 434896896 }, { "epoch": 0.13, "learning_rate": 0.0008769057936125823, "loss": 0.0873, "theoretical_loss": 4.003044421294094, "tokens_seen": 435159040 }, { "epoch": 0.13, "learning_rate": 0.0008768255496709998, "loss": 0.0852, "theoretical_loss": 4.002780432307468, "tokens_seen": 435421184 }, { "epoch": 0.13, "learning_rate": 0.0008767453057294174, "loss": 0.0853, "theoretical_loss": 4.0025166466777184, "tokens_seen": 435683328 }, { "epoch": 0.13, "learning_rate": 0.000876665061787835, "loss": 0.0868, "theoretical_loss": 4.00225306412598, "tokens_seen": 435945472 }, { "epoch": 0.13, "learning_rate": 0.0008765848178462527, "loss": 0.0869, "theoretical_loss": 4.001989684373934, "tokens_seen": 436207616 }, { "epoch": 0.13, "learning_rate": 0.0008765045739046702, "loss": 0.0863, "theoretical_loss": 4.0017265071438155, "tokens_seen": 436469760 }, { "epoch": 0.13, "learning_rate": 0.0008764243299630879, "loss": 0.0863, "theoretical_loss": 4.001463532158402, "tokens_seen": 436731904 }, { "epoch": 0.13, "learning_rate": 0.0008763440860215054, "loss": 0.0864, "theoretical_loss": 4.001200759141019, "tokens_seen": 436994048 }, { "epoch": 0.13, "learning_rate": 0.0008762638420799229, "loss": 0.0857, "theoretical_loss": 4.000938187815535, "tokens_seen": 437256192 }, { "epoch": 0.13, "learning_rate": 0.0008761835981383406, "loss": 0.0852, "theoretical_loss": 4.000675817906362, "tokens_seen": 437518336 }, { "epoch": 0.13, "learning_rate": 0.0008761033541967581, "loss": 0.0852, "theoretical_loss": 4.000413649138453, "tokens_seen": 437780480 }, { "epoch": 0.13, "learning_rate": 0.0008760231102551757, "loss": 0.0871, "theoretical_loss": 4.000151681237301, "tokens_seen": 438042624 }, { "epoch": 0.13, "learning_rate": 0.0008759428663135933, "loss": 0.0851, "theoretical_loss": 3.9998899139289392, "tokens_seen": 438304768 }, { "epoch": 0.13, "learning_rate": 0.000875862622372011, "loss": 0.0855, "theoretical_loss": 3.999628346939934, "tokens_seen": 438566912 }, { "epoch": 0.13, "learning_rate": 0.0008757823784304286, "loss": 0.088, "theoretical_loss": 3.9993669799973928, "tokens_seen": 438829056 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0022315175738185644, "objective/train/docs_used": 165818, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.725619912147522, "objective/train/original_loss": 1.725620150566101, "objective/train/theoretical_loss": 3.9991058128289536, "objective/train/tokens_used": 459551200, "objective/train/value_avg": -0.009033203125, "objective/train/value_loss": 0.0003572371497284621, "objective/train/value_max": -0.0001780986785888672, "objective/train/value_min": -0.8076171875, "objective/train/value_reward_corr": 0.7491131113466319, "objective/train/value_std": 0.020477294921875, "objective/train/weight_avg": 1.002398133277893, "objective/train/weighted_lm_loss": 1.7294703722000122, "objective/train/weights_max": 1.4387959241867065, "objective/train/weights_min": 0.41180431842803955, "theoretical_loss": 3.9991058128289536, "tokens_seen": 439091200 }, { "epoch": 0.13, "learning_rate": 0.0008757021344888461, "loss": 0.0852, "theoretical_loss": 3.9991058128289536, "tokens_seen": 439091200 }, { "epoch": 0.13, "learning_rate": 0.0008756218905472637, "loss": 0.0893, "theoretical_loss": 3.998844845162789, "tokens_seen": 439353344 }, { "epoch": 0.13, "learning_rate": 0.0008755416466056813, "loss": 0.0869, "theoretical_loss": 3.998584076727604, "tokens_seen": 439615488 }, { "epoch": 0.13, "learning_rate": 0.0008754614026640989, "loss": 0.0882, "theoretical_loss": 3.998323507252633, "tokens_seen": 439877632 }, { "epoch": 0.13, "learning_rate": 0.0008753811587225164, "loss": 0.0848, "theoretical_loss": 3.998063136467639, "tokens_seen": 440139776 }, { "epoch": 0.13, "learning_rate": 0.0008753009147809341, "loss": 0.0875, "theoretical_loss": 3.9978029641029154, "tokens_seen": 440401920 }, { "epoch": 0.13, "learning_rate": 0.0008752206708393516, "loss": 0.0858, "theoretical_loss": 3.9975429898892783, "tokens_seen": 440664064 }, { "epoch": 0.13, "learning_rate": 0.0008751404268977691, "loss": 0.0866, "theoretical_loss": 3.9972832135580707, "tokens_seen": 440926208 }, { "epoch": 0.13, "learning_rate": 0.0008750601829561869, "loss": 0.0863, "theoretical_loss": 3.9970236348411605, "tokens_seen": 441188352 }, { "epoch": 0.13, "learning_rate": 0.0008749799390146044, "loss": 0.0866, "theoretical_loss": 3.996764253470935, "tokens_seen": 441450496 }, { "epoch": 0.13, "learning_rate": 0.000874899695073022, "loss": 0.0821, "theoretical_loss": 3.996505069180306, "tokens_seen": 441712640 }, { "epoch": 0.13, "learning_rate": 0.0008748194511314396, "loss": 0.0869, "theoretical_loss": 3.9962460817027017, "tokens_seen": 441974784 }, { "epoch": 0.13, "learning_rate": 0.0008747392071898572, "loss": 0.0861, "theoretical_loss": 3.995987290772071, "tokens_seen": 442236928 }, { "epoch": 0.13, "learning_rate": 0.0008746589632482748, "loss": 0.0853, "theoretical_loss": 3.9957286961228786, "tokens_seen": 442499072 }, { "epoch": 0.13, "learning_rate": 0.0008745787193066924, "loss": 0.0835, "theoretical_loss": 3.995470297490106, "tokens_seen": 442761216 }, { "epoch": 0.13, "learning_rate": 0.0008744984753651099, "loss": 0.0836, "theoretical_loss": 3.995212094609249, "tokens_seen": 443023360 }, { "epoch": 0.13, "learning_rate": 0.0008744182314235275, "loss": 0.0854, "theoretical_loss": 3.994954087216315, "tokens_seen": 443285504 }, { "epoch": 0.13, "learning_rate": 0.0008743379874819452, "loss": 0.0881, "theoretical_loss": 3.994696275047825, "tokens_seen": 443547648 }, { "epoch": 0.13, "learning_rate": 0.0008742577435403627, "loss": 0.0857, "theoretical_loss": 3.9944386578408113, "tokens_seen": 443809792 }, { "epoch": 0.13, "learning_rate": 0.0008741774995987804, "loss": 0.0838, "theoretical_loss": 3.9941812353328126, "tokens_seen": 444071936 }, { "epoch": 0.13, "learning_rate": 0.0008740972556571979, "loss": 0.0846, "theoretical_loss": 3.993924007261878, "tokens_seen": 444334080 }, { "epoch": 0.13, "learning_rate": 0.0008740170117156155, "loss": 0.0878, "theoretical_loss": 3.9936669733665617, "tokens_seen": 444596224 }, { "epoch": 0.13, "learning_rate": 0.0008739367677740331, "loss": 0.0857, "theoretical_loss": 3.9934101333859253, "tokens_seen": 444858368 }, { "epoch": 0.13, "learning_rate": 0.0008738565238324506, "loss": 0.0878, "theoretical_loss": 3.9931534870595327, "tokens_seen": 445120512 }, { "epoch": 0.13, "learning_rate": 0.0008737762798908682, "loss": 0.0852, "theoretical_loss": 3.9928970341274517, "tokens_seen": 445382656 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0004395976720843464, "objective/train/docs_used": 168097, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7950403690338135, "objective/train/original_loss": 1.795040249824524, "objective/train/theoretical_loss": 3.992640774330251, "objective/train/tokens_used": 466104800, "objective/train/value_avg": -0.00882720947265625, "objective/train/value_loss": 0.0003522172919474542, "objective/train/value_max": -0.0001823902130126953, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.7499370479136089, "objective/train/value_std": 0.0190887451171875, "objective/train/weight_avg": 1.00059974193573, "objective/train/weighted_lm_loss": 1.795857548713684, "objective/train/weights_max": 1.362145185470581, "objective/train/weights_min": 0.3820091187953949, "theoretical_loss": 3.992640774330251, "tokens_seen": 445644800 }, { "epoch": 0.14, "learning_rate": 0.0008736960359492858, "loss": 0.0857, "theoretical_loss": 3.992640774330251, "tokens_seen": 445644800 }, { "epoch": 0.14, "learning_rate": 0.0008736157920077035, "loss": 0.0836, "theoretical_loss": 3.9923847074090015, "tokens_seen": 445906944 }, { "epoch": 0.14, "learning_rate": 0.000873535548066121, "loss": 0.0831, "theoretical_loss": 3.9921288331052702, "tokens_seen": 446169088 }, { "epoch": 0.14, "learning_rate": 0.0008734553041245387, "loss": 0.0851, "theoretical_loss": 3.991873151161124, "tokens_seen": 446431232 }, { "epoch": 0.14, "learning_rate": 0.0008733750601829562, "loss": 0.0882, "theoretical_loss": 3.9916176613191263, "tokens_seen": 446693376 }, { "epoch": 0.14, "learning_rate": 0.0008732948162413738, "loss": 0.0857, "theoretical_loss": 3.9913623633223354, "tokens_seen": 446955520 }, { "epoch": 0.14, "learning_rate": 0.0008732145722997914, "loss": 0.0828, "theoretical_loss": 3.9911072569143036, "tokens_seen": 447217664 }, { "epoch": 0.14, "learning_rate": 0.0008731343283582089, "loss": 0.084, "theoretical_loss": 3.9908523418390764, "tokens_seen": 447479808 }, { "epoch": 0.14, "learning_rate": 0.0008730540844166266, "loss": 0.084, "theoretical_loss": 3.990597617841191, "tokens_seen": 447741952 }, { "epoch": 0.14, "learning_rate": 0.0008729738404750441, "loss": 0.0838, "theoretical_loss": 3.9903430846656742, "tokens_seen": 448004096 }, { "epoch": 0.14, "learning_rate": 0.0008728935965334618, "loss": 0.0868, "theoretical_loss": 3.990088742058043, "tokens_seen": 448266240 }, { "epoch": 0.14, "learning_rate": 0.0008728133525918794, "loss": 0.0831, "theoretical_loss": 3.9898345897643024, "tokens_seen": 448528384 }, { "epoch": 0.14, "learning_rate": 0.0008727331086502969, "loss": 0.0829, "theoretical_loss": 3.989580627530943, "tokens_seen": 448790528 }, { "epoch": 0.14, "learning_rate": 0.0008726528647087145, "loss": 0.0831, "theoretical_loss": 3.9893268551049417, "tokens_seen": 449052672 }, { "epoch": 0.14, "learning_rate": 0.0008725726207671321, "loss": 0.0845, "theoretical_loss": 3.9890732722337594, "tokens_seen": 449314816 }, { "epoch": 0.14, "learning_rate": 0.0008724923768255497, "loss": 0.0877, "theoretical_loss": 3.988819878665341, "tokens_seen": 449576960 }, { "epoch": 0.14, "learning_rate": 0.0008724121328839672, "loss": 0.0846, "theoretical_loss": 3.988566674148111, "tokens_seen": 449839104 }, { "epoch": 0.14, "learning_rate": 0.0008723318889423849, "loss": 0.0849, "theoretical_loss": 3.988313658430978, "tokens_seen": 450101248 }, { "epoch": 0.14, "learning_rate": 0.0008722516450008024, "loss": 0.0867, "theoretical_loss": 3.9880608312633274, "tokens_seen": 450363392 }, { "epoch": 0.14, "learning_rate": 0.0008721714010592199, "loss": 0.0871, "theoretical_loss": 3.9878081923950237, "tokens_seen": 450625536 }, { "epoch": 0.14, "learning_rate": 0.0008720911571176377, "loss": 0.0848, "theoretical_loss": 3.9875557415764087, "tokens_seen": 450887680 }, { "epoch": 0.14, "learning_rate": 0.0008720109131760552, "loss": 0.083, "theoretical_loss": 3.9873034785582995, "tokens_seen": 451149824 }, { "epoch": 0.14, "learning_rate": 0.0008719306692344729, "loss": 0.0824, "theoretical_loss": 3.9870514030919884, "tokens_seen": 451411968 }, { "epoch": 0.14, "learning_rate": 0.0008718504252928904, "loss": 0.0843, "theoretical_loss": 3.986799514929242, "tokens_seen": 451674112 }, { "epoch": 0.14, "learning_rate": 0.000871770181351308, "loss": 0.087, "theoretical_loss": 3.9865478138222974, "tokens_seen": 451936256 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0014793577138334513, "objective/train/docs_used": 170598, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7309378385543823, "objective/train/original_loss": 1.7309377193450928, "objective/train/theoretical_loss": 3.9862962995238647, "objective/train/tokens_used": 472658400, "objective/train/value_avg": -0.00775146484375, "objective/train/value_loss": 0.0004800742899533361, "objective/train/value_max": -0.00023412704467773438, "objective/train/value_min": -0.78662109375, "objective/train/value_reward_corr": 0.5517175996070474, "objective/train/value_std": 0.0128021240234375, "objective/train/weight_avg": 1.0016835927963257, "objective/train/weighted_lm_loss": 1.7332364320755005, "objective/train/weights_max": 2.0833756923675537, "objective/train/weights_min": 0.368600070476532, "theoretical_loss": 3.9862962995238647, "tokens_seen": 452198400 }, { "epoch": 0.14, "learning_rate": 0.0008716899374097256, "loss": 0.0856, "theoretical_loss": 3.9862962995238647, "tokens_seen": 452198400 }, { "epoch": 0.14, "learning_rate": 0.0008716096934681432, "loss": 0.0879, "theoretical_loss": 3.9860449717871234, "tokens_seen": 452460544 }, { "epoch": 0.14, "learning_rate": 0.0008715294495265607, "loss": 0.0847, "theoretical_loss": 3.9857938303657217, "tokens_seen": 452722688 }, { "epoch": 0.14, "learning_rate": 0.0008714492055849783, "loss": 0.0879, "theoretical_loss": 3.9855428750137754, "tokens_seen": 452984832 }, { "epoch": 0.14, "learning_rate": 0.000871368961643396, "loss": 0.0884, "theoretical_loss": 3.9852921054858665, "tokens_seen": 453246976 }, { "epoch": 0.14, "learning_rate": 0.0008712887177018135, "loss": 0.0839, "theoretical_loss": 3.9850415215370445, "tokens_seen": 453509120 }, { "epoch": 0.14, "learning_rate": 0.0008712084737602312, "loss": 0.0814, "theoretical_loss": 3.984791122922821, "tokens_seen": 453771264 }, { "epoch": 0.14, "learning_rate": 0.0008711282298186487, "loss": 0.0855, "theoretical_loss": 3.98454090939917, "tokens_seen": 454033408 }, { "epoch": 0.14, "learning_rate": 0.0008710479858770663, "loss": 0.0857, "theoretical_loss": 3.984290880722531, "tokens_seen": 454295552 }, { "epoch": 0.14, "learning_rate": 0.0008709677419354839, "loss": 0.0831, "theoretical_loss": 3.9840410366498, "tokens_seen": 454557696 }, { "epoch": 0.14, "learning_rate": 0.0008708874979939014, "loss": 0.0864, "theoretical_loss": 3.983791376938336, "tokens_seen": 454819840 }, { "epoch": 0.14, "learning_rate": 0.0008708072540523191, "loss": 0.0828, "theoretical_loss": 3.983541901345955, "tokens_seen": 455081984 }, { "epoch": 0.14, "learning_rate": 0.0008707270101107366, "loss": 0.0868, "theoretical_loss": 3.983292609630931, "tokens_seen": 455344128 }, { "epoch": 0.14, "learning_rate": 0.0008706467661691543, "loss": 0.0872, "theoretical_loss": 3.9830435015519936, "tokens_seen": 455606272 }, { "epoch": 0.14, "learning_rate": 0.0008705665222275719, "loss": 0.0855, "theoretical_loss": 3.982794576868328, "tokens_seen": 455868416 }, { "epoch": 0.14, "learning_rate": 0.0008704862782859895, "loss": 0.0842, "theoretical_loss": 3.982545835339573, "tokens_seen": 456130560 }, { "epoch": 0.14, "learning_rate": 0.000870406034344407, "loss": 0.0845, "theoretical_loss": 3.982297276725822, "tokens_seen": 456392704 }, { "epoch": 0.14, "learning_rate": 0.0008703257904028246, "loss": 0.0834, "theoretical_loss": 3.9820489007876176, "tokens_seen": 456654848 }, { "epoch": 0.14, "learning_rate": 0.0008702455464612422, "loss": 0.0808, "theoretical_loss": 3.981800707285955, "tokens_seen": 456916992 }, { "epoch": 0.14, "learning_rate": 0.0008701653025196597, "loss": 0.085, "theoretical_loss": 3.9815526959822787, "tokens_seen": 457179136 }, { "epoch": 0.14, "learning_rate": 0.0008700850585780774, "loss": 0.0831, "theoretical_loss": 3.981304866638481, "tokens_seen": 457441280 }, { "epoch": 0.14, "learning_rate": 0.0008700048146364949, "loss": 0.087, "theoretical_loss": 3.9810572190169027, "tokens_seen": 457703424 }, { "epoch": 0.14, "learning_rate": 0.0008699245706949126, "loss": 0.0862, "theoretical_loss": 3.9808097528803295, "tokens_seen": 457965568 }, { "epoch": 0.14, "learning_rate": 0.0008698443267533302, "loss": 0.0858, "theoretical_loss": 3.9805624679919935, "tokens_seen": 458227712 }, { "epoch": 0.14, "learning_rate": 0.0008697640828117477, "loss": 0.0873, "theoretical_loss": 3.98031536411557, "tokens_seen": 458489856 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0019264193251729012, "objective/train/docs_used": 173009, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6288082599639893, "objective/train/original_loss": 1.6288082599639893, "objective/train/theoretical_loss": 3.9800684410151783, "objective/train/tokens_used": 479212000, "objective/train/value_avg": -0.007114410400390625, "objective/train/value_loss": 0.0001740693987812847, "objective/train/value_max": -0.0002002716064453125, "objective/train/value_min": -0.42919921875, "objective/train/value_reward_corr": 0.5126065077385661, "objective/train/value_std": 0.00885772705078125, "objective/train/weight_avg": 1.0020055770874023, "objective/train/weighted_lm_loss": 1.6328537464141846, "objective/train/weights_max": 1.4530192613601685, "objective/train/weights_min": 0.3697379231452942, "theoretical_loss": 3.9800684410151783, "tokens_seen": 458752000 }, { "epoch": 0.14, "learning_rate": 0.0008696838388701653, "loss": 0.0843, "theoretical_loss": 3.9800684410151783, "tokens_seen": 458752000 }, { "epoch": 0.14, "learning_rate": 0.0008696035949285829, "loss": 0.082, "theoretical_loss": 3.979821698455379, "tokens_seen": 459014144 }, { "epoch": 0.14, "learning_rate": 0.0008695233509870005, "loss": 0.0855, "theoretical_loss": 3.9795751362011735, "tokens_seen": 459276288 }, { "epoch": 0.14, "learning_rate": 0.0008694431070454181, "loss": 0.0845, "theoretical_loss": 3.979328754018004, "tokens_seen": 459538432 }, { "epoch": 0.14, "learning_rate": 0.0008693628631038357, "loss": 0.0844, "theoretical_loss": 3.979082551671749, "tokens_seen": 459800576 }, { "epoch": 0.14, "learning_rate": 0.0008692826191622532, "loss": 0.0871, "theoretical_loss": 3.9788365289287286, "tokens_seen": 460062720 }, { "epoch": 0.14, "learning_rate": 0.0008692023752206708, "loss": 0.0873, "theoretical_loss": 3.9785906855556945, "tokens_seen": 460324864 }, { "epoch": 0.14, "learning_rate": 0.0008691221312790885, "loss": 0.0841, "theoretical_loss": 3.9783450213198384, "tokens_seen": 460587008 }, { "epoch": 0.14, "learning_rate": 0.000869041887337506, "loss": 0.0832, "theoretical_loss": 3.9780995359887843, "tokens_seen": 460849152 }, { "epoch": 0.14, "learning_rate": 0.0008689616433959237, "loss": 0.0863, "theoretical_loss": 3.9778542293305894, "tokens_seen": 461111296 }, { "epoch": 0.14, "learning_rate": 0.0008688813994543412, "loss": 0.086, "theoretical_loss": 3.977609101113744, "tokens_seen": 461373440 }, { "epoch": 0.14, "learning_rate": 0.0008688011555127588, "loss": 0.0805, "theoretical_loss": 3.97736415110717, "tokens_seen": 461635584 }, { "epoch": 0.14, "learning_rate": 0.0008687209115711764, "loss": 0.0899, "theoretical_loss": 3.977119379080218, "tokens_seen": 461897728 }, { "epoch": 0.14, "learning_rate": 0.000868640667629594, "loss": 0.088, "theoretical_loss": 3.9768747848026695, "tokens_seen": 462159872 }, { "epoch": 0.14, "learning_rate": 0.0008685604236880115, "loss": 0.0832, "theoretical_loss": 3.9766303680447335, "tokens_seen": 462422016 }, { "epoch": 0.14, "learning_rate": 0.0008684801797464291, "loss": 0.083, "theoretical_loss": 3.9763861285770457, "tokens_seen": 462684160 }, { "epoch": 0.14, "learning_rate": 0.0008683999358048468, "loss": 0.0856, "theoretical_loss": 3.9761420661706683, "tokens_seen": 462946304 }, { "epoch": 0.14, "learning_rate": 0.0008683196918632643, "loss": 0.0836, "theoretical_loss": 3.975898180597089, "tokens_seen": 463208448 }, { "epoch": 0.14, "learning_rate": 0.000868239447921682, "loss": 0.0852, "theoretical_loss": 3.9756544716282187, "tokens_seen": 463470592 }, { "epoch": 0.14, "learning_rate": 0.0008681592039800995, "loss": 0.0834, "theoretical_loss": 3.975410939036392, "tokens_seen": 463732736 }, { "epoch": 0.14, "learning_rate": 0.0008680789600385172, "loss": 0.0849, "theoretical_loss": 3.9751675825943638, "tokens_seen": 463994880 }, { "epoch": 0.14, "learning_rate": 0.0008679987160969347, "loss": 0.0891, "theoretical_loss": 3.974924402075313, "tokens_seen": 464257024 }, { "epoch": 0.14, "learning_rate": 0.0008679184721553522, "loss": 0.0858, "theoretical_loss": 3.9746813972528354, "tokens_seen": 464519168 }, { "epoch": 0.14, "learning_rate": 0.0008678382282137699, "loss": 0.0871, "theoretical_loss": 3.9744385679009486, "tokens_seen": 464781312 }, { "epoch": 0.14, "learning_rate": 0.0008677579842721874, "loss": 0.0829, "theoretical_loss": 3.9741959137940848, "tokens_seen": 465043456 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0013637046795338392, "objective/train/docs_used": 175471, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.55617356300354, "objective/train/original_loss": 1.55617356300354, "objective/train/theoretical_loss": 3.973953434707096, "objective/train/tokens_used": 485765600, "objective/train/value_avg": -0.00836944580078125, "objective/train/value_loss": 0.00027403823332861066, "objective/train/value_max": -0.00031256675720214844, "objective/train/value_min": -0.7216796875, "objective/train/value_reward_corr": 0.5759118889260557, "objective/train/value_std": 0.01276397705078125, "objective/train/weight_avg": 1.0014877319335938, "objective/train/weighted_lm_loss": 1.5572477579116821, "objective/train/weights_max": 1.3698445558547974, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.973953434707096, "tokens_seen": 465305600 }, { "epoch": 0.14, "learning_rate": 0.0008676777403306051, "loss": 0.0824, "theoretical_loss": 3.973953434707096, "tokens_seen": 465305600 }, { "epoch": 0.14, "learning_rate": 0.0008675974963890227, "loss": 0.0837, "theoretical_loss": 3.973711130415248, "tokens_seen": 465567744 }, { "epoch": 0.14, "learning_rate": 0.0008675172524474403, "loss": 0.0852, "theoretical_loss": 3.973469000694223, "tokens_seen": 465829888 }, { "epoch": 0.14, "learning_rate": 0.0008674370085058578, "loss": 0.0837, "theoretical_loss": 3.973227045320117, "tokens_seen": 466092032 }, { "epoch": 0.14, "learning_rate": 0.0008673567645642754, "loss": 0.085, "theoretical_loss": 3.9729852640694383, "tokens_seen": 466354176 }, { "epoch": 0.14, "learning_rate": 0.000867276520622693, "loss": 0.0853, "theoretical_loss": 3.972743656719107, "tokens_seen": 466616320 }, { "epoch": 0.14, "learning_rate": 0.0008671962766811105, "loss": 0.0815, "theoretical_loss": 3.9725022230464537, "tokens_seen": 466878464 }, { "epoch": 0.14, "learning_rate": 0.0008671160327395282, "loss": 0.0844, "theoretical_loss": 3.9722609628292207, "tokens_seen": 467140608 }, { "epoch": 0.14, "learning_rate": 0.0008670357887979457, "loss": 0.0838, "theoretical_loss": 3.9720198758455574, "tokens_seen": 467402752 }, { "epoch": 0.14, "learning_rate": 0.0008669555448563635, "loss": 0.085, "theoretical_loss": 3.9717789618740227, "tokens_seen": 467664896 }, { "epoch": 0.14, "learning_rate": 0.000866875300914781, "loss": 0.0861, "theoretical_loss": 3.9715382206935814, "tokens_seen": 467927040 }, { "epoch": 0.14, "learning_rate": 0.0008667950569731985, "loss": 0.0818, "theoretical_loss": 3.9712976520836043, "tokens_seen": 468189184 }, { "epoch": 0.14, "learning_rate": 0.0008667148130316162, "loss": 0.0838, "theoretical_loss": 3.971057255823868, "tokens_seen": 468451328 }, { "epoch": 0.14, "learning_rate": 0.0008666345690900337, "loss": 0.0856, "theoretical_loss": 3.9708170316945526, "tokens_seen": 468713472 }, { "epoch": 0.14, "learning_rate": 0.0008665543251484513, "loss": 0.0849, "theoretical_loss": 3.9705769794762418, "tokens_seen": 468975616 }, { "epoch": 0.14, "learning_rate": 0.0008664740812068689, "loss": 0.0862, "theoretical_loss": 3.97033709894992, "tokens_seen": 469237760 }, { "epoch": 0.14, "learning_rate": 0.0008663938372652865, "loss": 0.0846, "theoretical_loss": 3.970097389896975, "tokens_seen": 469499904 }, { "epoch": 0.14, "learning_rate": 0.000866313593323704, "loss": 0.0851, "theoretical_loss": 3.9698578520991936, "tokens_seen": 469762048 }, { "epoch": 0.14, "learning_rate": 0.0008662333493821216, "loss": 0.087, "theoretical_loss": 3.9696184853387617, "tokens_seen": 470024192 }, { "epoch": 0.14, "learning_rate": 0.0008661531054405393, "loss": 0.0832, "theoretical_loss": 3.9693792893982636, "tokens_seen": 470286336 }, { "epoch": 0.14, "learning_rate": 0.0008660728614989568, "loss": 0.0855, "theoretical_loss": 3.969140264060681, "tokens_seen": 470548480 }, { "epoch": 0.14, "learning_rate": 0.0008659926175573745, "loss": 0.0841, "theoretical_loss": 3.9689014091093933, "tokens_seen": 470810624 }, { "epoch": 0.14, "learning_rate": 0.000865912373615792, "loss": 0.0841, "theoretical_loss": 3.9686627243281727, "tokens_seen": 471072768 }, { "epoch": 0.14, "learning_rate": 0.0008658321296742096, "loss": 0.0876, "theoretical_loss": 3.9684242095011886, "tokens_seen": 471334912 }, { "epoch": 0.14, "learning_rate": 0.0008657518857326272, "loss": 0.0839, "theoretical_loss": 3.9681858644130017, "tokens_seen": 471597056 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0014818408526480198, "objective/train/docs_used": 177948, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7183817625045776, "objective/train/original_loss": 1.718381643295288, "objective/train/theoretical_loss": 3.967947688848568, "objective/train/tokens_used": 492319200, "objective/train/value_avg": -0.01105499267578125, "objective/train/value_loss": 0.00045233668060973287, "objective/train/value_max": -0.00037550926208496094, "objective/train/value_min": -0.8310546875, "objective/train/value_reward_corr": 0.7132991610895623, "objective/train/value_std": 0.0223541259765625, "objective/train/weight_avg": 1.001685619354248, "objective/train/weighted_lm_loss": 1.7202060222625732, "objective/train/weights_max": 1.5177611112594604, "objective/train/weights_min": 0.37088075280189514, "theoretical_loss": 3.967947688848568, "tokens_seen": 471859200 }, { "epoch": 0.14, "learning_rate": 0.0008656716417910447, "loss": 0.0866, "theoretical_loss": 3.967947688848568, "tokens_seen": 471859200 }, { "epoch": 0.14, "learning_rate": 0.0008655913978494624, "loss": 0.0803, "theoretical_loss": 3.9677096825932328, "tokens_seen": 472121344 }, { "epoch": 0.14, "learning_rate": 0.0008655111539078799, "loss": 0.0837, "theoretical_loss": 3.9674718454327325, "tokens_seen": 472383488 }, { "epoch": 0.14, "learning_rate": 0.0008654309099662976, "loss": 0.0858, "theoretical_loss": 3.9672341771531956, "tokens_seen": 472645632 }, { "epoch": 0.14, "learning_rate": 0.0008653506660247152, "loss": 0.0856, "theoretical_loss": 3.9669966775411365, "tokens_seen": 472907776 }, { "epoch": 0.14, "learning_rate": 0.0008652704220831328, "loss": 0.0837, "theoretical_loss": 3.96675934638346, "tokens_seen": 473169920 }, { "epoch": 0.14, "learning_rate": 0.0008651901781415503, "loss": 0.0859, "theoretical_loss": 3.9665221834674558, "tokens_seen": 473432064 }, { "epoch": 0.14, "learning_rate": 0.000865109934199968, "loss": 0.0837, "theoretical_loss": 3.9662851885808026, "tokens_seen": 473694208 }, { "epoch": 0.14, "learning_rate": 0.0008650296902583855, "loss": 0.0843, "theoretical_loss": 3.966048361511562, "tokens_seen": 473956352 }, { "epoch": 0.14, "learning_rate": 0.000864949446316803, "loss": 0.0825, "theoretical_loss": 3.96581170204818, "tokens_seen": 474218496 }, { "epoch": 0.14, "learning_rate": 0.0008648692023752207, "loss": 0.0867, "theoretical_loss": 3.9655752099794874, "tokens_seen": 474480640 }, { "epoch": 0.14, "learning_rate": 0.0008647889584336382, "loss": 0.0885, "theoretical_loss": 3.9653388850946976, "tokens_seen": 474742784 }, { "epoch": 0.14, "learning_rate": 0.0008647087144920559, "loss": 0.0836, "theoretical_loss": 3.965102727183404, "tokens_seen": 475004928 }, { "epoch": 0.14, "learning_rate": 0.0008646284705504735, "loss": 0.0829, "theoretical_loss": 3.9648667360355816, "tokens_seen": 475267072 }, { "epoch": 0.14, "learning_rate": 0.0008645482266088911, "loss": 0.0863, "theoretical_loss": 3.9646309114415863, "tokens_seen": 475529216 }, { "epoch": 0.14, "learning_rate": 0.0008644679826673087, "loss": 0.0834, "theoretical_loss": 3.9643952531921505, "tokens_seen": 475791360 }, { "epoch": 0.14, "learning_rate": 0.0008643877387257262, "loss": 0.0814, "theoretical_loss": 3.9641597610783874, "tokens_seen": 476053504 }, { "epoch": 0.14, "learning_rate": 0.0008643074947841438, "loss": 0.0845, "theoretical_loss": 3.9639244348917853, "tokens_seen": 476315648 }, { "epoch": 0.14, "learning_rate": 0.0008642272508425614, "loss": 0.0821, "theoretical_loss": 3.96368927442421, "tokens_seen": 476577792 }, { "epoch": 0.14, "learning_rate": 0.000864147006900979, "loss": 0.0851, "theoretical_loss": 3.9634542794679013, "tokens_seen": 476839936 }, { "epoch": 0.14, "learning_rate": 0.0008640667629593965, "loss": 0.0828, "theoretical_loss": 3.963219449815475, "tokens_seen": 477102080 }, { "epoch": 0.14, "learning_rate": 0.0008639865190178143, "loss": 0.0805, "theoretical_loss": 3.9629847852599207, "tokens_seen": 477364224 }, { "epoch": 0.14, "learning_rate": 0.0008639062750762318, "loss": 0.0868, "theoretical_loss": 3.9627502855945984, "tokens_seen": 477626368 }, { "epoch": 0.14, "learning_rate": 0.0008638260311346493, "loss": 0.0836, "theoretical_loss": 3.962515950613242, "tokens_seen": 477888512 }, { "epoch": 0.14, "learning_rate": 0.000863745787193067, "loss": 0.0858, "theoretical_loss": 3.962281780109957, "tokens_seen": 478150656 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0006618535844609141, "objective/train/docs_used": 180329, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7160143852233887, "objective/train/original_loss": 1.7160143852233887, "objective/train/theoretical_loss": 3.9620477738792164, "objective/train/tokens_used": 498872800, "objective/train/value_avg": -0.006938934326171875, "objective/train/value_loss": 0.0002311340213054791, "objective/train/value_max": -0.00033926963806152344, "objective/train/value_min": -0.716796875, "objective/train/value_reward_corr": 0.5828097197150116, "objective/train/value_std": 0.010406494140625, "objective/train/weight_avg": 1.0007702112197876, "objective/train/weighted_lm_loss": 1.7172032594680786, "objective/train/weights_max": 1.2420917749404907, "objective/train/weights_min": 0.37445467710494995, "theoretical_loss": 3.9620477738792164, "tokens_seen": 478412800 }, { "epoch": 0.14, "learning_rate": 0.0008636655432514845, "loss": 0.0838, "theoretical_loss": 3.9620477738792164, "tokens_seen": 478412800 }, { "epoch": 0.15, "learning_rate": 0.0008635852993099021, "loss": 0.085, "theoretical_loss": 3.9618139317158647, "tokens_seen": 478674944 }, { "epoch": 0.15, "learning_rate": 0.0008635050553683197, "loss": 0.084, "theoretical_loss": 3.961580253415114, "tokens_seen": 478937088 }, { "epoch": 0.15, "learning_rate": 0.0008634248114267373, "loss": 0.084, "theoretical_loss": 3.9613467387725434, "tokens_seen": 479199232 }, { "epoch": 0.15, "learning_rate": 0.0008633445674851548, "loss": 0.0804, "theoretical_loss": 3.9611133875841, "tokens_seen": 479461376 }, { "epoch": 0.15, "learning_rate": 0.0008632643235435724, "loss": 0.085, "theoretical_loss": 3.960880199646096, "tokens_seen": 479723520 }, { "epoch": 0.15, "learning_rate": 0.0008631840796019901, "loss": 0.0857, "theoretical_loss": 3.9606471747552083, "tokens_seen": 479985664 }, { "epoch": 0.15, "learning_rate": 0.0008631038356604077, "loss": 0.0822, "theoretical_loss": 3.9604143127084774, "tokens_seen": 480247808 }, { "epoch": 0.15, "learning_rate": 0.0008630235917188253, "loss": 0.0845, "theoretical_loss": 3.960181613303309, "tokens_seen": 480509952 }, { "epoch": 0.15, "learning_rate": 0.0008629433477772428, "loss": 0.0871, "theoretical_loss": 3.9599490763374687, "tokens_seen": 480772096 }, { "epoch": 0.15, "learning_rate": 0.0008628631038356605, "loss": 0.0853, "theoretical_loss": 3.9597167016090866, "tokens_seen": 481034240 }, { "epoch": 0.15, "learning_rate": 0.000862782859894078, "loss": 0.0825, "theoretical_loss": 3.95948448891665, "tokens_seen": 481296384 }, { "epoch": 0.15, "learning_rate": 0.0008627026159524955, "loss": 0.0835, "theoretical_loss": 3.959252438059009, "tokens_seen": 481558528 }, { "epoch": 0.15, "learning_rate": 0.0008626223720109132, "loss": 0.0839, "theoretical_loss": 3.9590205488353707, "tokens_seen": 481820672 }, { "epoch": 0.15, "learning_rate": 0.0008625421280693307, "loss": 0.0864, "theoretical_loss": 3.958788821045302, "tokens_seen": 482082816 }, { "epoch": 0.15, "learning_rate": 0.0008624618841277484, "loss": 0.0845, "theoretical_loss": 3.958557254488727, "tokens_seen": 482344960 }, { "epoch": 0.15, "learning_rate": 0.000862381640186166, "loss": 0.0836, "theoretical_loss": 3.958325848965925, "tokens_seen": 482607104 }, { "epoch": 0.15, "learning_rate": 0.0008623013962445836, "loss": 0.0829, "theoretical_loss": 3.958094604277532, "tokens_seen": 482869248 }, { "epoch": 0.15, "learning_rate": 0.0008622211523030011, "loss": 0.086, "theoretical_loss": 3.9578635202245387, "tokens_seen": 483131392 }, { "epoch": 0.15, "learning_rate": 0.0008621409083614188, "loss": 0.0803, "theoretical_loss": 3.95763259660829, "tokens_seen": 483393536 }, { "epoch": 0.15, "learning_rate": 0.0008620606644198363, "loss": 0.0825, "theoretical_loss": 3.957401833230484, "tokens_seen": 483655680 }, { "epoch": 0.15, "learning_rate": 0.0008619804204782538, "loss": 0.0829, "theoretical_loss": 3.957171229893171, "tokens_seen": 483917824 }, { "epoch": 0.15, "learning_rate": 0.0008619001765366715, "loss": 0.0842, "theoretical_loss": 3.956940786398753, "tokens_seen": 484179968 }, { "epoch": 0.15, "learning_rate": 0.000861819932595089, "loss": 0.0852, "theoretical_loss": 3.9567105025499827, "tokens_seen": 484442112 }, { "epoch": 0.15, "learning_rate": 0.0008617396886535068, "loss": 0.0835, "theoretical_loss": 3.9564803781499633, "tokens_seen": 484704256 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0013643309939652681, "objective/train/docs_used": 182723, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5896873474121094, "objective/train/original_loss": 1.5896873474121094, "objective/train/theoretical_loss": 3.956250413002146, "objective/train/tokens_used": 505426400, "objective/train/value_avg": -0.00983428955078125, "objective/train/value_loss": 0.00046001793816685677, "objective/train/value_max": -0.0002512931823730469, "objective/train/value_min": -0.94873046875, "objective/train/value_reward_corr": 0.692742976797035, "objective/train/value_std": 0.0214996337890625, "objective/train/weight_avg": 1.0015811920166016, "objective/train/weighted_lm_loss": 1.592099905014038, "objective/train/weights_max": 2.4389986991882324, "objective/train/weights_min": 0.37612104415893555, "theoretical_loss": 3.956250413002146, "tokens_seen": 484966400 }, { "epoch": 0.15, "learning_rate": 0.0008616594447119243, "loss": 0.084, "theoretical_loss": 3.956250413002146, "tokens_seen": 484966400 }, { "epoch": 0.15, "learning_rate": 0.0008615792007703419, "loss": 0.0847, "theoretical_loss": 3.9560206069103314, "tokens_seen": 485228544 }, { "epoch": 0.15, "learning_rate": 0.0008614989568287595, "loss": 0.0855, "theoretical_loss": 3.9557909596786676, "tokens_seen": 485490688 }, { "epoch": 0.15, "learning_rate": 0.000861418712887177, "loss": 0.0836, "theoretical_loss": 3.9555614711116487, "tokens_seen": 485752832 }, { "epoch": 0.15, "learning_rate": 0.0008613384689455946, "loss": 0.0837, "theoretical_loss": 3.9553321410141162, "tokens_seen": 486014976 }, { "epoch": 0.15, "learning_rate": 0.0008612582250040122, "loss": 0.0828, "theoretical_loss": 3.9551029691912545, "tokens_seen": 486277120 }, { "epoch": 0.15, "learning_rate": 0.0008611779810624298, "loss": 0.0839, "theoretical_loss": 3.954873955448594, "tokens_seen": 486539264 }, { "epoch": 0.15, "learning_rate": 0.0008610977371208473, "loss": 0.084, "theoretical_loss": 3.9546450995920086, "tokens_seen": 486801408 }, { "epoch": 0.15, "learning_rate": 0.0008610174931792651, "loss": 0.0823, "theoretical_loss": 3.954416401427715, "tokens_seen": 487063552 }, { "epoch": 0.15, "learning_rate": 0.0008609372492376826, "loss": 0.0821, "theoretical_loss": 3.9541878607622705, "tokens_seen": 487325696 }, { "epoch": 0.15, "learning_rate": 0.0008608570052961001, "loss": 0.0802, "theoretical_loss": 3.9539594774025755, "tokens_seen": 487587840 }, { "epoch": 0.15, "learning_rate": 0.0008607767613545178, "loss": 0.0833, "theoretical_loss": 3.9537312511558698, "tokens_seen": 487849984 }, { "epoch": 0.15, "learning_rate": 0.0008606965174129353, "loss": 0.0821, "theoretical_loss": 3.953503181829732, "tokens_seen": 488112128 }, { "epoch": 0.15, "learning_rate": 0.000860616273471353, "loss": 0.0838, "theoretical_loss": 3.9532752692320816, "tokens_seen": 488374272 }, { "epoch": 0.15, "learning_rate": 0.0008605360295297705, "loss": 0.0807, "theoretical_loss": 3.9530475131711746, "tokens_seen": 488636416 }, { "epoch": 0.15, "learning_rate": 0.0008604557855881881, "loss": 0.0823, "theoretical_loss": 3.9528199134556044, "tokens_seen": 488898560 }, { "epoch": 0.15, "learning_rate": 0.0008603755416466057, "loss": 0.0838, "theoretical_loss": 3.9525924698943022, "tokens_seen": 489160704 }, { "epoch": 0.15, "learning_rate": 0.0008602952977050232, "loss": 0.0829, "theoretical_loss": 3.952365182296533, "tokens_seen": 489422848 }, { "epoch": 0.15, "learning_rate": 0.0008602150537634409, "loss": 0.0842, "theoretical_loss": 3.9521380504718975, "tokens_seen": 489684992 }, { "epoch": 0.15, "learning_rate": 0.0008601348098218585, "loss": 0.0849, "theoretical_loss": 3.9519110742303325, "tokens_seen": 489947136 }, { "epoch": 0.15, "learning_rate": 0.0008600545658802761, "loss": 0.0829, "theoretical_loss": 3.951684253382105, "tokens_seen": 490209280 }, { "epoch": 0.15, "learning_rate": 0.0008599743219386936, "loss": 0.0827, "theoretical_loss": 3.951457587737817, "tokens_seen": 490471424 }, { "epoch": 0.15, "learning_rate": 0.0008598940779971113, "loss": 0.0829, "theoretical_loss": 3.9512310771084014, "tokens_seen": 490733568 }, { "epoch": 0.15, "learning_rate": 0.0008598138340555288, "loss": 0.0846, "theoretical_loss": 3.951004721305123, "tokens_seen": 490995712 }, { "epoch": 0.15, "learning_rate": 0.0008597335901139463, "loss": 0.0828, "theoretical_loss": 3.950778520139576, "tokens_seen": 491257856 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0006203674129210413, "objective/train/docs_used": 185162, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6717313528060913, "objective/train/original_loss": 1.6717313528060913, "objective/train/theoretical_loss": 3.9505524734236848, "objective/train/tokens_used": 511980000, "objective/train/value_avg": -0.00823974609375, "objective/train/value_loss": 0.00022850836103316396, "objective/train/value_max": -0.0002758502960205078, "objective/train/value_min": -0.67724609375, "objective/train/value_reward_corr": 0.6322221181145709, "objective/train/value_std": 0.0125732421875, "objective/train/weight_avg": 1.0007286071777344, "objective/train/weighted_lm_loss": 1.672489881515503, "objective/train/weights_max": 1.3001129627227783, "objective/train/weights_min": 0.372374951839447, "theoretical_loss": 3.9505524734236848, "tokens_seen": 491520000 }, { "epoch": 0.15, "learning_rate": 0.000859653346172364, "loss": 0.0839, "theoretical_loss": 3.9505524734236848, "tokens_seen": 491520000 }, { "epoch": 0.15, "learning_rate": 0.0008595731022307815, "loss": 0.0811, "theoretical_loss": 3.950326580969703, "tokens_seen": 491782144 }, { "epoch": 0.15, "learning_rate": 0.0008594928582891992, "loss": 0.0851, "theoretical_loss": 3.950100842590212, "tokens_seen": 492044288 }, { "epoch": 0.15, "learning_rate": 0.0008594126143476168, "loss": 0.0847, "theoretical_loss": 3.949875258098121, "tokens_seen": 492306432 }, { "epoch": 0.15, "learning_rate": 0.0008593323704060344, "loss": 0.0849, "theoretical_loss": 3.949649827306665, "tokens_seen": 492568576 }, { "epoch": 0.15, "learning_rate": 0.000859252126464452, "loss": 0.084, "theoretical_loss": 3.9494245500294047, "tokens_seen": 492830720 }, { "epoch": 0.15, "learning_rate": 0.0008591718825228696, "loss": 0.0837, "theoretical_loss": 3.949199426080228, "tokens_seen": 493092864 }, { "epoch": 0.15, "learning_rate": 0.0008590916385812871, "loss": 0.0848, "theoretical_loss": 3.9489744552733455, "tokens_seen": 493355008 }, { "epoch": 0.15, "learning_rate": 0.0008590113946397047, "loss": 0.0887, "theoretical_loss": 3.9487496374232913, "tokens_seen": 493617152 }, { "epoch": 0.15, "learning_rate": 0.0008589311506981223, "loss": 0.0828, "theoretical_loss": 3.9485249723449236, "tokens_seen": 493879296 }, { "epoch": 0.15, "learning_rate": 0.0008588509067565398, "loss": 0.0822, "theoretical_loss": 3.9483004598534217, "tokens_seen": 494141440 }, { "epoch": 0.15, "learning_rate": 0.0008587706628149576, "loss": 0.0855, "theoretical_loss": 3.948076099764288, "tokens_seen": 494403584 }, { "epoch": 0.15, "learning_rate": 0.0008586904188733751, "loss": 0.089, "theoretical_loss": 3.947851891893343, "tokens_seen": 494665728 }, { "epoch": 0.15, "learning_rate": 0.0008586101749317927, "loss": 0.0803, "theoretical_loss": 3.9476278360567303, "tokens_seen": 494927872 }, { "epoch": 0.15, "learning_rate": 0.0008585299309902103, "loss": 0.0838, "theoretical_loss": 3.9474039320709107, "tokens_seen": 495190016 }, { "epoch": 0.15, "learning_rate": 0.0008584496870486278, "loss": 0.0867, "theoretical_loss": 3.9471801797526633, "tokens_seen": 495452160 }, { "epoch": 0.15, "learning_rate": 0.0008583694431070454, "loss": 0.0818, "theoretical_loss": 3.946956578919088, "tokens_seen": 495714304 }, { "epoch": 0.15, "learning_rate": 0.000858289199165463, "loss": 0.0841, "theoretical_loss": 3.9467331293875976, "tokens_seen": 495976448 }, { "epoch": 0.15, "learning_rate": 0.0008582089552238806, "loss": 0.0854, "theoretical_loss": 3.9465098309759252, "tokens_seen": 496238592 }, { "epoch": 0.15, "learning_rate": 0.0008581287112822982, "loss": 0.0843, "theoretical_loss": 3.9462866835021178, "tokens_seen": 496500736 }, { "epoch": 0.15, "learning_rate": 0.0008580484673407159, "loss": 0.0838, "theoretical_loss": 3.9460636867845365, "tokens_seen": 496762880 }, { "epoch": 0.15, "learning_rate": 0.0008579682233991334, "loss": 0.0851, "theoretical_loss": 3.9458408406418584, "tokens_seen": 497025024 }, { "epoch": 0.15, "learning_rate": 0.000857887979457551, "loss": 0.0823, "theoretical_loss": 3.945618144893073, "tokens_seen": 497287168 }, { "epoch": 0.15, "learning_rate": 0.0008578077355159686, "loss": 0.0865, "theoretical_loss": 3.9453955993574845, "tokens_seen": 497549312 }, { "epoch": 0.15, "learning_rate": 0.0008577274915743861, "loss": 0.083, "theoretical_loss": 3.945173203854707, "tokens_seen": 497811456 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.00040782641735859215, "objective/train/docs_used": 187487, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7300009727478027, "objective/train/original_loss": 1.7300009727478027, "objective/train/theoretical_loss": 3.9449509582046662, "objective/train/tokens_used": 518533600, "objective/train/value_avg": -0.00957489013671875, "objective/train/value_loss": 0.0005079260445199907, "objective/train/value_max": -0.00016736984252929688, "objective/train/value_min": -0.72998046875, "objective/train/value_reward_corr": 0.5804749067115648, "objective/train/value_std": 0.01552581787109375, "objective/train/weight_avg": 1.0006271600723267, "objective/train/weighted_lm_loss": 1.7296302318572998, "objective/train/weights_max": 1.716912031173706, "objective/train/weights_min": 0.36845633387565613, "theoretical_loss": 3.9449509582046662, "tokens_seen": 498073600 }, { "epoch": 0.15, "learning_rate": 0.0008576472476328038, "loss": 0.0856, "theoretical_loss": 3.9449509582046662, "tokens_seen": 498073600 }, { "epoch": 0.15, "learning_rate": 0.0008575670036912213, "loss": 0.0832, "theoretical_loss": 3.944728862227601, "tokens_seen": 498335744 }, { "epoch": 0.15, "learning_rate": 0.0008574867597496389, "loss": 0.0812, "theoretical_loss": 3.9445069157440575, "tokens_seen": 498597888 }, { "epoch": 0.15, "learning_rate": 0.0008574065158080565, "loss": 0.0846, "theoretical_loss": 3.944285118574893, "tokens_seen": 498860032 }, { "epoch": 0.15, "learning_rate": 0.000857326271866474, "loss": 0.0842, "theoretical_loss": 3.9440634705412725, "tokens_seen": 499122176 }, { "epoch": 0.15, "learning_rate": 0.0008572460279248917, "loss": 0.0851, "theoretical_loss": 3.9438419714646695, "tokens_seen": 499384320 }, { "epoch": 0.15, "learning_rate": 0.0008571657839833093, "loss": 0.0868, "theoretical_loss": 3.9436206211668647, "tokens_seen": 499646464 }, { "epoch": 0.15, "learning_rate": 0.0008570855400417269, "loss": 0.0806, "theoretical_loss": 3.9433994194699453, "tokens_seen": 499908608 }, { "epoch": 0.15, "learning_rate": 0.0008570052961001444, "loss": 0.0834, "theoretical_loss": 3.943178366196304, "tokens_seen": 500170752 }, { "epoch": 0.15, "learning_rate": 0.0008569250521585621, "loss": 0.0847, "theoretical_loss": 3.942957461168639, "tokens_seen": 500432896 }, { "epoch": 0.15, "learning_rate": 0.0008568448082169796, "loss": 0.0869, "theoretical_loss": 3.9427367042099544, "tokens_seen": 500695040 }, { "epoch": 0.15, "learning_rate": 0.0008567645642753972, "loss": 0.0808, "theoretical_loss": 3.942516095143555, "tokens_seen": 500957184 }, { "epoch": 0.15, "learning_rate": 0.0008566843203338148, "loss": 0.0846, "theoretical_loss": 3.9422956337930524, "tokens_seen": 501219328 }, { "epoch": 0.15, "learning_rate": 0.0008566040763922323, "loss": 0.0818, "theoretical_loss": 3.942075319982358, "tokens_seen": 501481472 }, { "epoch": 0.15, "learning_rate": 0.0008565238324506501, "loss": 0.0828, "theoretical_loss": 3.941855153535686, "tokens_seen": 501743616 }, { "epoch": 0.15, "learning_rate": 0.0008564435885090676, "loss": 0.0837, "theoretical_loss": 3.9416351342775524, "tokens_seen": 502005760 }, { "epoch": 0.15, "learning_rate": 0.0008563633445674852, "loss": 0.0825, "theoretical_loss": 3.9414152620327716, "tokens_seen": 502267904 }, { "epoch": 0.15, "learning_rate": 0.0008562831006259028, "loss": 0.0798, "theoretical_loss": 3.941195536626461, "tokens_seen": 502530048 }, { "epoch": 0.15, "learning_rate": 0.0008562028566843203, "loss": 0.0835, "theoretical_loss": 3.940975957884034, "tokens_seen": 502792192 }, { "epoch": 0.15, "learning_rate": 0.0008561226127427379, "loss": 0.0839, "theoretical_loss": 3.9407565256312047, "tokens_seen": 503054336 }, { "epoch": 0.15, "learning_rate": 0.0008560423688011555, "loss": 0.0859, "theoretical_loss": 3.940537239693983, "tokens_seen": 503316480 }, { "epoch": 0.15, "learning_rate": 0.0008559621248595731, "loss": 0.0844, "theoretical_loss": 3.9403180998986778, "tokens_seen": 503578624 }, { "epoch": 0.15, "learning_rate": 0.0008558818809179906, "loss": 0.0805, "theoretical_loss": 3.9400991060718935, "tokens_seen": 503840768 }, { "epoch": 0.15, "learning_rate": 0.0008558016369764084, "loss": 0.0865, "theoretical_loss": 3.93988025804053, "tokens_seen": 504102912 }, { "epoch": 0.15, "learning_rate": 0.0008557213930348259, "loss": 0.0846, "theoretical_loss": 3.9396615556317838, "tokens_seen": 504365056 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.003414377337321639, "objective/train/docs_used": 189911, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.635353684425354, "objective/train/original_loss": 1.6353535652160645, "objective/train/theoretical_loss": 3.939442998673144, "objective/train/tokens_used": 525087200, "objective/train/value_avg": -0.0120849609375, "objective/train/value_loss": 0.0004840447218157351, "objective/train/value_max": -0.0003199577331542969, "objective/train/value_min": -0.7880859375, "objective/train/value_reward_corr": 0.7853448439202004, "objective/train/value_std": 0.0305023193359375, "objective/train/weight_avg": 1.0036317110061646, "objective/train/weighted_lm_loss": 1.6402772665023804, "objective/train/weights_max": 1.392095923423767, "objective/train/weights_min": 0.3688413202762604, "theoretical_loss": 3.939442998673144, "tokens_seen": 504627200 }, { "epoch": 0.15, "learning_rate": 0.0008556411490932436, "loss": 0.0822, "theoretical_loss": 3.939442998673144, "tokens_seen": 504627200 }, { "epoch": 0.15, "learning_rate": 0.0008555609051516611, "loss": 0.0824, "theoretical_loss": 3.9392245869923954, "tokens_seen": 504889344 }, { "epoch": 0.15, "learning_rate": 0.0008554806612100786, "loss": 0.0865, "theoretical_loss": 3.939006320417614, "tokens_seen": 505151488 }, { "epoch": 0.15, "learning_rate": 0.0008554004172684963, "loss": 0.0833, "theoretical_loss": 3.9387881987771705, "tokens_seen": 505413632 }, { "epoch": 0.15, "learning_rate": 0.0008553201733269138, "loss": 0.0847, "theoretical_loss": 3.9385702218997247, "tokens_seen": 505675776 }, { "epoch": 0.15, "learning_rate": 0.0008552399293853314, "loss": 0.0849, "theoretical_loss": 3.9383523896142316, "tokens_seen": 505937920 }, { "epoch": 0.15, "learning_rate": 0.000855159685443749, "loss": 0.0849, "theoretical_loss": 3.9381347017499326, "tokens_seen": 506200064 }, { "epoch": 0.15, "learning_rate": 0.0008550794415021667, "loss": 0.0847, "theoretical_loss": 3.9379171581363623, "tokens_seen": 506462208 }, { "epoch": 0.15, "learning_rate": 0.0008549991975605842, "loss": 0.0846, "theoretical_loss": 3.937699758603342, "tokens_seen": 506724352 }, { "epoch": 0.15, "learning_rate": 0.0008549189536190018, "loss": 0.0845, "theoretical_loss": 3.937482502980985, "tokens_seen": 506986496 }, { "epoch": 0.15, "learning_rate": 0.0008548387096774194, "loss": 0.0873, "theoretical_loss": 3.937265391099688, "tokens_seen": 507248640 }, { "epoch": 0.15, "learning_rate": 0.0008547584657358369, "loss": 0.081, "theoretical_loss": 3.9370484227901397, "tokens_seen": 507510784 }, { "epoch": 0.15, "learning_rate": 0.0008546782217942546, "loss": 0.0815, "theoretical_loss": 3.9368315978833124, "tokens_seen": 507772928 }, { "epoch": 0.15, "learning_rate": 0.0008545979778526721, "loss": 0.0824, "theoretical_loss": 3.936614916210466, "tokens_seen": 508035072 }, { "epoch": 0.15, "learning_rate": 0.0008545177339110897, "loss": 0.0811, "theoretical_loss": 3.9363983776031457, "tokens_seen": 508297216 }, { "epoch": 0.15, "learning_rate": 0.0008544374899695073, "loss": 0.0846, "theoretical_loss": 3.936181981893182, "tokens_seen": 508559360 }, { "epoch": 0.15, "learning_rate": 0.0008543572460279248, "loss": 0.0867, "theoretical_loss": 3.9359657289126875, "tokens_seen": 508821504 }, { "epoch": 0.15, "learning_rate": 0.0008542770020863426, "loss": 0.0841, "theoretical_loss": 3.935749618494061, "tokens_seen": 509083648 }, { "epoch": 0.15, "learning_rate": 0.0008541967581447601, "loss": 0.0836, "theoretical_loss": 3.935533650469983, "tokens_seen": 509345792 }, { "epoch": 0.15, "learning_rate": 0.0008541165142031777, "loss": 0.0818, "theoretical_loss": 3.935317824673417, "tokens_seen": 509607936 }, { "epoch": 0.15, "learning_rate": 0.0008540362702615953, "loss": 0.0842, "theoretical_loss": 3.935102140937608, "tokens_seen": 509870080 }, { "epoch": 0.15, "learning_rate": 0.0008539560263200129, "loss": 0.0829, "theoretical_loss": 3.934886599096081, "tokens_seen": 510132224 }, { "epoch": 0.15, "learning_rate": 0.0008538757823784304, "loss": 0.0819, "theoretical_loss": 3.9346711989826426, "tokens_seen": 510394368 }, { "epoch": 0.15, "learning_rate": 0.000853795538436848, "loss": 0.0832, "theoretical_loss": 3.93445594043138, "tokens_seen": 510656512 }, { "epoch": 0.15, "learning_rate": 0.0008537152944952656, "loss": 0.082, "theoretical_loss": 3.9342408232766584, "tokens_seen": 510918656 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0016618784284219146, "objective/train/docs_used": 192198, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7548766136169434, "objective/train/original_loss": 1.7548766136169434, "objective/train/theoretical_loss": 3.934025847353122, "objective/train/tokens_used": 531640800, "objective/train/value_avg": -0.00916290283203125, "objective/train/value_loss": 0.0003608835977502167, "objective/train/value_max": -0.0002694129943847656, "objective/train/value_min": -0.78173828125, "objective/train/value_reward_corr": 0.6032279595057155, "objective/train/value_std": 0.015838623046875, "objective/train/weight_avg": 1.0018247365951538, "objective/train/weighted_lm_loss": 1.7572846412658691, "objective/train/weights_max": 1.5906000137329102, "objective/train/weights_min": 0.3688792884349823, "theoretical_loss": 3.934025847353122, "tokens_seen": 511180800 }, { "epoch": 0.15, "learning_rate": 0.0008536350505536831, "loss": 0.0836, "theoretical_loss": 3.934025847353122, "tokens_seen": 511180800 }, { "epoch": 0.15, "learning_rate": 0.0008535548066121009, "loss": 0.0818, "theoretical_loss": 3.9338110124956924, "tokens_seen": 511442944 }, { "epoch": 0.16, "learning_rate": 0.0008534745626705184, "loss": 0.0835, "theoretical_loss": 3.9335963185395713, "tokens_seen": 511705088 }, { "epoch": 0.16, "learning_rate": 0.000853394318728936, "loss": 0.0811, "theoretical_loss": 3.933381765320233, "tokens_seen": 511967232 }, { "epoch": 0.16, "learning_rate": 0.0008533140747873536, "loss": 0.0812, "theoretical_loss": 3.933167352673432, "tokens_seen": 512229376 }, { "epoch": 0.16, "learning_rate": 0.0008532338308457711, "loss": 0.0834, "theoretical_loss": 3.9329530804351958, "tokens_seen": 512491520 }, { "epoch": 0.16, "learning_rate": 0.0008531535869041887, "loss": 0.0855, "theoretical_loss": 3.9327389484418287, "tokens_seen": 512753664 }, { "epoch": 0.16, "learning_rate": 0.0008530733429626063, "loss": 0.0804, "theoretical_loss": 3.9325249565299076, "tokens_seen": 513015808 }, { "epoch": 0.16, "learning_rate": 0.0008529930990210239, "loss": 0.0843, "theoretical_loss": 3.932311104536285, "tokens_seen": 513277952 }, { "epoch": 0.16, "learning_rate": 0.0008529128550794415, "loss": 0.0822, "theoretical_loss": 3.9320973922980844, "tokens_seen": 513540096 }, { "epoch": 0.16, "learning_rate": 0.0008528326111378592, "loss": 0.0844, "theoretical_loss": 3.931883819652705, "tokens_seen": 513802240 }, { "epoch": 0.16, "learning_rate": 0.0008527523671962767, "loss": 0.082, "theoretical_loss": 3.9316703864378155, "tokens_seen": 514064384 }, { "epoch": 0.16, "learning_rate": 0.0008526721232546944, "loss": 0.082, "theoretical_loss": 3.9314570924913568, "tokens_seen": 514326528 }, { "epoch": 0.16, "learning_rate": 0.0008525918793131119, "loss": 0.0834, "theoretical_loss": 3.9312439376515407, "tokens_seen": 514588672 }, { "epoch": 0.16, "learning_rate": 0.0008525116353715294, "loss": 0.083, "theoretical_loss": 3.9310309217568493, "tokens_seen": 514850816 }, { "epoch": 0.16, "learning_rate": 0.0008524313914299471, "loss": 0.0845, "theoretical_loss": 3.9308180446460343, "tokens_seen": 515112960 }, { "epoch": 0.16, "learning_rate": 0.0008523511474883646, "loss": 0.0838, "theoretical_loss": 3.9306053061581165, "tokens_seen": 515375104 }, { "epoch": 0.16, "learning_rate": 0.0008522709035467822, "loss": 0.0834, "theoretical_loss": 3.930392706132385, "tokens_seen": 515637248 }, { "epoch": 0.16, "learning_rate": 0.0008521906596051998, "loss": 0.0845, "theoretical_loss": 3.9301802444083966, "tokens_seen": 515899392 }, { "epoch": 0.16, "learning_rate": 0.0008521104156636175, "loss": 0.0846, "theoretical_loss": 3.929967920825977, "tokens_seen": 516161536 }, { "epoch": 0.16, "learning_rate": 0.000852030171722035, "loss": 0.0805, "theoretical_loss": 3.929755735225216, "tokens_seen": 516423680 }, { "epoch": 0.16, "learning_rate": 0.0008519499277804526, "loss": 0.0829, "theoretical_loss": 3.9295436874464715, "tokens_seen": 516685824 }, { "epoch": 0.16, "learning_rate": 0.0008518696838388702, "loss": 0.0836, "theoretical_loss": 3.929331777330366, "tokens_seen": 516947968 }, { "epoch": 0.16, "learning_rate": 0.0008517894398972877, "loss": 0.0798, "theoretical_loss": 3.9291200047177886, "tokens_seen": 517210112 }, { "epoch": 0.16, "learning_rate": 0.0008517091959557054, "loss": 0.0815, "theoretical_loss": 3.9289083694498905, "tokens_seen": 517472256 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.001091600046493113, "objective/train/docs_used": 194581, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6318016052246094, "objective/train/original_loss": 1.6318016052246094, "objective/train/theoretical_loss": 3.9286968713680883, "objective/train/tokens_used": 538194400, "objective/train/value_avg": -0.00958251953125, "objective/train/value_loss": 0.0004281644069124013, "objective/train/value_max": -0.0002453327178955078, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.7146024060573813, "objective/train/value_std": 0.0170135498046875, "objective/train/weight_avg": 1.0012816190719604, "objective/train/weighted_lm_loss": 1.6339389085769653, "objective/train/weights_max": 1.4248768091201782, "objective/train/weights_min": 0.36841943860054016, "theoretical_loss": 3.9286968713680883, "tokens_seen": 517734400 }, { "epoch": 0.16, "learning_rate": 0.0008516289520141229, "loss": 0.0804, "theoretical_loss": 3.9286968713680883, "tokens_seen": 517734400 }, { "epoch": 0.16, "learning_rate": 0.0008515487080725406, "loss": 0.0823, "theoretical_loss": 3.9284855103140615, "tokens_seen": 517996544 }, { "epoch": 0.16, "learning_rate": 0.0008514684641309581, "loss": 0.0822, "theoretical_loss": 3.9282742861297524, "tokens_seen": 518258688 }, { "epoch": 0.16, "learning_rate": 0.0008513882201893756, "loss": 0.0788, "theoretical_loss": 3.928063198657365, "tokens_seen": 518520832 }, { "epoch": 0.16, "learning_rate": 0.0008513079762477934, "loss": 0.0808, "theoretical_loss": 3.9278522477393656, "tokens_seen": 518782976 }, { "epoch": 0.16, "learning_rate": 0.0008512277323062109, "loss": 0.0824, "theoretical_loss": 3.9276414332184815, "tokens_seen": 519045120 }, { "epoch": 0.16, "learning_rate": 0.0008511474883646285, "loss": 0.0818, "theoretical_loss": 3.927430754937699, "tokens_seen": 519307264 }, { "epoch": 0.16, "learning_rate": 0.0008510672444230461, "loss": 0.0795, "theoretical_loss": 3.927220212740267, "tokens_seen": 519569408 }, { "epoch": 0.16, "learning_rate": 0.0008509870004814637, "loss": 0.0795, "theoretical_loss": 3.9270098064696906, "tokens_seen": 519831552 }, { "epoch": 0.16, "learning_rate": 0.0008509067565398812, "loss": 0.0813, "theoretical_loss": 3.9267995359697356, "tokens_seen": 520093696 }, { "epoch": 0.16, "learning_rate": 0.0008508265125982988, "loss": 0.082, "theoretical_loss": 3.926589401084426, "tokens_seen": 520355840 }, { "epoch": 0.16, "learning_rate": 0.0008507462686567164, "loss": 0.0803, "theoretical_loss": 3.9263794016580427, "tokens_seen": 520617984 }, { "epoch": 0.16, "learning_rate": 0.0008506660247151339, "loss": 0.0798, "theoretical_loss": 3.9261695375351238, "tokens_seen": 520880128 }, { "epoch": 0.16, "learning_rate": 0.0008505857807735517, "loss": 0.0839, "theoretical_loss": 3.9259598085604646, "tokens_seen": 521142272 }, { "epoch": 0.16, "learning_rate": 0.0008505055368319692, "loss": 0.0809, "theoretical_loss": 3.925750214579116, "tokens_seen": 521404416 }, { "epoch": 0.16, "learning_rate": 0.0008504252928903869, "loss": 0.0844, "theoretical_loss": 3.9255407554363835, "tokens_seen": 521666560 }, { "epoch": 0.16, "learning_rate": 0.0008503450489488044, "loss": 0.0812, "theoretical_loss": 3.92533143097783, "tokens_seen": 521928704 }, { "epoch": 0.16, "learning_rate": 0.0008502648050072219, "loss": 0.0827, "theoretical_loss": 3.9251222410492694, "tokens_seen": 522190848 }, { "epoch": 0.16, "learning_rate": 0.0008501845610656396, "loss": 0.0829, "theoretical_loss": 3.924913185496772, "tokens_seen": 522452992 }, { "epoch": 0.16, "learning_rate": 0.0008501043171240571, "loss": 0.0813, "theoretical_loss": 3.924704264166659, "tokens_seen": 522715136 }, { "epoch": 0.16, "learning_rate": 0.0008500240731824747, "loss": 0.0816, "theoretical_loss": 3.9244954769055074, "tokens_seen": 522977280 }, { "epoch": 0.16, "learning_rate": 0.0008499438292408923, "loss": 0.0843, "theoretical_loss": 3.924286823560144, "tokens_seen": 523239424 }, { "epoch": 0.16, "learning_rate": 0.00084986358529931, "loss": 0.0805, "theoretical_loss": 3.9240783039776472, "tokens_seen": 523501568 }, { "epoch": 0.16, "learning_rate": 0.0008497833413577275, "loss": 0.0825, "theoretical_loss": 3.9238699180053485, "tokens_seen": 523763712 }, { "epoch": 0.16, "learning_rate": 0.0008497030974161451, "loss": 0.0836, "theoretical_loss": 3.923661665490828, "tokens_seen": 524025856 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0016532277222722769, "objective/train/docs_used": 196964, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5641077756881714, "objective/train/original_loss": 1.5641076564788818, "objective/train/theoretical_loss": 3.9234535462819156, "objective/train/tokens_used": 544748000, "objective/train/value_avg": -0.00901031494140625, "objective/train/value_loss": 0.00023702048929408193, "objective/train/value_max": -0.0002434253692626953, "objective/train/value_min": -0.58154296875, "objective/train/value_reward_corr": 0.6424405895231002, "objective/train/value_std": 0.0155029296875, "objective/train/weight_avg": 1.0017648935317993, "objective/train/weighted_lm_loss": 1.566743016242981, "objective/train/weights_max": 1.7467089891433716, "objective/train/weights_min": 0.3871968686580658, "theoretical_loss": 3.9234535462819156, "tokens_seen": 524288000 }, { "epoch": 0.16, "learning_rate": 0.0008496228534745627, "loss": 0.0815, "theoretical_loss": 3.9234535462819156, "tokens_seen": 524288000 }, { "epoch": 0.16, "learning_rate": 0.0008495426095329802, "loss": 0.0821, "theoretical_loss": 3.923245560226693, "tokens_seen": 524550144 }, { "epoch": 0.16, "learning_rate": 0.0008494623655913979, "loss": 0.0791, "theoretical_loss": 3.9230377071734885, "tokens_seen": 524812288 }, { "epoch": 0.16, "learning_rate": 0.0008493821216498154, "loss": 0.0819, "theoretical_loss": 3.9228299869708794, "tokens_seen": 525074432 }, { "epoch": 0.16, "learning_rate": 0.000849301877708233, "loss": 0.0824, "theoretical_loss": 3.9226223994676923, "tokens_seen": 525336576 }, { "epoch": 0.16, "learning_rate": 0.0008492216337666506, "loss": 0.0852, "theoretical_loss": 3.9224149445129983, "tokens_seen": 525598720 }, { "epoch": 0.16, "learning_rate": 0.0008491413898250683, "loss": 0.0812, "theoretical_loss": 3.922207621956119, "tokens_seen": 525860864 }, { "epoch": 0.16, "learning_rate": 0.0008490611458834859, "loss": 0.0804, "theoretical_loss": 3.9220004316466186, "tokens_seen": 526123008 }, { "epoch": 0.16, "learning_rate": 0.0008489809019419034, "loss": 0.0838, "theoretical_loss": 3.9217933734343093, "tokens_seen": 526385152 }, { "epoch": 0.16, "learning_rate": 0.000848900658000321, "loss": 0.0825, "theoretical_loss": 3.9215864471692488, "tokens_seen": 526647296 }, { "epoch": 0.16, "learning_rate": 0.0008488204140587386, "loss": 0.0816, "theoretical_loss": 3.921379652701738, "tokens_seen": 526909440 }, { "epoch": 0.16, "learning_rate": 0.0008487401701171562, "loss": 0.0808, "theoretical_loss": 3.9211729898823235, "tokens_seen": 527171584 }, { "epoch": 0.16, "learning_rate": 0.0008486599261755737, "loss": 0.082, "theoretical_loss": 3.920966458561794, "tokens_seen": 527433728 }, { "epoch": 0.16, "learning_rate": 0.0008485796822339914, "loss": 0.0813, "theoretical_loss": 3.920760058591182, "tokens_seen": 527695872 }, { "epoch": 0.16, "learning_rate": 0.0008484994382924089, "loss": 0.0826, "theoretical_loss": 3.9205537898217644, "tokens_seen": 527958016 }, { "epoch": 0.16, "learning_rate": 0.0008484191943508264, "loss": 0.0817, "theoretical_loss": 3.920347652105058, "tokens_seen": 528220160 }, { "epoch": 0.16, "learning_rate": 0.0008483389504092442, "loss": 0.0826, "theoretical_loss": 3.920141645292821, "tokens_seen": 528482304 }, { "epoch": 0.16, "learning_rate": 0.0008482587064676617, "loss": 0.081, "theoretical_loss": 3.919935769237055, "tokens_seen": 528744448 }, { "epoch": 0.16, "learning_rate": 0.0008481784625260793, "loss": 0.0839, "theoretical_loss": 3.91973002379, "tokens_seen": 529006592 }, { "epoch": 0.16, "learning_rate": 0.0008480982185844969, "loss": 0.0818, "theoretical_loss": 3.919524408804137, "tokens_seen": 529268736 }, { "epoch": 0.16, "learning_rate": 0.0008480179746429145, "loss": 0.0827, "theoretical_loss": 3.9193189241321873, "tokens_seen": 529530880 }, { "epoch": 0.16, "learning_rate": 0.0008479377307013321, "loss": 0.0821, "theoretical_loss": 3.9191135696271098, "tokens_seen": 529793024 }, { "epoch": 0.16, "learning_rate": 0.0008478574867597496, "loss": 0.0851, "theoretical_loss": 3.9189083451421025, "tokens_seen": 530055168 }, { "epoch": 0.16, "learning_rate": 0.0008477772428181672, "loss": 0.0782, "theoretical_loss": 3.9187032505306023, "tokens_seen": 530317312 }, { "epoch": 0.16, "learning_rate": 0.0008476969988765848, "loss": 0.0809, "theoretical_loss": 3.918498285646282, "tokens_seen": 530579456 }, { "epoch": 0.16, "objective/train/advantage_avg": 7.436873420374468e-05, "objective/train/docs_used": 199409, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5998988151550293, "objective/train/original_loss": 1.5998989343643188, "objective/train/theoretical_loss": 3.9182934503430538, "objective/train/tokens_used": 551301600, "objective/train/value_avg": -0.00795745849609375, "objective/train/value_loss": 0.00032716241548769176, "objective/train/value_max": -0.00034332275390625, "objective/train/value_min": -0.72900390625, "objective/train/value_reward_corr": 0.5799935782849467, "objective/train/value_std": 0.01358795166015625, "objective/train/weight_avg": 1.0002270936965942, "objective/train/weighted_lm_loss": 1.5996286869049072, "objective/train/weights_max": 1.4670954942703247, "objective/train/weights_min": 0.38993605971336365, "theoretical_loss": 3.9182934503430538, "tokens_seen": 530841600 }, { "epoch": 0.16, "learning_rate": 0.0008476167549350025, "loss": 0.0815, "theoretical_loss": 3.9182934503430538, "tokens_seen": 530841600 }, { "epoch": 0.16, "learning_rate": 0.00084753651099342, "loss": 0.0792, "theoretical_loss": 3.918088744475064, "tokens_seen": 531103744 }, { "epoch": 0.16, "learning_rate": 0.0008474562670518377, "loss": 0.084, "theoretical_loss": 3.9178841678966956, "tokens_seen": 531365888 }, { "epoch": 0.16, "learning_rate": 0.0008473760231102552, "loss": 0.0841, "theoretical_loss": 3.9176797204625693, "tokens_seen": 531628032 }, { "epoch": 0.16, "learning_rate": 0.0008472957791686727, "loss": 0.083, "theoretical_loss": 3.917475402027537, "tokens_seen": 531890176 }, { "epoch": 0.16, "learning_rate": 0.0008472155352270904, "loss": 0.0796, "theoretical_loss": 3.917271212446689, "tokens_seen": 532152320 }, { "epoch": 0.16, "learning_rate": 0.0008471352912855079, "loss": 0.082, "theoretical_loss": 3.917067151575348, "tokens_seen": 532414464 }, { "epoch": 0.16, "learning_rate": 0.0008470550473439255, "loss": 0.0846, "theoretical_loss": 3.916863219269069, "tokens_seen": 532676608 }, { "epoch": 0.16, "learning_rate": 0.0008469748034023431, "loss": 0.0813, "theoretical_loss": 3.9166594153836427, "tokens_seen": 532938752 }, { "epoch": 0.16, "learning_rate": 0.0008468945594607608, "loss": 0.082, "theoretical_loss": 3.9164557397750897, "tokens_seen": 533200896 }, { "epoch": 0.16, "learning_rate": 0.0008468143155191783, "loss": 0.0815, "theoretical_loss": 3.916252192299665, "tokens_seen": 533463040 }, { "epoch": 0.16, "learning_rate": 0.0008467340715775959, "loss": 0.0816, "theoretical_loss": 3.9160487728138538, "tokens_seen": 533725184 }, { "epoch": 0.16, "learning_rate": 0.0008466538276360135, "loss": 0.0821, "theoretical_loss": 3.9158454811743733, "tokens_seen": 533987328 }, { "epoch": 0.16, "learning_rate": 0.0008465735836944311, "loss": 0.0847, "theoretical_loss": 3.915642317238171, "tokens_seen": 534249472 }, { "epoch": 0.16, "learning_rate": 0.0008464933397528487, "loss": 0.0836, "theoretical_loss": 3.915439280862423, "tokens_seen": 534511616 }, { "epoch": 0.16, "learning_rate": 0.0008464130958112662, "loss": 0.0811, "theoretical_loss": 3.915236371904539, "tokens_seen": 534773760 }, { "epoch": 0.16, "learning_rate": 0.0008463328518696839, "loss": 0.0822, "theoretical_loss": 3.915033590222153, "tokens_seen": 535035904 }, { "epoch": 0.16, "learning_rate": 0.0008462526079281014, "loss": 0.0824, "theoretical_loss": 3.914830935673132, "tokens_seen": 535298048 }, { "epoch": 0.16, "learning_rate": 0.0008461723639865191, "loss": 0.0793, "theoretical_loss": 3.914628408115569, "tokens_seen": 535560192 }, { "epoch": 0.16, "learning_rate": 0.0008460921200449367, "loss": 0.0813, "theoretical_loss": 3.9144260074077843, "tokens_seen": 535822336 }, { "epoch": 0.16, "learning_rate": 0.0008460118761033542, "loss": 0.084, "theoretical_loss": 3.9142237334083276, "tokens_seen": 536084480 }, { "epoch": 0.16, "learning_rate": 0.0008459316321617718, "loss": 0.0814, "theoretical_loss": 3.914021585975973, "tokens_seen": 536346624 }, { "epoch": 0.16, "learning_rate": 0.0008458513882201894, "loss": 0.0803, "theoretical_loss": 3.9138195649697227, "tokens_seen": 536608768 }, { "epoch": 0.16, "learning_rate": 0.000845771144278607, "loss": 0.0823, "theoretical_loss": 3.9136176702488044, "tokens_seen": 536870912 }, { "epoch": 0.16, "learning_rate": 0.0008456909003370245, "loss": 0.0789, "theoretical_loss": 3.91341590167267, "tokens_seen": 537133056 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0004424227518029511, "objective/train/docs_used": 201641, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5473430156707764, "objective/train/original_loss": 1.5473428964614868, "objective/train/theoretical_loss": 3.9132142591009975, "objective/train/tokens_used": 557855200, "objective/train/value_avg": -0.007518768310546875, "objective/train/value_loss": 0.00024131749523803592, "objective/train/value_max": -0.00021147727966308594, "objective/train/value_min": -0.64697265625, "objective/train/value_reward_corr": 0.657343727691196, "objective/train/value_std": 0.01261138916015625, "objective/train/weight_avg": 1.0005532503128052, "objective/train/weighted_lm_loss": 1.5476828813552856, "objective/train/weights_max": 1.3888442516326904, "objective/train/weights_min": 0.3710732161998749, "theoretical_loss": 3.9132142591009975, "tokens_seen": 537395200 }, { "epoch": 0.16, "learning_rate": 0.0008456106563954422, "loss": 0.0803, "theoretical_loss": 3.9132142591009975, "tokens_seen": 537395200 }, { "epoch": 0.16, "learning_rate": 0.0008455304124538597, "loss": 0.081, "theoretical_loss": 3.9130127423936907, "tokens_seen": 537657344 }, { "epoch": 0.16, "learning_rate": 0.0008454501685122772, "loss": 0.0836, "theoretical_loss": 3.9128113514108733, "tokens_seen": 537919488 }, { "epoch": 0.16, "learning_rate": 0.000845369924570695, "loss": 0.0817, "theoretical_loss": 3.9126100860128963, "tokens_seen": 538181632 }, { "epoch": 0.16, "learning_rate": 0.0008452896806291125, "loss": 0.0832, "theoretical_loss": 3.9124089460603324, "tokens_seen": 538443776 }, { "epoch": 0.16, "learning_rate": 0.0008452094366875302, "loss": 0.0823, "theoretical_loss": 3.9122079314139766, "tokens_seen": 538705920 }, { "epoch": 0.16, "learning_rate": 0.0008451291927459477, "loss": 0.0824, "theoretical_loss": 3.9120070419348463, "tokens_seen": 538968064 }, { "epoch": 0.16, "learning_rate": 0.0008450489488043653, "loss": 0.0795, "theoretical_loss": 3.9118062774841804, "tokens_seen": 539230208 }, { "epoch": 0.16, "learning_rate": 0.0008449687048627829, "loss": 0.0836, "theoretical_loss": 3.91160563792344, "tokens_seen": 539492352 }, { "epoch": 0.16, "learning_rate": 0.0008448884609212004, "loss": 0.0825, "theoretical_loss": 3.911405123114305, "tokens_seen": 539754496 }, { "epoch": 0.16, "learning_rate": 0.000844808216979618, "loss": 0.0844, "theoretical_loss": 3.9112047329186783, "tokens_seen": 540016640 }, { "epoch": 0.16, "learning_rate": 0.0008447279730380356, "loss": 0.0829, "theoretical_loss": 3.911004467198679, "tokens_seen": 540278784 }, { "epoch": 0.16, "learning_rate": 0.0008446477290964533, "loss": 0.0839, "theoretical_loss": 3.9108043258166485, "tokens_seen": 540540928 }, { "epoch": 0.16, "learning_rate": 0.0008445674851548708, "loss": 0.0813, "theoretical_loss": 3.910604308635146, "tokens_seen": 540803072 }, { "epoch": 0.16, "learning_rate": 0.0008444872412132885, "loss": 0.0796, "theoretical_loss": 3.9104044155169495, "tokens_seen": 541065216 }, { "epoch": 0.16, "learning_rate": 0.000844406997271706, "loss": 0.0852, "theoretical_loss": 3.910204646325055, "tokens_seen": 541327360 }, { "epoch": 0.16, "learning_rate": 0.0008443267533301235, "loss": 0.0845, "theoretical_loss": 3.9100050009226752, "tokens_seen": 541589504 }, { "epoch": 0.16, "learning_rate": 0.0008442465093885412, "loss": 0.0826, "theoretical_loss": 3.9098054791732406, "tokens_seen": 541851648 }, { "epoch": 0.16, "learning_rate": 0.0008441662654469587, "loss": 0.0833, "theoretical_loss": 3.909606080940399, "tokens_seen": 542113792 }, { "epoch": 0.16, "learning_rate": 0.0008440860215053764, "loss": 0.0824, "theoretical_loss": 3.909406806088013, "tokens_seen": 542375936 }, { "epoch": 0.16, "learning_rate": 0.0008440057775637939, "loss": 0.0836, "theoretical_loss": 3.909207654480162, "tokens_seen": 542638080 }, { "epoch": 0.16, "learning_rate": 0.0008439255336222116, "loss": 0.0817, "theoretical_loss": 3.9090086259811403, "tokens_seen": 542900224 }, { "epoch": 0.16, "learning_rate": 0.0008438452896806292, "loss": 0.082, "theoretical_loss": 3.908809720455457, "tokens_seen": 543162368 }, { "epoch": 0.16, "learning_rate": 0.0008437650457390467, "loss": 0.0819, "theoretical_loss": 3.908610937767836, "tokens_seen": 543424512 }, { "epoch": 0.16, "learning_rate": 0.0008436848017974643, "loss": 0.0828, "theoretical_loss": 3.9084122777832144, "tokens_seen": 543686656 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0003520216268952936, "objective/train/docs_used": 204061, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.68601655960083, "objective/train/original_loss": 1.6860167980194092, "objective/train/theoretical_loss": 3.908213740366744, "objective/train/tokens_used": 564408800, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.0002631843963172287, "objective/train/value_max": -0.0003101825714111328, "objective/train/value_min": -0.2208251953125, "objective/train/value_reward_corr": 0.6833106642077473, "objective/train/value_std": 0.01250457763671875, "objective/train/weight_avg": 1.000474214553833, "objective/train/weighted_lm_loss": 1.6869690418243408, "objective/train/weights_max": 1.1504311561584473, "objective/train/weights_min": 0.3698042333126068, "theoretical_loss": 3.908213740366744, "tokens_seen": 543948800 }, { "epoch": 0.16, "learning_rate": 0.0008436045578558819, "loss": 0.0831, "theoretical_loss": 3.908213740366744, "tokens_seen": 543948800 }, { "epoch": 0.16, "learning_rate": 0.0008435243139142995, "loss": 0.0826, "theoretical_loss": 3.908015325383788, "tokens_seen": 544210944 }, { "epoch": 0.16, "learning_rate": 0.000843444069972717, "loss": 0.0819, "theoretical_loss": 3.907817032699924, "tokens_seen": 544473088 }, { "epoch": 0.17, "learning_rate": 0.0008433638260311347, "loss": 0.0809, "theoretical_loss": 3.9076188621809416, "tokens_seen": 544735232 }, { "epoch": 0.17, "learning_rate": 0.0008432835820895522, "loss": 0.0838, "theoretical_loss": 3.9074208136928408, "tokens_seen": 544997376 }, { "epoch": 0.17, "learning_rate": 0.0008432033381479699, "loss": 0.083, "theoretical_loss": 3.907222887101834, "tokens_seen": 545259520 }, { "epoch": 0.17, "learning_rate": 0.0008431230942063875, "loss": 0.0854, "theoretical_loss": 3.9070250822743446, "tokens_seen": 545521664 }, { "epoch": 0.17, "learning_rate": 0.000843042850264805, "loss": 0.0844, "theoretical_loss": 3.906827399077006, "tokens_seen": 545783808 }, { "epoch": 0.17, "learning_rate": 0.0008429626063232226, "loss": 0.0827, "theoretical_loss": 3.9066298373766615, "tokens_seen": 546045952 }, { "epoch": 0.17, "learning_rate": 0.0008428823623816402, "loss": 0.0831, "theoretical_loss": 3.9064323970403656, "tokens_seen": 546308096 }, { "epoch": 0.17, "learning_rate": 0.0008428021184400578, "loss": 0.0832, "theoretical_loss": 3.9062350779353787, "tokens_seen": 546570240 }, { "epoch": 0.17, "learning_rate": 0.0008427218744984754, "loss": 0.0825, "theoretical_loss": 3.906037879929174, "tokens_seen": 546832384 }, { "epoch": 0.17, "learning_rate": 0.000842641630556893, "loss": 0.0813, "theoretical_loss": 3.90584080288943, "tokens_seen": 547094528 }, { "epoch": 0.17, "learning_rate": 0.0008425613866153105, "loss": 0.083, "theoretical_loss": 3.905643846684034, "tokens_seen": 547356672 }, { "epoch": 0.17, "learning_rate": 0.0008424811426737282, "loss": 0.08, "theoretical_loss": 3.9054470111810815, "tokens_seen": 547618816 }, { "epoch": 0.17, "learning_rate": 0.0008424008987321458, "loss": 0.0811, "theoretical_loss": 3.9052502962488735, "tokens_seen": 547880960 }, { "epoch": 0.17, "learning_rate": 0.0008423206547905633, "loss": 0.0826, "theoretical_loss": 3.9050537017559197, "tokens_seen": 548143104 }, { "epoch": 0.17, "learning_rate": 0.000842240410848981, "loss": 0.0827, "theoretical_loss": 3.904857227570934, "tokens_seen": 548405248 }, { "epoch": 0.17, "learning_rate": 0.0008421601669073985, "loss": 0.0788, "theoretical_loss": 3.904660873562837, "tokens_seen": 548667392 }, { "epoch": 0.17, "learning_rate": 0.0008420799229658161, "loss": 0.0814, "theoretical_loss": 3.9044646396007545, "tokens_seen": 548929536 }, { "epoch": 0.17, "learning_rate": 0.0008419996790242337, "loss": 0.0818, "theoretical_loss": 3.9042685255540177, "tokens_seen": 549191680 }, { "epoch": 0.17, "learning_rate": 0.0008419194350826512, "loss": 0.0825, "theoretical_loss": 3.9040725312921616, "tokens_seen": 549453824 }, { "epoch": 0.17, "learning_rate": 0.0008418391911410688, "loss": 0.0825, "theoretical_loss": 3.9038766566849263, "tokens_seen": 549715968 }, { "epoch": 0.17, "learning_rate": 0.0008417589471994864, "loss": 0.0823, "theoretical_loss": 3.903680901602254, "tokens_seen": 549978112 }, { "epoch": 0.17, "learning_rate": 0.0008416787032579041, "loss": 0.0811, "theoretical_loss": 3.9034852659142913, "tokens_seen": 550240256 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0010044012451544404, "objective/train/docs_used": 206347, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6453677415847778, "objective/train/original_loss": 1.6453678607940674, "objective/train/theoretical_loss": 3.9032897494913876, "objective/train/tokens_used": 570962400, "objective/train/value_avg": -0.0096435546875, "objective/train/value_loss": 0.0002447115839459002, "objective/train/value_max": -0.00025916099548339844, "objective/train/value_min": -0.59619140625, "objective/train/value_reward_corr": 0.7088037903523504, "objective/train/value_std": 0.01555633544921875, "objective/train/weight_avg": 1.0011190176010132, "objective/train/weighted_lm_loss": 1.6466437578201294, "objective/train/weights_max": 1.6212894916534424, "objective/train/weights_min": 0.3786143362522125, "theoretical_loss": 3.9032897494913876, "tokens_seen": 550502400 }, { "epoch": 0.17, "learning_rate": 0.0008415984593163216, "loss": 0.0808, "theoretical_loss": 3.9032897494913876, "tokens_seen": 550502400 }, { "epoch": 0.17, "learning_rate": 0.0008415182153747393, "loss": 0.0809, "theoretical_loss": 3.9030943522040946, "tokens_seen": 550764544 }, { "epoch": 0.17, "learning_rate": 0.0008414379714331568, "loss": 0.0847, "theoretical_loss": 3.902899073923166, "tokens_seen": 551026688 }, { "epoch": 0.17, "learning_rate": 0.0008413577274915744, "loss": 0.0825, "theoretical_loss": 3.902703914519557, "tokens_seen": 551288832 }, { "epoch": 0.17, "learning_rate": 0.000841277483549992, "loss": 0.0825, "theoretical_loss": 3.9025088738644236, "tokens_seen": 551550976 }, { "epoch": 0.17, "learning_rate": 0.0008411972396084095, "loss": 0.0796, "theoretical_loss": 3.9023139518291243, "tokens_seen": 551813120 }, { "epoch": 0.17, "learning_rate": 0.0008411169956668272, "loss": 0.0806, "theoretical_loss": 3.902119148285216, "tokens_seen": 552075264 }, { "epoch": 0.17, "learning_rate": 0.0008410367517252447, "loss": 0.0824, "theoretical_loss": 3.9019244631044563, "tokens_seen": 552337408 }, { "epoch": 0.17, "learning_rate": 0.0008409565077836624, "loss": 0.0798, "theoretical_loss": 3.9017298961588027, "tokens_seen": 552599552 }, { "epoch": 0.17, "learning_rate": 0.00084087626384208, "loss": 0.0815, "theoretical_loss": 3.901535447320412, "tokens_seen": 552861696 }, { "epoch": 0.17, "learning_rate": 0.0008407960199004975, "loss": 0.0809, "theoretical_loss": 3.901341116461639, "tokens_seen": 553123840 }, { "epoch": 0.17, "learning_rate": 0.0008407157759589151, "loss": 0.0804, "theoretical_loss": 3.9011469034550372, "tokens_seen": 553385984 }, { "epoch": 0.17, "learning_rate": 0.0008406355320173327, "loss": 0.0816, "theoretical_loss": 3.900952808173358, "tokens_seen": 553648128 }, { "epoch": 0.17, "learning_rate": 0.0008405552880757503, "loss": 0.0834, "theoretical_loss": 3.900758830489551, "tokens_seen": 553910272 }, { "epoch": 0.17, "learning_rate": 0.0008404750441341678, "loss": 0.0827, "theoretical_loss": 3.900564970276762, "tokens_seen": 554172416 }, { "epoch": 0.17, "learning_rate": 0.0008403948001925855, "loss": 0.0809, "theoretical_loss": 3.9003712274083346, "tokens_seen": 554434560 }, { "epoch": 0.17, "learning_rate": 0.000840314556251003, "loss": 0.0822, "theoretical_loss": 3.9001776017578074, "tokens_seen": 554696704 }, { "epoch": 0.17, "learning_rate": 0.0008402343123094207, "loss": 0.0812, "theoretical_loss": 3.899984093198916, "tokens_seen": 554958848 }, { "epoch": 0.17, "learning_rate": 0.0008401540683678383, "loss": 0.0798, "theoretical_loss": 3.899790701605592, "tokens_seen": 555220992 }, { "epoch": 0.17, "learning_rate": 0.0008400738244262558, "loss": 0.0807, "theoretical_loss": 3.899597426851961, "tokens_seen": 555483136 }, { "epoch": 0.17, "learning_rate": 0.0008399935804846735, "loss": 0.078, "theoretical_loss": 3.899404268812343, "tokens_seen": 555745280 }, { "epoch": 0.17, "learning_rate": 0.000839913336543091, "loss": 0.0803, "theoretical_loss": 3.8992112273612545, "tokens_seen": 556007424 }, { "epoch": 0.17, "learning_rate": 0.0008398330926015086, "loss": 0.0821, "theoretical_loss": 3.8990183023734044, "tokens_seen": 556269568 }, { "epoch": 0.17, "learning_rate": 0.0008397528486599262, "loss": 0.0813, "theoretical_loss": 3.8988254937236952, "tokens_seen": 556531712 }, { "epoch": 0.17, "learning_rate": 0.0008396726047183438, "loss": 0.0815, "theoretical_loss": 3.8986328012872233, "tokens_seen": 556793856 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.002579332096502185, "objective/train/docs_used": 208740, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5783114433288574, "objective/train/original_loss": 1.5783112049102783, "objective/train/theoretical_loss": 3.8984402249392778, "objective/train/tokens_used": 577516000, "objective/train/value_avg": -0.00783538818359375, "objective/train/value_loss": 0.0001847637031460181, "objective/train/value_max": -0.0002868175506591797, "objective/train/value_min": -0.235107421875, "objective/train/value_reward_corr": 0.47073112766333935, "objective/train/value_std": 0.00836944580078125, "objective/train/weight_avg": 1.00266432762146, "objective/train/weighted_lm_loss": 1.5828243494033813, "objective/train/weights_max": 1.1927765607833862, "objective/train/weights_min": 0.36875197291374207, "theoretical_loss": 3.8984402249392778, "tokens_seen": 557056000 }, { "epoch": 0.17, "learning_rate": 0.0008395923607767613, "loss": 0.079, "theoretical_loss": 3.8984402249392778, "tokens_seen": 557056000 }, { "epoch": 0.17, "learning_rate": 0.000839512116835179, "loss": 0.0781, "theoretical_loss": 3.8982477645553395, "tokens_seen": 557318144 }, { "epoch": 0.17, "learning_rate": 0.0008394318728935966, "loss": 0.0796, "theoretical_loss": 3.898055420011082, "tokens_seen": 557580288 }, { "epoch": 0.17, "learning_rate": 0.0008393516289520141, "loss": 0.0779, "theoretical_loss": 3.8978631911823705, "tokens_seen": 557842432 }, { "epoch": 0.17, "learning_rate": 0.0008392713850104318, "loss": 0.0799, "theoretical_loss": 3.8976710779452612, "tokens_seen": 558104576 }, { "epoch": 0.17, "learning_rate": 0.0008391911410688493, "loss": 0.0811, "theoretical_loss": 3.8974790801760015, "tokens_seen": 558366720 }, { "epoch": 0.17, "learning_rate": 0.0008391108971272669, "loss": 0.0834, "theoretical_loss": 3.897287197751029, "tokens_seen": 558628864 }, { "epoch": 0.17, "learning_rate": 0.0008390306531856845, "loss": 0.0828, "theoretical_loss": 3.897095430546971, "tokens_seen": 558891008 }, { "epoch": 0.17, "learning_rate": 0.000838950409244102, "loss": 0.083, "theoretical_loss": 3.896903778440646, "tokens_seen": 559153152 }, { "epoch": 0.17, "learning_rate": 0.0008388701653025197, "loss": 0.0822, "theoretical_loss": 3.896712241309061, "tokens_seen": 559415296 }, { "epoch": 0.17, "learning_rate": 0.0008387899213609372, "loss": 0.083, "theoretical_loss": 3.896520819029411, "tokens_seen": 559677440 }, { "epoch": 0.17, "learning_rate": 0.0008387096774193549, "loss": 0.0816, "theoretical_loss": 3.896329511479082, "tokens_seen": 559939584 }, { "epoch": 0.17, "learning_rate": 0.0008386294334777725, "loss": 0.0789, "theoretical_loss": 3.8961383185356455, "tokens_seen": 560201728 }, { "epoch": 0.17, "learning_rate": 0.0008385491895361901, "loss": 0.0789, "theoretical_loss": 3.895947240076862, "tokens_seen": 560463872 }, { "epoch": 0.17, "learning_rate": 0.0008384689455946076, "loss": 0.0782, "theoretical_loss": 3.895756275980681, "tokens_seen": 560726016 }, { "epoch": 0.17, "learning_rate": 0.0008383887016530252, "loss": 0.0817, "theoretical_loss": 3.895565426125237, "tokens_seen": 560988160 }, { "epoch": 0.17, "learning_rate": 0.0008383084577114428, "loss": 0.0809, "theoretical_loss": 3.8953746903888513, "tokens_seen": 561250304 }, { "epoch": 0.17, "learning_rate": 0.0008382282137698603, "loss": 0.0785, "theoretical_loss": 3.895184068650033, "tokens_seen": 561512448 }, { "epoch": 0.17, "learning_rate": 0.000838147969828278, "loss": 0.0814, "theoretical_loss": 3.8949935607874764, "tokens_seen": 561774592 }, { "epoch": 0.17, "learning_rate": 0.0008380677258866955, "loss": 0.0803, "theoretical_loss": 3.8948031666800613, "tokens_seen": 562036736 }, { "epoch": 0.17, "learning_rate": 0.0008379874819451132, "loss": 0.0835, "theoretical_loss": 3.8946128862068528, "tokens_seen": 562298880 }, { "epoch": 0.17, "learning_rate": 0.0008379072380035308, "loss": 0.0802, "theoretical_loss": 3.8944227192471006, "tokens_seen": 562561024 }, { "epoch": 0.17, "learning_rate": 0.0008378269940619483, "loss": 0.0818, "theoretical_loss": 3.8942326656802395, "tokens_seen": 562823168 }, { "epoch": 0.17, "learning_rate": 0.000837746750120366, "loss": 0.0825, "theoretical_loss": 3.894042725385888, "tokens_seen": 563085312 }, { "epoch": 0.17, "learning_rate": 0.0008376665061787835, "loss": 0.0834, "theoretical_loss": 3.893852898243849, "tokens_seen": 563347456 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0005252464907243848, "objective/train/docs_used": 211161, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6151103973388672, "objective/train/original_loss": 1.6151103973388672, "objective/train/theoretical_loss": 3.8936631841341076, "objective/train/tokens_used": 584069600, "objective/train/value_avg": -0.0084381103515625, "objective/train/value_loss": 0.00027146536740474403, "objective/train/value_max": -0.0002613067626953125, "objective/train/value_min": -0.5791015625, "objective/train/value_reward_corr": 0.630790701887311, "objective/train/value_std": 0.01227569580078125, "objective/train/weight_avg": 1.0006484985351562, "objective/train/weighted_lm_loss": 1.6155797243118286, "objective/train/weights_max": 1.255659818649292, "objective/train/weights_min": 0.3694954514503479, "theoretical_loss": 3.8936631841341076, "tokens_seen": 563609600 }, { "epoch": 0.17, "learning_rate": 0.0008375862622372011, "loss": 0.0798, "theoretical_loss": 3.8936631841341076, "tokens_seen": 563609600 }, { "epoch": 0.17, "learning_rate": 0.0008375060182956187, "loss": 0.0812, "theoretical_loss": 3.893473582936833, "tokens_seen": 563871744 }, { "epoch": 0.17, "learning_rate": 0.0008374257743540363, "loss": 0.0826, "theoretical_loss": 3.8932840945323774, "tokens_seen": 564133888 }, { "epoch": 0.17, "learning_rate": 0.0008373455304124538, "loss": 0.0806, "theoretical_loss": 3.8930947188012737, "tokens_seen": 564396032 }, { "epoch": 0.17, "learning_rate": 0.0008372652864708715, "loss": 0.0783, "theoretical_loss": 3.8929054556242377, "tokens_seen": 564658176 }, { "epoch": 0.17, "learning_rate": 0.0008371850425292891, "loss": 0.0787, "theoretical_loss": 3.892716304882167, "tokens_seen": 564920320 }, { "epoch": 0.17, "learning_rate": 0.0008371047985877066, "loss": 0.0811, "theoretical_loss": 3.892527266456141, "tokens_seen": 565182464 }, { "epoch": 0.17, "learning_rate": 0.0008370245546461243, "loss": 0.0828, "theoretical_loss": 3.8923383402274174, "tokens_seen": 565444608 }, { "epoch": 0.17, "learning_rate": 0.0008369443107045418, "loss": 0.0819, "theoretical_loss": 3.8921495260774375, "tokens_seen": 565706752 }, { "epoch": 0.17, "learning_rate": 0.0008368640667629594, "loss": 0.0809, "theoretical_loss": 3.8919608238878216, "tokens_seen": 565968896 }, { "epoch": 0.17, "learning_rate": 0.000836783822821377, "loss": 0.077, "theoretical_loss": 3.891772233540369, "tokens_seen": 566231040 }, { "epoch": 0.17, "learning_rate": 0.0008367035788797946, "loss": 0.0798, "theoretical_loss": 3.8915837549170584, "tokens_seen": 566493184 }, { "epoch": 0.17, "learning_rate": 0.0008366233349382121, "loss": 0.079, "theoretical_loss": 3.89139538790005, "tokens_seen": 566755328 }, { "epoch": 0.17, "learning_rate": 0.0008365430909966297, "loss": 0.0827, "theoretical_loss": 3.8912071323716795, "tokens_seen": 567017472 }, { "epoch": 0.17, "learning_rate": 0.0008364628470550474, "loss": 0.0811, "theoretical_loss": 3.8910189882144626, "tokens_seen": 567279616 }, { "epoch": 0.17, "learning_rate": 0.000836382603113465, "loss": 0.0831, "theoretical_loss": 3.8908309553110936, "tokens_seen": 567541760 }, { "epoch": 0.17, "learning_rate": 0.0008363023591718826, "loss": 0.0802, "theoretical_loss": 3.8906430335444426, "tokens_seen": 567803904 }, { "epoch": 0.17, "learning_rate": 0.0008362221152303001, "loss": 0.0813, "theoretical_loss": 3.8904552227975593, "tokens_seen": 568066048 }, { "epoch": 0.17, "learning_rate": 0.0008361418712887178, "loss": 0.082, "theoretical_loss": 3.8902675229536685, "tokens_seen": 568328192 }, { "epoch": 0.17, "learning_rate": 0.0008360616273471353, "loss": 0.0823, "theoretical_loss": 3.8900799338961725, "tokens_seen": 568590336 }, { "epoch": 0.17, "learning_rate": 0.0008359813834055528, "loss": 0.0832, "theoretical_loss": 3.8898924555086496, "tokens_seen": 568852480 }, { "epoch": 0.17, "learning_rate": 0.0008359011394639705, "loss": 0.0809, "theoretical_loss": 3.8897050876748542, "tokens_seen": 569114624 }, { "epoch": 0.17, "learning_rate": 0.000835820895522388, "loss": 0.0821, "theoretical_loss": 3.8895178302787166, "tokens_seen": 569376768 }, { "epoch": 0.17, "learning_rate": 0.0008357406515808057, "loss": 0.0814, "theoretical_loss": 3.8893306832043404, "tokens_seen": 569638912 }, { "epoch": 0.17, "learning_rate": 0.0008356604076392233, "loss": 0.0833, "theoretical_loss": 3.8891436463360076, "tokens_seen": 569901056 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0007684892625547945, "objective/train/docs_used": 213119, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.636327862739563, "objective/train/original_loss": 1.6363275051116943, "objective/train/theoretical_loss": 3.8889567195581716, "objective/train/tokens_used": 590623200, "objective/train/value_avg": -0.00799560546875, "objective/train/value_loss": 0.00021641695639118552, "objective/train/value_max": -0.00025916099548339844, "objective/train/value_min": -0.65771484375, "objective/train/value_reward_corr": 0.6434202611001032, "objective/train/value_std": 0.0118408203125, "objective/train/weight_avg": 1.000868797302246, "objective/train/weighted_lm_loss": 1.6379215717315674, "objective/train/weights_max": 1.409537672996521, "objective/train/weights_min": 0.36900386214256287, "theoretical_loss": 3.8889567195581716, "tokens_seen": 570163200 }, { "epoch": 0.17, "learning_rate": 0.0008355801636976409, "loss": 0.0818, "theoretical_loss": 3.8889567195581716, "tokens_seen": 570163200 }, { "epoch": 0.17, "learning_rate": 0.0008354999197560584, "loss": 0.0787, "theoretical_loss": 3.8887699027554614, "tokens_seen": 570425344 }, { "epoch": 0.17, "learning_rate": 0.000835419675814476, "loss": 0.0802, "theoretical_loss": 3.8885831958126786, "tokens_seen": 570687488 }, { "epoch": 0.17, "learning_rate": 0.0008353394318728936, "loss": 0.0823, "theoretical_loss": 3.8883965986148015, "tokens_seen": 570949632 }, { "epoch": 0.17, "learning_rate": 0.0008352591879313111, "loss": 0.079, "theoretical_loss": 3.888210111046978, "tokens_seen": 571211776 }, { "epoch": 0.17, "learning_rate": 0.0008351789439897288, "loss": 0.0822, "theoretical_loss": 3.8880237329945295, "tokens_seen": 571473920 }, { "epoch": 0.17, "learning_rate": 0.0008350987000481463, "loss": 0.0836, "theoretical_loss": 3.887837464342952, "tokens_seen": 571736064 }, { "epoch": 0.17, "learning_rate": 0.0008350184561065641, "loss": 0.0825, "theoretical_loss": 3.8876513049779113, "tokens_seen": 571998208 }, { "epoch": 0.17, "learning_rate": 0.0008349382121649816, "loss": 0.0826, "theoretical_loss": 3.887465254785246, "tokens_seen": 572260352 }, { "epoch": 0.17, "learning_rate": 0.0008348579682233991, "loss": 0.0836, "theoretical_loss": 3.887279313650967, "tokens_seen": 572522496 }, { "epoch": 0.17, "learning_rate": 0.0008347777242818168, "loss": 0.0831, "theoretical_loss": 3.8870934814612546, "tokens_seen": 572784640 }, { "epoch": 0.17, "learning_rate": 0.0008346974803402343, "loss": 0.0782, "theoretical_loss": 3.886907758102461, "tokens_seen": 573046784 }, { "epoch": 0.17, "learning_rate": 0.0008346172363986519, "loss": 0.0829, "theoretical_loss": 3.8867221434611094, "tokens_seen": 573308928 }, { "epoch": 0.17, "learning_rate": 0.0008345369924570695, "loss": 0.0817, "theoretical_loss": 3.8865366374238914, "tokens_seen": 573571072 }, { "epoch": 0.17, "learning_rate": 0.0008344567485154871, "loss": 0.0816, "theoretical_loss": 3.88635123987767, "tokens_seen": 573833216 }, { "epoch": 0.17, "learning_rate": 0.0008343765045739046, "loss": 0.0823, "theoretical_loss": 3.8861659507094766, "tokens_seen": 574095360 }, { "epoch": 0.17, "learning_rate": 0.0008342962606323223, "loss": 0.0813, "theoretical_loss": 3.885980769806513, "tokens_seen": 574357504 }, { "epoch": 0.17, "learning_rate": 0.0008342160166907399, "loss": 0.0844, "theoretical_loss": 3.8857956970561487, "tokens_seen": 574619648 }, { "epoch": 0.17, "learning_rate": 0.0008341357727491574, "loss": 0.0785, "theoretical_loss": 3.8856107323459215, "tokens_seen": 574881792 }, { "epoch": 0.17, "learning_rate": 0.0008340555288075751, "loss": 0.0819, "theoretical_loss": 3.8854258755635387, "tokens_seen": 575143936 }, { "epoch": 0.17, "learning_rate": 0.0008339752848659926, "loss": 0.0829, "theoretical_loss": 3.885241126596874, "tokens_seen": 575406080 }, { "epoch": 0.17, "learning_rate": 0.0008338950409244103, "loss": 0.0799, "theoretical_loss": 3.885056485333969, "tokens_seen": 575668224 }, { "epoch": 0.17, "learning_rate": 0.0008338147969828278, "loss": 0.0828, "theoretical_loss": 3.884871951663034, "tokens_seen": 575930368 }, { "epoch": 0.17, "learning_rate": 0.0008337345530412454, "loss": 0.0778, "theoretical_loss": 3.8846875254724442, "tokens_seen": 576192512 }, { "epoch": 0.17, "learning_rate": 0.000833654309099663, "loss": 0.0797, "theoretical_loss": 3.8845032066507414, "tokens_seen": 576454656 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0010946927359327674, "objective/train/docs_used": 215374, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6169906854629517, "objective/train/original_loss": 1.6169908046722412, "objective/train/theoretical_loss": 3.884318995086635, "objective/train/tokens_used": 597176800, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.0002228701050626114, "objective/train/value_max": -0.00022172927856445312, "objective/train/value_min": -0.49267578125, "objective/train/value_reward_corr": 0.7077948649588294, "objective/train/value_std": 0.01322174072265625, "objective/train/weight_avg": 1.0012009143829346, "objective/train/weighted_lm_loss": 1.6180263757705688, "objective/train/weights_max": 1.1417571306228638, "objective/train/weights_min": 0.3976996839046478, "theoretical_loss": 3.884318995086635, "tokens_seen": 576716800 }, { "epoch": 0.17, "learning_rate": 0.0008335740651580805, "loss": 0.0791, "theoretical_loss": 3.884318995086635, "tokens_seen": 576716800 }, { "epoch": 0.17, "learning_rate": 0.0008334938212164982, "loss": 0.0814, "theoretical_loss": 3.8841348906689985, "tokens_seen": 576978944 }, { "epoch": 0.17, "learning_rate": 0.0008334135772749158, "loss": 0.0828, "theoretical_loss": 3.8839508932868725, "tokens_seen": 577241088 }, { "epoch": 0.18, "learning_rate": 0.0008333333333333334, "loss": 0.0803, "theoretical_loss": 3.8837670028294626, "tokens_seen": 577503232 }, { "epoch": 0.18, "learning_rate": 0.0008332530893917509, "loss": 0.0822, "theoretical_loss": 3.883583219186138, "tokens_seen": 577765376 }, { "epoch": 0.18, "learning_rate": 0.0008331728454501686, "loss": 0.082, "theoretical_loss": 3.8833995422464342, "tokens_seen": 578027520 }, { "epoch": 0.18, "learning_rate": 0.0008330926015085861, "loss": 0.0818, "theoretical_loss": 3.88321597190005, "tokens_seen": 578289664 }, { "epoch": 0.18, "learning_rate": 0.0008330123575670036, "loss": 0.0822, "theoretical_loss": 3.883032508036848, "tokens_seen": 578551808 }, { "epoch": 0.18, "learning_rate": 0.0008329321136254213, "loss": 0.0793, "theoretical_loss": 3.882849150546856, "tokens_seen": 578813952 }, { "epoch": 0.18, "learning_rate": 0.0008328518696838388, "loss": 0.0829, "theoretical_loss": 3.8826658993202625, "tokens_seen": 579076096 }, { "epoch": 0.18, "learning_rate": 0.0008327716257422565, "loss": 0.0814, "theoretical_loss": 3.8824827542474214, "tokens_seen": 579338240 }, { "epoch": 0.18, "learning_rate": 0.0008326913818006741, "loss": 0.0819, "theoretical_loss": 3.882299715218848, "tokens_seen": 579600384 }, { "epoch": 0.18, "learning_rate": 0.0008326111378590917, "loss": 0.0813, "theoretical_loss": 3.8821167821252196, "tokens_seen": 579862528 }, { "epoch": 0.18, "learning_rate": 0.0008325308939175093, "loss": 0.0828, "theoretical_loss": 3.8819339548573772, "tokens_seen": 580124672 }, { "epoch": 0.18, "learning_rate": 0.0008324506499759268, "loss": 0.0815, "theoretical_loss": 3.881751233306322, "tokens_seen": 580386816 }, { "epoch": 0.18, "learning_rate": 0.0008323704060343444, "loss": 0.0814, "theoretical_loss": 3.881568617363218, "tokens_seen": 580648960 }, { "epoch": 0.18, "learning_rate": 0.000832290162092762, "loss": 0.0824, "theoretical_loss": 3.881386106919389, "tokens_seen": 580911104 }, { "epoch": 0.18, "learning_rate": 0.0008322099181511796, "loss": 0.0826, "theoretical_loss": 3.88120370186632, "tokens_seen": 581173248 }, { "epoch": 0.18, "learning_rate": 0.0008321296742095971, "loss": 0.0802, "theoretical_loss": 3.881021402095657, "tokens_seen": 581435392 }, { "epoch": 0.18, "learning_rate": 0.0008320494302680149, "loss": 0.0807, "theoretical_loss": 3.880839207499205, "tokens_seen": 581697536 }, { "epoch": 0.18, "learning_rate": 0.0008319691863264324, "loss": 0.082, "theoretical_loss": 3.880657117968931, "tokens_seen": 581959680 }, { "epoch": 0.18, "learning_rate": 0.0008318889423848499, "loss": 0.0844, "theoretical_loss": 3.880475133396959, "tokens_seen": 582221824 }, { "epoch": 0.18, "learning_rate": 0.0008318086984432676, "loss": 0.0812, "theoretical_loss": 3.8802932536755748, "tokens_seen": 582483968 }, { "epoch": 0.18, "learning_rate": 0.0008317284545016851, "loss": 0.0833, "theoretical_loss": 3.880111478697221, "tokens_seen": 582746112 }, { "epoch": 0.18, "learning_rate": 0.0008316482105601027, "loss": 0.085, "theoretical_loss": 3.8799298083545004, "tokens_seen": 583008256 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0015177363529801369, "objective/train/docs_used": 217803, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.666736364364624, "objective/train/original_loss": 1.6667362451553345, "objective/train/theoretical_loss": 3.879748242540173, "objective/train/tokens_used": 603730400, "objective/train/value_avg": -0.011322021484375, "objective/train/value_loss": 0.0003124627110082656, "objective/train/value_max": -0.00027370452880859375, "objective/train/value_min": -0.642578125, "objective/train/value_reward_corr": 0.6764778613382025, "objective/train/value_std": 0.01788330078125, "objective/train/weight_avg": 1.0016603469848633, "objective/train/weighted_lm_loss": 1.668639898300171, "objective/train/weights_max": 1.3257246017456055, "objective/train/weights_min": 0.36870554089546204, "theoretical_loss": 3.879748242540173, "tokens_seen": 583270400 }, { "epoch": 0.18, "learning_rate": 0.0008315679666185203, "loss": 0.0814, "theoretical_loss": 3.879748242540173, "tokens_seen": 583270400 }, { "epoch": 0.18, "learning_rate": 0.0008314877226769379, "loss": 0.0839, "theoretical_loss": 3.8795667811471573, "tokens_seen": 583532544 }, { "epoch": 0.18, "learning_rate": 0.0008314074787353555, "loss": 0.082, "theoretical_loss": 3.8793854240685306, "tokens_seen": 583794688 }, { "epoch": 0.18, "learning_rate": 0.000831327234793773, "loss": 0.0817, "theoretical_loss": 3.879204171197525, "tokens_seen": 584056832 }, { "epoch": 0.18, "learning_rate": 0.0008312469908521907, "loss": 0.081, "theoretical_loss": 3.879023022427533, "tokens_seen": 584318976 }, { "epoch": 0.18, "learning_rate": 0.0008311667469106083, "loss": 0.0814, "theoretical_loss": 3.878841977652101, "tokens_seen": 584581120 }, { "epoch": 0.18, "learning_rate": 0.0008310865029690259, "loss": 0.0815, "theoretical_loss": 3.8786610367649343, "tokens_seen": 584843264 }, { "epoch": 0.18, "learning_rate": 0.0008310062590274434, "loss": 0.0826, "theoretical_loss": 3.8784801996598928, "tokens_seen": 585105408 }, { "epoch": 0.18, "learning_rate": 0.0008309260150858611, "loss": 0.0819, "theoretical_loss": 3.878299466230992, "tokens_seen": 585367552 }, { "epoch": 0.18, "learning_rate": 0.0008308457711442786, "loss": 0.0808, "theoretical_loss": 3.8781188363724057, "tokens_seen": 585629696 }, { "epoch": 0.18, "learning_rate": 0.0008307655272026961, "loss": 0.0807, "theoretical_loss": 3.87793830997846, "tokens_seen": 585891840 }, { "epoch": 0.18, "learning_rate": 0.0008306852832611138, "loss": 0.0803, "theoretical_loss": 3.8777578869436384, "tokens_seen": 586153984 }, { "epoch": 0.18, "learning_rate": 0.0008306050393195313, "loss": 0.0817, "theoretical_loss": 3.8775775671625765, "tokens_seen": 586416128 }, { "epoch": 0.18, "learning_rate": 0.000830524795377949, "loss": 0.082, "theoretical_loss": 3.8773973505300674, "tokens_seen": 586678272 }, { "epoch": 0.18, "learning_rate": 0.0008304445514363666, "loss": 0.08, "theoretical_loss": 3.877217236941055, "tokens_seen": 586940416 }, { "epoch": 0.18, "learning_rate": 0.0008303643074947842, "loss": 0.0833, "theoretical_loss": 3.877037226290641, "tokens_seen": 587202560 }, { "epoch": 0.18, "learning_rate": 0.0008302840635532017, "loss": 0.0798, "theoretical_loss": 3.8768573184740767, "tokens_seen": 587464704 }, { "epoch": 0.18, "learning_rate": 0.0008302038196116194, "loss": 0.0804, "theoretical_loss": 3.87667751338677, "tokens_seen": 587726848 }, { "epoch": 0.18, "learning_rate": 0.0008301235756700369, "loss": 0.0799, "theoretical_loss": 3.8764978109242794, "tokens_seen": 587988992 }, { "epoch": 0.18, "learning_rate": 0.0008300433317284545, "loss": 0.0819, "theoretical_loss": 3.8763182109823173, "tokens_seen": 588251136 }, { "epoch": 0.18, "learning_rate": 0.0008299630877868721, "loss": 0.0761, "theoretical_loss": 3.8761387134567475, "tokens_seen": 588513280 }, { "epoch": 0.18, "learning_rate": 0.0008298828438452896, "loss": 0.081, "theoretical_loss": 3.8759593182435874, "tokens_seen": 588775424 }, { "epoch": 0.18, "learning_rate": 0.0008298025999037074, "loss": 0.0844, "theoretical_loss": 3.875780025239005, "tokens_seen": 589037568 }, { "epoch": 0.18, "learning_rate": 0.0008297223559621249, "loss": 0.0793, "theoretical_loss": 3.8756008343393202, "tokens_seen": 589299712 }, { "epoch": 0.18, "learning_rate": 0.0008296421120205425, "loss": 0.0832, "theoretical_loss": 3.8754217454410043, "tokens_seen": 589561856 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.000526767922565341, "objective/train/docs_used": 220165, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7038625478744507, "objective/train/original_loss": 1.7038625478744507, "objective/train/theoretical_loss": 3.875242758440679, "objective/train/tokens_used": 610284000, "objective/train/value_avg": -0.00890350341796875, "objective/train/value_loss": 0.0003305371792521328, "objective/train/value_max": -0.0002378225326538086, "objective/train/value_min": -0.6884765625, "objective/train/value_reward_corr": 0.6204838794811066, "objective/train/value_std": 0.0142974853515625, "objective/train/weight_avg": 1.000672459602356, "objective/train/weighted_lm_loss": 1.7044023275375366, "objective/train/weights_max": 1.3236627578735352, "objective/train/weights_min": 0.3694390654563904, "theoretical_loss": 3.875242758440679, "tokens_seen": 589824000 }, { "epoch": 0.18, "learning_rate": 0.0008295618680789601, "loss": 0.0818, "theoretical_loss": 3.875242758440679, "tokens_seen": 589824000 }, { "epoch": 0.18, "learning_rate": 0.0008294816241373776, "loss": 0.0807, "theoretical_loss": 3.875063873235117, "tokens_seen": 590086144 }, { "epoch": 0.18, "learning_rate": 0.0008294013801957952, "loss": 0.085, "theoretical_loss": 3.874885089721242, "tokens_seen": 590348288 }, { "epoch": 0.18, "learning_rate": 0.0008293211362542128, "loss": 0.0778, "theoretical_loss": 3.8747064077961264, "tokens_seen": 590610432 }, { "epoch": 0.18, "learning_rate": 0.0008292408923126304, "loss": 0.0787, "theoretical_loss": 3.874527827356994, "tokens_seen": 590872576 }, { "epoch": 0.18, "learning_rate": 0.0008291606483710479, "loss": 0.0828, "theoretical_loss": 3.8743493483012172, "tokens_seen": 591134720 }, { "epoch": 0.18, "learning_rate": 0.0008290804044294657, "loss": 0.0803, "theoretical_loss": 3.874170970526317, "tokens_seen": 591396864 }, { "epoch": 0.18, "learning_rate": 0.0008290001604878832, "loss": 0.0824, "theoretical_loss": 3.873992693929965, "tokens_seen": 591659008 }, { "epoch": 0.18, "learning_rate": 0.0008289199165463007, "loss": 0.0781, "theoretical_loss": 3.8738145184099797, "tokens_seen": 591921152 }, { "epoch": 0.18, "learning_rate": 0.0008288396726047184, "loss": 0.0793, "theoretical_loss": 3.8736364438643296, "tokens_seen": 592183296 }, { "epoch": 0.18, "learning_rate": 0.0008287594286631359, "loss": 0.0831, "theoretical_loss": 3.87345847019113, "tokens_seen": 592445440 }, { "epoch": 0.18, "learning_rate": 0.0008286791847215536, "loss": 0.0787, "theoretical_loss": 3.8732805972886446, "tokens_seen": 592707584 }, { "epoch": 0.18, "learning_rate": 0.0008285989407799711, "loss": 0.0791, "theoretical_loss": 3.873102825055285, "tokens_seen": 592969728 }, { "epoch": 0.18, "learning_rate": 0.0008285186968383887, "loss": 0.0793, "theoretical_loss": 3.87292515338961, "tokens_seen": 593231872 }, { "epoch": 0.18, "learning_rate": 0.0008284384528968063, "loss": 0.08, "theoretical_loss": 3.872747582190324, "tokens_seen": 593494016 }, { "epoch": 0.18, "learning_rate": 0.0008283582089552239, "loss": 0.0813, "theoretical_loss": 3.8725701113562794, "tokens_seen": 593756160 }, { "epoch": 0.18, "learning_rate": 0.0008282779650136415, "loss": 0.0791, "theoretical_loss": 3.8723927407864758, "tokens_seen": 594018304 }, { "epoch": 0.18, "learning_rate": 0.0008281977210720591, "loss": 0.0814, "theoretical_loss": 3.8722154703800573, "tokens_seen": 594280448 }, { "epoch": 0.18, "learning_rate": 0.0008281174771304767, "loss": 0.082, "theoretical_loss": 3.8720383000363148, "tokens_seen": 594542592 }, { "epoch": 0.18, "learning_rate": 0.0008280372331888942, "loss": 0.0807, "theoretical_loss": 3.871861229654684, "tokens_seen": 594804736 }, { "epoch": 0.18, "learning_rate": 0.0008279569892473119, "loss": 0.0776, "theoretical_loss": 3.8716842591347476, "tokens_seen": 595066880 }, { "epoch": 0.18, "learning_rate": 0.0008278767453057294, "loss": 0.0818, "theoretical_loss": 3.871507388376231, "tokens_seen": 595329024 }, { "epoch": 0.18, "learning_rate": 0.0008277965013641469, "loss": 0.0792, "theoretical_loss": 3.871330617279006, "tokens_seen": 595591168 }, { "epoch": 0.18, "learning_rate": 0.0008277162574225646, "loss": 0.0812, "theoretical_loss": 3.8711539457430897, "tokens_seen": 595853312 }, { "epoch": 0.18, "learning_rate": 0.0008276360134809821, "loss": 0.0793, "theoretical_loss": 3.87097737366864, "tokens_seen": 596115456 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0013384453486651182, "objective/train/docs_used": 222574, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5089752674102783, "objective/train/original_loss": 1.5089752674102783, "objective/train/theoretical_loss": 3.870800900955963, "objective/train/tokens_used": 616837600, "objective/train/value_avg": -0.0093841552734375, "objective/train/value_loss": 0.0004224953299853951, "objective/train/value_max": -0.00018525123596191406, "objective/train/value_min": -0.80712890625, "objective/train/value_reward_corr": 0.6443973910376418, "objective/train/value_std": 0.0171661376953125, "objective/train/weight_avg": 1.0015227794647217, "objective/train/weighted_lm_loss": 1.5108550786972046, "objective/train/weights_max": 1.870554804801941, "objective/train/weights_min": 0.3708694577217102, "theoretical_loss": 3.870800900955963, "tokens_seen": 596377600 }, { "epoch": 0.18, "learning_rate": 0.0008275557695393999, "loss": 0.079, "theoretical_loss": 3.870800900955963, "tokens_seen": 596377600 }, { "epoch": 0.18, "learning_rate": 0.0008274755255978174, "loss": 0.0812, "theoretical_loss": 3.8706245275055062, "tokens_seen": 596639744 }, { "epoch": 0.18, "learning_rate": 0.000827395281656235, "loss": 0.0796, "theoretical_loss": 3.8704482532178606, "tokens_seen": 596901888 }, { "epoch": 0.18, "learning_rate": 0.0008273150377146526, "loss": 0.0836, "theoretical_loss": 3.8702720779937607, "tokens_seen": 597164032 }, { "epoch": 0.18, "learning_rate": 0.0008272347937730702, "loss": 0.079, "theoretical_loss": 3.8700960017340833, "tokens_seen": 597426176 }, { "epoch": 0.18, "learning_rate": 0.0008271545498314877, "loss": 0.0797, "theoretical_loss": 3.8699200243398493, "tokens_seen": 597688320 }, { "epoch": 0.18, "learning_rate": 0.0008270743058899053, "loss": 0.0845, "theoretical_loss": 3.8697441457122204, "tokens_seen": 597950464 }, { "epoch": 0.18, "learning_rate": 0.0008269940619483229, "loss": 0.0819, "theoretical_loss": 3.8695683657525013, "tokens_seen": 598212608 }, { "epoch": 0.18, "learning_rate": 0.0008269138180067404, "loss": 0.0823, "theoretical_loss": 3.8693926843621376, "tokens_seen": 598474752 }, { "epoch": 0.18, "learning_rate": 0.0008268335740651582, "loss": 0.0802, "theoretical_loss": 3.8692171014427177, "tokens_seen": 598736896 }, { "epoch": 0.18, "learning_rate": 0.0008267533301235757, "loss": 0.0794, "theoretical_loss": 3.86904161689597, "tokens_seen": 598999040 }, { "epoch": 0.18, "learning_rate": 0.0008266730861819933, "loss": 0.0828, "theoretical_loss": 3.868866230623766, "tokens_seen": 599261184 }, { "epoch": 0.18, "learning_rate": 0.0008265928422404109, "loss": 0.0806, "theoretical_loss": 3.8686909425281146, "tokens_seen": 599523328 }, { "epoch": 0.18, "learning_rate": 0.0008265125982988284, "loss": 0.081, "theoretical_loss": 3.8685157525111684, "tokens_seen": 599785472 }, { "epoch": 0.18, "learning_rate": 0.000826432354357246, "loss": 0.0814, "theoretical_loss": 3.8683406604752184, "tokens_seen": 600047616 }, { "epoch": 0.18, "learning_rate": 0.0008263521104156636, "loss": 0.0827, "theoretical_loss": 3.868165666322696, "tokens_seen": 600309760 }, { "epoch": 0.18, "learning_rate": 0.0008262718664740812, "loss": 0.0826, "theoretical_loss": 3.8679907699561733, "tokens_seen": 600571904 }, { "epoch": 0.18, "learning_rate": 0.0008261916225324988, "loss": 0.0819, "theoretical_loss": 3.86781597127836, "tokens_seen": 600834048 }, { "epoch": 0.18, "learning_rate": 0.0008261113785909165, "loss": 0.0824, "theoretical_loss": 3.867641270192107, "tokens_seen": 601096192 }, { "epoch": 0.18, "learning_rate": 0.000826031134649334, "loss": 0.0831, "theoretical_loss": 3.867466666600402, "tokens_seen": 601358336 }, { "epoch": 0.18, "learning_rate": 0.0008259508907077516, "loss": 0.0815, "theoretical_loss": 3.867292160406373, "tokens_seen": 601620480 }, { "epoch": 0.18, "learning_rate": 0.0008258706467661692, "loss": 0.0796, "theoretical_loss": 3.8671177515132857, "tokens_seen": 601882624 }, { "epoch": 0.18, "learning_rate": 0.0008257904028245867, "loss": 0.0821, "theoretical_loss": 3.866943439824545, "tokens_seen": 602144768 }, { "epoch": 0.18, "learning_rate": 0.0008257101588830044, "loss": 0.0797, "theoretical_loss": 3.8667692252436914, "tokens_seen": 602406912 }, { "epoch": 0.18, "learning_rate": 0.0008256299149414219, "loss": 0.0818, "theoretical_loss": 3.8665951076744056, "tokens_seen": 602669056 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0010131277376785874, "objective/train/docs_used": 224736, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.561912178993225, "objective/train/original_loss": 1.5619122982025146, "objective/train/theoretical_loss": 3.866421087020504, "objective/train/tokens_used": 623391200, "objective/train/value_avg": -0.006183624267578125, "objective/train/value_loss": 0.00039571512024849653, "objective/train/value_max": -0.00018966197967529297, "objective/train/value_min": -0.95263671875, "objective/train/value_reward_corr": 0.493842246751972, "objective/train/value_std": 0.01300811767578125, "objective/train/weight_avg": 1.0011917352676392, "objective/train/weighted_lm_loss": 1.5638322830200195, "objective/train/weights_max": 2.0086898803710938, "objective/train/weights_min": 0.36912351846694946, "theoretical_loss": 3.866421087020504, "tokens_seen": 602931200 }, { "epoch": 0.18, "learning_rate": 0.0008255496709998395, "loss": 0.0792, "theoretical_loss": 3.866421087020504, "tokens_seen": 602931200 }, { "epoch": 0.18, "learning_rate": 0.0008254694270582571, "loss": 0.0765, "theoretical_loss": 3.8662471631859407, "tokens_seen": 603193344 }, { "epoch": 0.18, "learning_rate": 0.0008253891831166747, "loss": 0.0819, "theoretical_loss": 3.866073336074807, "tokens_seen": 603455488 }, { "epoch": 0.18, "learning_rate": 0.0008253089391750923, "loss": 0.0785, "theoretical_loss": 3.8658996055913297, "tokens_seen": 603717632 }, { "epoch": 0.18, "learning_rate": 0.0008252286952335099, "loss": 0.0833, "theoretical_loss": 3.8657259716398737, "tokens_seen": 603979776 }, { "epoch": 0.18, "learning_rate": 0.0008251484512919275, "loss": 0.0798, "theoretical_loss": 3.8655524341249388, "tokens_seen": 604241920 }, { "epoch": 0.18, "learning_rate": 0.000825068207350345, "loss": 0.0825, "theoretical_loss": 3.865378992951161, "tokens_seen": 604504064 }, { "epoch": 0.18, "learning_rate": 0.0008249879634087627, "loss": 0.0808, "theoretical_loss": 3.865205648023311, "tokens_seen": 604766208 }, { "epoch": 0.18, "learning_rate": 0.0008249077194671802, "loss": 0.0818, "theoretical_loss": 3.8650323992462963, "tokens_seen": 605028352 }, { "epoch": 0.18, "learning_rate": 0.0008248274755255978, "loss": 0.0803, "theoretical_loss": 3.8648592465251586, "tokens_seen": 605290496 }, { "epoch": 0.18, "learning_rate": 0.0008247472315840154, "loss": 0.0813, "theoretical_loss": 3.864686189765075, "tokens_seen": 605552640 }, { "epoch": 0.18, "learning_rate": 0.000824666987642433, "loss": 0.0815, "theoretical_loss": 3.864513228871357, "tokens_seen": 605814784 }, { "epoch": 0.18, "learning_rate": 0.0008245867437008507, "loss": 0.0804, "theoretical_loss": 3.8643403637494504, "tokens_seen": 606076928 }, { "epoch": 0.18, "learning_rate": 0.0008245064997592682, "loss": 0.0794, "theoretical_loss": 3.8641675943049343, "tokens_seen": 606339072 }, { "epoch": 0.18, "learning_rate": 0.0008244262558176858, "loss": 0.0827, "theoretical_loss": 3.863994920443523, "tokens_seen": 606601216 }, { "epoch": 0.18, "learning_rate": 0.0008243460118761034, "loss": 0.0811, "theoretical_loss": 3.8638223420710647, "tokens_seen": 606863360 }, { "epoch": 0.18, "learning_rate": 0.000824265767934521, "loss": 0.0786, "theoretical_loss": 3.863649859093538, "tokens_seen": 607125504 }, { "epoch": 0.18, "learning_rate": 0.0008241855239929385, "loss": 0.0793, "theoretical_loss": 3.863477471417059, "tokens_seen": 607387648 }, { "epoch": 0.18, "learning_rate": 0.0008241052800513561, "loss": 0.0818, "theoretical_loss": 3.8633051789478734, "tokens_seen": 607649792 }, { "epoch": 0.18, "learning_rate": 0.0008240250361097737, "loss": 0.0838, "theoretical_loss": 3.8631329815923605, "tokens_seen": 607911936 }, { "epoch": 0.18, "learning_rate": 0.0008239447921681912, "loss": 0.0787, "theoretical_loss": 3.862960879257032, "tokens_seen": 608174080 }, { "epoch": 0.18, "learning_rate": 0.000823864548226609, "loss": 0.0828, "theoretical_loss": 3.8627888718485313, "tokens_seen": 608436224 }, { "epoch": 0.18, "learning_rate": 0.0008237843042850265, "loss": 0.0792, "theoretical_loss": 3.862616959273635, "tokens_seen": 608698368 }, { "epoch": 0.18, "learning_rate": 0.0008237040603434442, "loss": 0.0799, "theoretical_loss": 3.8624451414392498, "tokens_seen": 608960512 }, { "epoch": 0.18, "learning_rate": 0.0008236238164018617, "loss": 0.0782, "theoretical_loss": 3.8622734182524154, "tokens_seen": 609222656 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0017172880470752716, "objective/train/docs_used": 227108, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5490598678588867, "objective/train/original_loss": 1.5490598678588867, "objective/train/theoretical_loss": 3.8621017896203007, "objective/train/tokens_used": 629944800, "objective/train/value_avg": -0.007549285888671875, "objective/train/value_loss": 0.00015352350601460785, "objective/train/value_max": -0.0002378225326538086, "objective/train/value_min": -0.423095703125, "objective/train/value_reward_corr": 0.6703752245048074, "objective/train/value_std": 0.011810302734375, "objective/train/weight_avg": 1.0017893314361572, "objective/train/weighted_lm_loss": 1.5520132780075073, "objective/train/weights_max": 1.3398975133895874, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.8621017896203007, "tokens_seen": 609484800 }, { "epoch": 0.18, "learning_rate": 0.0008235435724602792, "loss": 0.0801, "theoretical_loss": 3.8621017896203007, "tokens_seen": 609484800 }, { "epoch": 0.18, "learning_rate": 0.0008234633285186969, "loss": 0.0791, "theoretical_loss": 3.8619302554502077, "tokens_seen": 609746944 }, { "epoch": 0.18, "learning_rate": 0.0008233830845771144, "loss": 0.0783, "theoretical_loss": 3.8617588156495666, "tokens_seen": 610009088 }, { "epoch": 0.18, "learning_rate": 0.000823302840635532, "loss": 0.0826, "theoretical_loss": 3.861587470125941, "tokens_seen": 610271232 }, { "epoch": 0.19, "learning_rate": 0.0008232225966939496, "loss": 0.0807, "theoretical_loss": 3.8614162187870216, "tokens_seen": 610533376 }, { "epoch": 0.19, "learning_rate": 0.0008231423527523673, "loss": 0.0801, "theoretical_loss": 3.861245061540631, "tokens_seen": 610795520 }, { "epoch": 0.19, "learning_rate": 0.0008230621088107848, "loss": 0.0803, "theoretical_loss": 3.8610739982947218, "tokens_seen": 611057664 }, { "epoch": 0.19, "learning_rate": 0.0008229818648692024, "loss": 0.077, "theoretical_loss": 3.8609030289573747, "tokens_seen": 611319808 }, { "epoch": 0.19, "learning_rate": 0.00082290162092762, "loss": 0.0822, "theoretical_loss": 3.8607321534368007, "tokens_seen": 611581952 }, { "epoch": 0.19, "learning_rate": 0.0008228213769860375, "loss": 0.077, "theoretical_loss": 3.8605613716413396, "tokens_seen": 611844096 }, { "epoch": 0.19, "learning_rate": 0.0008227411330444552, "loss": 0.082, "theoretical_loss": 3.860390683479459, "tokens_seen": 612106240 }, { "epoch": 0.19, "learning_rate": 0.0008226608891028727, "loss": 0.0775, "theoretical_loss": 3.860220088859757, "tokens_seen": 612368384 }, { "epoch": 0.19, "learning_rate": 0.0008225806451612903, "loss": 0.0771, "theoretical_loss": 3.860049587690958, "tokens_seen": 612630528 }, { "epoch": 0.19, "learning_rate": 0.0008225004012197079, "loss": 0.0796, "theoretical_loss": 3.8598791798819154, "tokens_seen": 612892672 }, { "epoch": 0.19, "learning_rate": 0.0008224201572781254, "loss": 0.0767, "theoretical_loss": 3.859708865341611, "tokens_seen": 613154816 }, { "epoch": 0.19, "learning_rate": 0.0008223399133365432, "loss": 0.0815, "theoretical_loss": 3.8595386439791532, "tokens_seen": 613416960 }, { "epoch": 0.19, "learning_rate": 0.0008222596693949607, "loss": 0.0792, "theoretical_loss": 3.859368515703778, "tokens_seen": 613679104 }, { "epoch": 0.19, "learning_rate": 0.0008221794254533783, "loss": 0.0777, "theoretical_loss": 3.859198480424849, "tokens_seen": 613941248 }, { "epoch": 0.19, "learning_rate": 0.0008220991815117959, "loss": 0.0783, "theoretical_loss": 3.859028538051856, "tokens_seen": 614203392 }, { "epoch": 0.19, "learning_rate": 0.0008220189375702135, "loss": 0.0797, "theoretical_loss": 3.858858688494416, "tokens_seen": 614465536 }, { "epoch": 0.19, "learning_rate": 0.000821938693628631, "loss": 0.0789, "theoretical_loss": 3.8586889316622726, "tokens_seen": 614727680 }, { "epoch": 0.19, "learning_rate": 0.0008218584496870486, "loss": 0.0758, "theoretical_loss": 3.8585192674652955, "tokens_seen": 614989824 }, { "epoch": 0.19, "learning_rate": 0.0008217782057454662, "loss": 0.0785, "theoretical_loss": 3.8583496958134793, "tokens_seen": 615251968 }, { "epoch": 0.19, "learning_rate": 0.0008216979618038837, "loss": 0.0766, "theoretical_loss": 3.8581802166169457, "tokens_seen": 615514112 }, { "epoch": 0.19, "learning_rate": 0.0008216177178623015, "loss": 0.0798, "theoretical_loss": 3.8580108297859415, "tokens_seen": 615776256 }, { "epoch": 0.19, "objective/train/advantage_avg": -0.00047024516970850527, "objective/train/docs_used": 229528, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.600695252418518, "objective/train/original_loss": 1.6006953716278076, "objective/train/theoretical_loss": 3.857841535230839, "objective/train/tokens_used": 636498400, "objective/train/value_avg": -0.01091766357421875, "objective/train/value_loss": 0.00041529766167514026, "objective/train/value_max": -0.0002033710479736328, "objective/train/value_min": -0.474853515625, "objective/train/value_reward_corr": 0.8363627909970652, "objective/train/value_std": 0.0230560302734375, "objective/train/weight_avg": 0.9997208714485168, "objective/train/weighted_lm_loss": 1.6006137132644653, "objective/train/weights_max": 1.2145042419433594, "objective/train/weights_min": 0.23296229541301727, "theoretical_loss": 3.857841535230839, "tokens_seen": 616038400 }, { "epoch": 0.19, "learning_rate": 0.000821537473920719, "loss": 0.0775, "theoretical_loss": 3.857841535230839, "tokens_seen": 616038400 }, { "epoch": 0.19, "learning_rate": 0.0008214572299791366, "loss": 0.0789, "theoretical_loss": 3.8576723328621347, "tokens_seen": 616300544 }, { "epoch": 0.19, "learning_rate": 0.0008213769860375542, "loss": 0.0791, "theoretical_loss": 3.8575032225904513, "tokens_seen": 616562688 }, { "epoch": 0.19, "learning_rate": 0.0008212967420959717, "loss": 0.0773, "theoretical_loss": 3.8573342043265346, "tokens_seen": 616824832 }, { "epoch": 0.19, "learning_rate": 0.0008212164981543894, "loss": 0.083, "theoretical_loss": 3.857165277981256, "tokens_seen": 617086976 }, { "epoch": 0.19, "learning_rate": 0.0008211362542128069, "loss": 0.0808, "theoretical_loss": 3.8569964434656105, "tokens_seen": 617349120 }, { "epoch": 0.19, "learning_rate": 0.0008210560102712245, "loss": 0.0807, "theoretical_loss": 3.856827700690718, "tokens_seen": 617611264 }, { "epoch": 0.19, "learning_rate": 0.0008209757663296422, "loss": 0.0794, "theoretical_loss": 3.8566590495678192, "tokens_seen": 617873408 }, { "epoch": 0.19, "learning_rate": 0.0008208955223880598, "loss": 0.0816, "theoretical_loss": 3.856490490008282, "tokens_seen": 618135552 }, { "epoch": 0.19, "learning_rate": 0.0008208152784464773, "loss": 0.0807, "theoretical_loss": 3.856322021923595, "tokens_seen": 618397696 }, { "epoch": 0.19, "learning_rate": 0.000820735034504895, "loss": 0.0798, "theoretical_loss": 3.8561536452253713, "tokens_seen": 618659840 }, { "epoch": 0.19, "learning_rate": 0.0008206547905633125, "loss": 0.0769, "theoretical_loss": 3.855985359825346, "tokens_seen": 618921984 }, { "epoch": 0.19, "learning_rate": 0.00082057454662173, "loss": 0.0824, "theoretical_loss": 3.855817165635377, "tokens_seen": 619184128 }, { "epoch": 0.19, "learning_rate": 0.0008204943026801477, "loss": 0.0818, "theoretical_loss": 3.8556490625674447, "tokens_seen": 619446272 }, { "epoch": 0.19, "learning_rate": 0.0008204140587385652, "loss": 0.0812, "theoretical_loss": 3.855481050533651, "tokens_seen": 619708416 }, { "epoch": 0.19, "learning_rate": 0.0008203338147969828, "loss": 0.0813, "theoretical_loss": 3.8553131294462206, "tokens_seen": 619970560 }, { "epoch": 0.19, "learning_rate": 0.0008202535708554004, "loss": 0.0813, "theoretical_loss": 3.8551452992175, "tokens_seen": 620232704 }, { "epoch": 0.19, "learning_rate": 0.0008201733269138181, "loss": 0.0813, "theoretical_loss": 3.8549775597599556, "tokens_seen": 620494848 }, { "epoch": 0.19, "learning_rate": 0.0008200930829722356, "loss": 0.0786, "theoretical_loss": 3.8548099109861775, "tokens_seen": 620756992 }, { "epoch": 0.19, "learning_rate": 0.0008200128390306532, "loss": 0.0808, "theoretical_loss": 3.8546423528088747, "tokens_seen": 621019136 }, { "epoch": 0.19, "learning_rate": 0.0008199325950890708, "loss": 0.0801, "theoretical_loss": 3.8544748851408777, "tokens_seen": 621281280 }, { "epoch": 0.19, "learning_rate": 0.0008198523511474884, "loss": 0.078, "theoretical_loss": 3.8543075078951388, "tokens_seen": 621543424 }, { "epoch": 0.19, "learning_rate": 0.000819772107205906, "loss": 0.078, "theoretical_loss": 3.8541402209847284, "tokens_seen": 621805568 }, { "epoch": 0.19, "learning_rate": 0.0008196918632643235, "loss": 0.0815, "theoretical_loss": 3.8539730243228387, "tokens_seen": 622067712 }, { "epoch": 0.19, "learning_rate": 0.0008196116193227412, "loss": 0.0774, "theoretical_loss": 3.8538059178227817, "tokens_seen": 622329856 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.00011773208098020405, "objective/train/docs_used": 231894, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6076971292495728, "objective/train/original_loss": 1.6076970100402832, "objective/train/theoretical_loss": 3.8536389013979893, "objective/train/tokens_used": 643052000, "objective/train/value_avg": -0.0093994140625, "objective/train/value_loss": 0.00031335835228674114, "objective/train/value_max": -0.00016868114471435547, "objective/train/value_min": -0.638671875, "objective/train/value_reward_corr": 0.6932073575502529, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.000260829925537, "objective/train/weighted_lm_loss": 1.6074447631835938, "objective/train/weights_max": 1.6733942031860352, "objective/train/weights_min": 0.37257105112075806, "theoretical_loss": 3.8536389013979893, "tokens_seen": 622592000 }, { "epoch": 0.19, "learning_rate": 0.0008195313753811587, "loss": 0.0831, "theoretical_loss": 3.8536389013979893, "tokens_seen": 622592000 }, { "epoch": 0.19, "learning_rate": 0.0008194511314395762, "loss": 0.0791, "theoretical_loss": 3.8534719749620114, "tokens_seen": 622854144 }, { "epoch": 0.19, "learning_rate": 0.000819370887497994, "loss": 0.0785, "theoretical_loss": 3.8533051384285195, "tokens_seen": 623116288 }, { "epoch": 0.19, "learning_rate": 0.0008192906435564115, "loss": 0.0811, "theoretical_loss": 3.853138391711303, "tokens_seen": 623378432 }, { "epoch": 0.19, "learning_rate": 0.0008192103996148291, "loss": 0.079, "theoretical_loss": 3.852971734724269, "tokens_seen": 623640576 }, { "epoch": 0.19, "learning_rate": 0.0008191301556732467, "loss": 0.0785, "theoretical_loss": 3.8528051673814456, "tokens_seen": 623902720 }, { "epoch": 0.19, "learning_rate": 0.0008190499117316643, "loss": 0.0806, "theoretical_loss": 3.8526386895969775, "tokens_seen": 624164864 }, { "epoch": 0.19, "learning_rate": 0.0008189696677900818, "loss": 0.0821, "theoretical_loss": 3.8524723012851294, "tokens_seen": 624427008 }, { "epoch": 0.19, "learning_rate": 0.0008188894238484994, "loss": 0.0807, "theoretical_loss": 3.852306002360282, "tokens_seen": 624689152 }, { "epoch": 0.19, "learning_rate": 0.000818809179906917, "loss": 0.0811, "theoretical_loss": 3.852139792736936, "tokens_seen": 624951296 }, { "epoch": 0.19, "learning_rate": 0.0008187289359653345, "loss": 0.0784, "theoretical_loss": 3.8519736723297067, "tokens_seen": 625213440 }, { "epoch": 0.19, "learning_rate": 0.0008186486920237523, "loss": 0.0789, "theoretical_loss": 3.8518076410533304, "tokens_seen": 625475584 }, { "epoch": 0.19, "learning_rate": 0.0008185684480821698, "loss": 0.0786, "theoretical_loss": 3.8516416988226574, "tokens_seen": 625737728 }, { "epoch": 0.19, "learning_rate": 0.0008184882041405875, "loss": 0.079, "theoretical_loss": 3.851475845552658, "tokens_seen": 625999872 }, { "epoch": 0.19, "learning_rate": 0.000818407960199005, "loss": 0.0818, "theoretical_loss": 3.8513100811584158, "tokens_seen": 626262016 }, { "epoch": 0.19, "learning_rate": 0.0008183277162574225, "loss": 0.0815, "theoretical_loss": 3.851144405555134, "tokens_seen": 626524160 }, { "epoch": 0.19, "learning_rate": 0.0008182474723158402, "loss": 0.0787, "theoretical_loss": 3.85097881865813, "tokens_seen": 626786304 }, { "epoch": 0.19, "learning_rate": 0.0008181672283742577, "loss": 0.0807, "theoretical_loss": 3.850813320382839, "tokens_seen": 627048448 }, { "epoch": 0.19, "learning_rate": 0.0008180869844326753, "loss": 0.0809, "theoretical_loss": 3.8506479106448115, "tokens_seen": 627310592 }, { "epoch": 0.19, "learning_rate": 0.000818006740491093, "loss": 0.0823, "theoretical_loss": 3.8504825893597134, "tokens_seen": 627572736 }, { "epoch": 0.19, "learning_rate": 0.0008179264965495106, "loss": 0.0802, "theoretical_loss": 3.850317356443326, "tokens_seen": 627834880 }, { "epoch": 0.19, "learning_rate": 0.0008178462526079281, "loss": 0.0787, "theoretical_loss": 3.8501522118115465, "tokens_seen": 628097024 }, { "epoch": 0.19, "learning_rate": 0.0008177660086663458, "loss": 0.0824, "theoretical_loss": 3.8499871553803873, "tokens_seen": 628359168 }, { "epoch": 0.19, "learning_rate": 0.0008176857647247633, "loss": 0.0804, "theoretical_loss": 3.8498221870659743, "tokens_seen": 628621312 }, { "epoch": 0.19, "learning_rate": 0.0008176055207831808, "loss": 0.0761, "theoretical_loss": 3.8496573067845503, "tokens_seen": 628883456 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0010866678785532713, "objective/train/docs_used": 234355, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5178707838058472, "objective/train/original_loss": 1.5178709030151367, "objective/train/theoretical_loss": 3.8494925144524705, "objective/train/tokens_used": 649605600, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.0005936273955740035, "objective/train/value_max": -0.00015115737915039062, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.5752126539490294, "objective/train/value_std": 0.0167694091796875, "objective/train/weight_avg": 1.001339316368103, "objective/train/weighted_lm_loss": 1.5189374685287476, "objective/train/weights_max": 1.8356927633285522, "objective/train/weights_min": 0.36808282136917114, "theoretical_loss": 3.8494925144524705, "tokens_seen": 629145600 }, { "epoch": 0.19, "learning_rate": 0.0008175252768415985, "loss": 0.081, "theoretical_loss": 3.8494925144524705, "tokens_seen": 629145600 }, { "epoch": 0.19, "learning_rate": 0.000817445032900016, "loss": 0.0801, "theoretical_loss": 3.849327809986206, "tokens_seen": 629407744 }, { "epoch": 0.19, "learning_rate": 0.0008173647889584337, "loss": 0.0767, "theoretical_loss": 3.8491631933023407, "tokens_seen": 629669888 }, { "epoch": 0.19, "learning_rate": 0.0008172845450168512, "loss": 0.0814, "theoretical_loss": 3.8489986643175733, "tokens_seen": 629932032 }, { "epoch": 0.19, "learning_rate": 0.0008172043010752689, "loss": 0.08, "theoretical_loss": 3.8488342229487156, "tokens_seen": 630194176 }, { "epoch": 0.19, "learning_rate": 0.0008171240571336865, "loss": 0.0819, "theoretical_loss": 3.8486698691126935, "tokens_seen": 630456320 }, { "epoch": 0.19, "learning_rate": 0.000817043813192104, "loss": 0.0806, "theoretical_loss": 3.8485056027265454, "tokens_seen": 630718464 }, { "epoch": 0.19, "learning_rate": 0.0008169635692505216, "loss": 0.0798, "theoretical_loss": 3.848341423707423, "tokens_seen": 630980608 }, { "epoch": 0.19, "learning_rate": 0.0008168833253089392, "loss": 0.0788, "theoretical_loss": 3.8481773319725914, "tokens_seen": 631242752 }, { "epoch": 0.19, "learning_rate": 0.0008168030813673568, "loss": 0.0829, "theoretical_loss": 3.8480133274394275, "tokens_seen": 631504896 }, { "epoch": 0.19, "learning_rate": 0.0008167228374257743, "loss": 0.076, "theoretical_loss": 3.8478494100254217, "tokens_seen": 631767040 }, { "epoch": 0.19, "learning_rate": 0.000816642593484192, "loss": 0.0823, "theoretical_loss": 3.847685579648176, "tokens_seen": 632029184 }, { "epoch": 0.19, "learning_rate": 0.0008165623495426095, "loss": 0.0821, "theoretical_loss": 3.847521836225404, "tokens_seen": 632291328 }, { "epoch": 0.19, "learning_rate": 0.000816482105601027, "loss": 0.0825, "theoretical_loss": 3.8473581796749317, "tokens_seen": 632553472 }, { "epoch": 0.19, "learning_rate": 0.0008164018616594448, "loss": 0.077, "theoretical_loss": 3.8471946099146983, "tokens_seen": 632815616 }, { "epoch": 0.19, "learning_rate": 0.0008163216177178623, "loss": 0.079, "theoretical_loss": 3.847031126862751, "tokens_seen": 633077760 }, { "epoch": 0.19, "learning_rate": 0.0008162413737762799, "loss": 0.0788, "theoretical_loss": 3.8468677304372507, "tokens_seen": 633339904 }, { "epoch": 0.19, "learning_rate": 0.0008161611298346975, "loss": 0.0777, "theoretical_loss": 3.8467044205564704, "tokens_seen": 633602048 }, { "epoch": 0.19, "learning_rate": 0.0008160808858931151, "loss": 0.0799, "theoretical_loss": 3.846541197138791, "tokens_seen": 633864192 }, { "epoch": 0.19, "learning_rate": 0.0008160006419515327, "loss": 0.0836, "theoretical_loss": 3.8463780601027056, "tokens_seen": 634126336 }, { "epoch": 0.19, "learning_rate": 0.0008159203980099502, "loss": 0.0784, "theoretical_loss": 3.846215009366819, "tokens_seen": 634388480 }, { "epoch": 0.19, "learning_rate": 0.0008158401540683678, "loss": 0.0809, "theoretical_loss": 3.846052044849843, "tokens_seen": 634650624 }, { "epoch": 0.19, "learning_rate": 0.0008157599101267855, "loss": 0.0793, "theoretical_loss": 3.845889166470604, "tokens_seen": 634912768 }, { "epoch": 0.19, "learning_rate": 0.0008156796661852031, "loss": 0.08, "theoretical_loss": 3.8457263741480343, "tokens_seen": 635174912 }, { "epoch": 0.19, "learning_rate": 0.0008155994222436206, "loss": 0.0787, "theoretical_loss": 3.845563667801178, "tokens_seen": 635437056 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0005161234876140952, "objective/train/docs_used": 236526, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5930426120758057, "objective/train/original_loss": 1.5930426120758057, "objective/train/theoretical_loss": 3.8454010473491884, "objective/train/tokens_used": 656159200, "objective/train/value_avg": -0.007137298583984375, "objective/train/value_loss": 0.00014894510968588293, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.2247314453125, "objective/train/value_reward_corr": 0.6557688653083844, "objective/train/value_std": 0.01020050048828125, "objective/train/weight_avg": 1.0005874633789062, "objective/train/weighted_lm_loss": 1.5939151048660278, "objective/train/weights_max": 1.1805914640426636, "objective/train/weights_min": 0.3701472282409668, "theoretical_loss": 3.8454010473491884, "tokens_seen": 635699200 }, { "epoch": 0.19, "learning_rate": 0.0008155191783020383, "loss": 0.0807, "theoretical_loss": 3.8454010473491884, "tokens_seen": 635699200 }, { "epoch": 0.19, "learning_rate": 0.0008154389343604558, "loss": 0.0791, "theoretical_loss": 3.845238512711327, "tokens_seen": 635961344 }, { "epoch": 0.19, "learning_rate": 0.0008153586904188733, "loss": 0.0815, "theoretical_loss": 3.845076063806966, "tokens_seen": 636223488 }, { "epoch": 0.19, "learning_rate": 0.000815278446477291, "loss": 0.0828, "theoretical_loss": 3.844913700555586, "tokens_seen": 636485632 }, { "epoch": 0.19, "learning_rate": 0.0008151982025357085, "loss": 0.0779, "theoretical_loss": 3.8447514228767763, "tokens_seen": 636747776 }, { "epoch": 0.19, "learning_rate": 0.0008151179585941261, "loss": 0.0827, "theoretical_loss": 3.844589230690234, "tokens_seen": 637009920 }, { "epoch": 0.19, "learning_rate": 0.0008150377146525437, "loss": 0.0793, "theoretical_loss": 3.844427123915766, "tokens_seen": 637272064 }, { "epoch": 0.19, "learning_rate": 0.0008149574707109614, "loss": 0.0779, "theoretical_loss": 3.8442651024732863, "tokens_seen": 637534208 }, { "epoch": 0.19, "learning_rate": 0.000814877226769379, "loss": 0.078, "theoretical_loss": 3.8441031662828173, "tokens_seen": 637796352 }, { "epoch": 0.19, "learning_rate": 0.0008147969828277966, "loss": 0.0796, "theoretical_loss": 3.843941315264489, "tokens_seen": 638058496 }, { "epoch": 0.19, "learning_rate": 0.0008147167388862141, "loss": 0.0786, "theoretical_loss": 3.8437795493385387, "tokens_seen": 638320640 }, { "epoch": 0.19, "learning_rate": 0.0008146364949446317, "loss": 0.0812, "theoretical_loss": 3.8436178684253126, "tokens_seen": 638582784 }, { "epoch": 0.19, "learning_rate": 0.0008145562510030493, "loss": 0.0816, "theoretical_loss": 3.843456272445262, "tokens_seen": 638844928 }, { "epoch": 0.19, "learning_rate": 0.0008144760070614668, "loss": 0.0778, "theoretical_loss": 3.8432947613189468, "tokens_seen": 639107072 }, { "epoch": 0.19, "learning_rate": 0.0008143957631198845, "loss": 0.0821, "theoretical_loss": 3.8431333349670336, "tokens_seen": 639369216 }, { "epoch": 0.19, "learning_rate": 0.000814315519178302, "loss": 0.0785, "theoretical_loss": 3.842971993310294, "tokens_seen": 639631360 }, { "epoch": 0.19, "learning_rate": 0.0008142352752367197, "loss": 0.0789, "theoretical_loss": 3.8428107362696085, "tokens_seen": 639893504 }, { "epoch": 0.19, "learning_rate": 0.0008141550312951373, "loss": 0.0778, "theoretical_loss": 3.842649563765962, "tokens_seen": 640155648 }, { "epoch": 0.19, "learning_rate": 0.0008140747873535548, "loss": 0.0815, "theoretical_loss": 3.8424884757204474, "tokens_seen": 640417792 }, { "epoch": 0.19, "learning_rate": 0.0008139945434119724, "loss": 0.0797, "theoretical_loss": 3.8423274720542606, "tokens_seen": 640679936 }, { "epoch": 0.19, "learning_rate": 0.00081391429947039, "loss": 0.0784, "theoretical_loss": 3.842166552688706, "tokens_seen": 640942080 }, { "epoch": 0.19, "learning_rate": 0.0008138340555288076, "loss": 0.0803, "theoretical_loss": 3.8420057175451934, "tokens_seen": 641204224 }, { "epoch": 0.19, "learning_rate": 0.0008137538115872251, "loss": 0.0807, "theoretical_loss": 3.841844966545236, "tokens_seen": 641466368 }, { "epoch": 0.19, "learning_rate": 0.0008136735676456428, "loss": 0.08, "theoretical_loss": 3.841684299610453, "tokens_seen": 641728512 }, { "epoch": 0.19, "learning_rate": 0.0008135933237040603, "loss": 0.0785, "theoretical_loss": 3.8415237166625698, "tokens_seen": 641990656 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.00029344300855882466, "objective/train/docs_used": 239059, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.705520510673523, "objective/train/original_loss": 1.7055203914642334, "objective/train/theoretical_loss": 3.8413632176234147, "objective/train/tokens_used": 662712800, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.0002510190533939749, "objective/train/value_max": -0.00013768672943115234, "objective/train/value_min": -0.431396484375, "objective/train/value_reward_corr": 0.6089659714411729, "objective/train/value_std": 0.0107879638671875, "objective/train/weight_avg": 1.000407338142395, "objective/train/weighted_lm_loss": 1.704552412033081, "objective/train/weights_max": 1.1669175624847412, "objective/train/weights_min": 0.24382223188877106, "theoretical_loss": 3.8413632176234147, "tokens_seen": 642252800 }, { "epoch": 0.19, "learning_rate": 0.000813513079762478, "loss": 0.0791, "theoretical_loss": 3.8413632176234147, "tokens_seen": 642252800 }, { "epoch": 0.19, "learning_rate": 0.0008134328358208956, "loss": 0.0832, "theoretical_loss": 3.8412028024149225, "tokens_seen": 642514944 }, { "epoch": 0.19, "learning_rate": 0.0008133525918793131, "loss": 0.0788, "theoretical_loss": 3.841042470959131, "tokens_seen": 642777088 }, { "epoch": 0.19, "learning_rate": 0.0008132723479377308, "loss": 0.0788, "theoretical_loss": 3.840882223178183, "tokens_seen": 643039232 }, { "epoch": 0.19, "learning_rate": 0.0008131921039961483, "loss": 0.0809, "theoretical_loss": 3.8407220589943254, "tokens_seen": 643301376 }, { "epoch": 0.2, "learning_rate": 0.0008131118600545659, "loss": 0.0775, "theoretical_loss": 3.8405619783299083, "tokens_seen": 643563520 }, { "epoch": 0.2, "learning_rate": 0.0008130316161129835, "loss": 0.0781, "theoretical_loss": 3.8404019811073864, "tokens_seen": 643825664 }, { "epoch": 0.2, "learning_rate": 0.000812951372171401, "loss": 0.0784, "theoretical_loss": 3.840242067249317, "tokens_seen": 644087808 }, { "epoch": 0.2, "learning_rate": 0.0008128711282298186, "loss": 0.0782, "theoretical_loss": 3.840082236678362, "tokens_seen": 644349952 }, { "epoch": 0.2, "learning_rate": 0.0008127908842882363, "loss": 0.0797, "theoretical_loss": 3.8399224893172854, "tokens_seen": 644612096 }, { "epoch": 0.2, "learning_rate": 0.0008127106403466539, "loss": 0.0789, "theoretical_loss": 3.839762825088955, "tokens_seen": 644874240 }, { "epoch": 0.2, "learning_rate": 0.0008126303964050714, "loss": 0.0828, "theoretical_loss": 3.8396032439163394, "tokens_seen": 645136384 }, { "epoch": 0.2, "learning_rate": 0.0008125501524634891, "loss": 0.0803, "theoretical_loss": 3.8394437457225132, "tokens_seen": 645398528 }, { "epoch": 0.2, "learning_rate": 0.0008124699085219066, "loss": 0.0831, "theoretical_loss": 3.839284330430651, "tokens_seen": 645660672 }, { "epoch": 0.2, "learning_rate": 0.0008123896645803241, "loss": 0.0806, "theoretical_loss": 3.8391249979640305, "tokens_seen": 645922816 }, { "epoch": 0.2, "learning_rate": 0.0008123094206387418, "loss": 0.0798, "theoretical_loss": 3.8389657482460313, "tokens_seen": 646184960 }, { "epoch": 0.2, "learning_rate": 0.0008122291766971593, "loss": 0.0771, "theoretical_loss": 3.838806581200134, "tokens_seen": 646447104 }, { "epoch": 0.2, "learning_rate": 0.000812148932755577, "loss": 0.0806, "theoretical_loss": 3.838647496749924, "tokens_seen": 646709248 }, { "epoch": 0.2, "learning_rate": 0.0008120686888139945, "loss": 0.0815, "theoretical_loss": 3.8384884948190847, "tokens_seen": 646971392 }, { "epoch": 0.2, "learning_rate": 0.0008119884448724122, "loss": 0.0805, "theoretical_loss": 3.838329575331403, "tokens_seen": 647233536 }, { "epoch": 0.2, "learning_rate": 0.0008119082009308298, "loss": 0.0795, "theoretical_loss": 3.8381707382107657, "tokens_seen": 647495680 }, { "epoch": 0.2, "learning_rate": 0.0008118279569892473, "loss": 0.0834, "theoretical_loss": 3.838011983381162, "tokens_seen": 647757824 }, { "epoch": 0.2, "learning_rate": 0.0008117477130476649, "loss": 0.0788, "theoretical_loss": 3.8378533107666817, "tokens_seen": 648019968 }, { "epoch": 0.2, "learning_rate": 0.0008116674691060825, "loss": 0.0831, "theoretical_loss": 3.8376947202915144, "tokens_seen": 648282112 }, { "epoch": 0.2, "learning_rate": 0.0008115872251645001, "loss": 0.0797, "theoretical_loss": 3.8375362118799505, "tokens_seen": 648544256 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0012510496890172362, "objective/train/docs_used": 241544, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5836241245269775, "objective/train/original_loss": 1.5836241245269775, "objective/train/theoretical_loss": 3.8373777854563813, "objective/train/tokens_used": 669266400, "objective/train/value_avg": -0.00818634033203125, "objective/train/value_loss": 0.00038118340307846665, "objective/train/value_max": -0.0001150369644165039, "objective/train/value_min": -0.9169921875, "objective/train/value_reward_corr": 0.6984226450131933, "objective/train/value_std": 0.016143798828125, "objective/train/weight_avg": 1.0014175176620483, "objective/train/weighted_lm_loss": 1.5856133699417114, "objective/train/weights_max": 1.2598052024841309, "objective/train/weights_min": 0.25575754046440125, "theoretical_loss": 3.8373777854563813, "tokens_seen": 648806400 }, { "epoch": 0.2, "learning_rate": 0.0008115069812229176, "loss": 0.0792, "theoretical_loss": 3.8373777854563813, "tokens_seen": 648806400 }, { "epoch": 0.2, "learning_rate": 0.0008114267372813353, "loss": 0.0814, "theoretical_loss": 3.837219440945298, "tokens_seen": 649068544 }, { "epoch": 0.2, "learning_rate": 0.0008113464933397528, "loss": 0.0798, "theoretical_loss": 3.8370611782712922, "tokens_seen": 649330688 }, { "epoch": 0.2, "learning_rate": 0.0008112662493981705, "loss": 0.0821, "theoretical_loss": 3.8369029973590543, "tokens_seen": 649592832 }, { "epoch": 0.2, "learning_rate": 0.0008111860054565881, "loss": 0.0793, "theoretical_loss": 3.836744898133376, "tokens_seen": 649854976 }, { "epoch": 0.2, "learning_rate": 0.0008111057615150056, "loss": 0.0786, "theoretical_loss": 3.8365868805191456, "tokens_seen": 650117120 }, { "epoch": 0.2, "learning_rate": 0.0008110255175734233, "loss": 0.0798, "theoretical_loss": 3.836428944441354, "tokens_seen": 650379264 }, { "epoch": 0.2, "learning_rate": 0.0008109452736318408, "loss": 0.0803, "theoretical_loss": 3.8362710898250896, "tokens_seen": 650641408 }, { "epoch": 0.2, "learning_rate": 0.0008108650296902584, "loss": 0.0817, "theoretical_loss": 3.83611331659554, "tokens_seen": 650903552 }, { "epoch": 0.2, "learning_rate": 0.000810784785748676, "loss": 0.0762, "theoretical_loss": 3.8359556246779913, "tokens_seen": 651165696 }, { "epoch": 0.2, "learning_rate": 0.0008107045418070936, "loss": 0.0826, "theoretical_loss": 3.8357980139978283, "tokens_seen": 651427840 }, { "epoch": 0.2, "learning_rate": 0.0008106242978655111, "loss": 0.0823, "theoretical_loss": 3.8356404844805354, "tokens_seen": 651689984 }, { "epoch": 0.2, "learning_rate": 0.0008105440539239288, "loss": 0.0812, "theoretical_loss": 3.835483036051694, "tokens_seen": 651952128 }, { "epoch": 0.2, "learning_rate": 0.0008104638099823464, "loss": 0.0799, "theoretical_loss": 3.835325668636983, "tokens_seen": 652214272 }, { "epoch": 0.2, "learning_rate": 0.0008103835660407639, "loss": 0.0815, "theoretical_loss": 3.8351683821621814, "tokens_seen": 652476416 }, { "epoch": 0.2, "learning_rate": 0.0008103033220991816, "loss": 0.0816, "theoretical_loss": 3.8350111765531647, "tokens_seen": 652738560 }, { "epoch": 0.2, "learning_rate": 0.0008102230781575991, "loss": 0.0796, "theoretical_loss": 3.834854051735906, "tokens_seen": 653000704 }, { "epoch": 0.2, "learning_rate": 0.0008101428342160167, "loss": 0.0823, "theoretical_loss": 3.8346970076364757, "tokens_seen": 653262848 }, { "epoch": 0.2, "learning_rate": 0.0008100625902744343, "loss": 0.0795, "theoretical_loss": 3.8345400441810424, "tokens_seen": 653524992 }, { "epoch": 0.2, "learning_rate": 0.0008099823463328518, "loss": 0.0816, "theoretical_loss": 3.8343831612958703, "tokens_seen": 653787136 }, { "epoch": 0.2, "learning_rate": 0.0008099021023912694, "loss": 0.0815, "theoretical_loss": 3.834226358907322, "tokens_seen": 654049280 }, { "epoch": 0.2, "learning_rate": 0.000809821858449687, "loss": 0.0816, "theoretical_loss": 3.8340696369418565, "tokens_seen": 654311424 }, { "epoch": 0.2, "learning_rate": 0.0008097416145081047, "loss": 0.0783, "theoretical_loss": 3.833912995326029, "tokens_seen": 654573568 }, { "epoch": 0.2, "learning_rate": 0.0008096613705665223, "loss": 0.0789, "theoretical_loss": 3.8337564339864914, "tokens_seen": 654835712 }, { "epoch": 0.2, "learning_rate": 0.0008095811266249399, "loss": 0.0798, "theoretical_loss": 3.8335999528499913, "tokens_seen": 655097856 }, { "debugging/Compilability": 0.9523809523809523, "debugging/distinct-1-grams": 0.7472213669987466, "debugging/entropy-1-grams": 5.482485850115401, "debugging/length": 435.9047619047619, "debugging/num_segments": 21, "debugging/raw_token_scores_avg": 0.0068328846246004105, "debugging/raw_token_scores_std": 0.01969163864850998, "debugging/score": 0.013429834759321626, "debugging/score_std": 0.01710154615280685, "epoch": 0.2, "objective/train/advantage_avg": 0.0014720445033162832, "objective/train/docs_used": 243976, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.65055251121521, "objective/train/original_loss": 1.65055251121521, "objective/train/theoretical_loss": 3.833443551843374, "objective/train/tokens_used": 675820000, "objective/train/value_avg": -0.00830841064453125, "objective/train/value_loss": 0.00020881656382698566, "objective/train/value_max": -0.00023055076599121094, "objective/train/value_min": -0.321533203125, "objective/train/value_reward_corr": 0.6838538864516404, "objective/train/value_std": 0.01381683349609375, "objective/train/weight_avg": 1.0015703439712524, "objective/train/weighted_lm_loss": 1.6532535552978516, "objective/train/weights_max": 1.1742674112319946, "objective/train/weights_min": 0.3706134557723999, "theoretical_loss": 3.833443551843374, "tokens_seen": 655360000 }, { "epoch": 0.2, "learning_rate": 0.0008095008826833574, "loss": 0.0823, "theoretical_loss": 3.833443551843374, "tokens_seen": 655360000 }, { "epoch": 0.2, "learning_rate": 0.000809420638741775, "loss": 0.0799, "theoretical_loss": 3.8332872308935793, "tokens_seen": 655622144 }, { "epoch": 0.2, "learning_rate": 0.0008093403948001926, "loss": 0.0809, "theoretical_loss": 3.833130989927643, "tokens_seen": 655884288 }, { "epoch": 0.2, "learning_rate": 0.0008092601508586101, "loss": 0.0794, "theoretical_loss": 3.8329748288726972, "tokens_seen": 656146432 }, { "epoch": 0.2, "learning_rate": 0.0008091799069170278, "loss": 0.0833, "theoretical_loss": 3.8328187476559687, "tokens_seen": 656408576 }, { "epoch": 0.2, "learning_rate": 0.0008090996629754453, "loss": 0.0818, "theoretical_loss": 3.83266274620478, "tokens_seen": 656670720 }, { "epoch": 0.2, "learning_rate": 0.000809019419033863, "loss": 0.0781, "theoretical_loss": 3.832506824446549, "tokens_seen": 656932864 }, { "epoch": 0.2, "learning_rate": 0.0008089391750922806, "loss": 0.0791, "theoretical_loss": 3.832350982308788, "tokens_seen": 657195008 }, { "epoch": 0.2, "learning_rate": 0.0008088589311506981, "loss": 0.0781, "theoretical_loss": 3.8321952197191043, "tokens_seen": 657457152 }, { "epoch": 0.2, "learning_rate": 0.0008087786872091157, "loss": 0.0813, "theoretical_loss": 3.8320395366052, "tokens_seen": 657719296 }, { "epoch": 0.2, "learning_rate": 0.0008086984432675333, "loss": 0.0791, "theoretical_loss": 3.8318839328948715, "tokens_seen": 657981440 }, { "epoch": 0.2, "learning_rate": 0.0008086181993259509, "loss": 0.0802, "theoretical_loss": 3.8317284085160095, "tokens_seen": 658243584 }, { "epoch": 0.2, "learning_rate": 0.0008085379553843684, "loss": 0.0802, "theoretical_loss": 3.8315729633965994, "tokens_seen": 658505728 }, { "epoch": 0.2, "learning_rate": 0.0008084577114427861, "loss": 0.0783, "theoretical_loss": 3.8314175974647195, "tokens_seen": 658767872 }, { "epoch": 0.2, "learning_rate": 0.0008083774675012036, "loss": 0.0803, "theoretical_loss": 3.831262310648544, "tokens_seen": 659030016 }, { "epoch": 0.2, "learning_rate": 0.0008082972235596214, "loss": 0.0813, "theoretical_loss": 3.831107102876338, "tokens_seen": 659292160 }, { "epoch": 0.2, "learning_rate": 0.0008082169796180389, "loss": 0.0825, "theoretical_loss": 3.830951974076463, "tokens_seen": 659554304 }, { "epoch": 0.2, "learning_rate": 0.0008081367356764564, "loss": 0.0821, "theoretical_loss": 3.830796924177371, "tokens_seen": 659816448 }, { "epoch": 0.2, "learning_rate": 0.0008080564917348741, "loss": 0.079, "theoretical_loss": 3.830641953107609, "tokens_seen": 660078592 }, { "epoch": 0.2, "learning_rate": 0.0008079762477932916, "loss": 0.0803, "theoretical_loss": 3.8304870607958175, "tokens_seen": 660340736 }, { "epoch": 0.2, "learning_rate": 0.0008078960038517092, "loss": 0.0804, "theoretical_loss": 3.8303322471707286, "tokens_seen": 660602880 }, { "epoch": 0.2, "learning_rate": 0.0008078157599101268, "loss": 0.0782, "theoretical_loss": 3.830177512161167, "tokens_seen": 660865024 }, { "epoch": 0.2, "learning_rate": 0.0008077355159685444, "loss": 0.0815, "theoretical_loss": 3.8300228556960523, "tokens_seen": 661127168 }, { "epoch": 0.2, "learning_rate": 0.0008076552720269619, "loss": 0.0774, "theoretical_loss": 3.829868277704393, "tokens_seen": 661389312 }, { "epoch": 0.2, "learning_rate": 0.0008075750280853796, "loss": 0.0806, "theoretical_loss": 3.829713778115293, "tokens_seen": 661651456 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00017224658222403377, "objective/train/docs_used": 246530, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.7009872198104858, "objective/train/original_loss": 1.7009871006011963, "objective/train/theoretical_loss": 3.8295593568579465, "objective/train/tokens_used": 682373600, "objective/train/value_avg": -0.01042938232421875, "objective/train/value_loss": 0.00039018821553327143, "objective/train/value_max": -0.0001634359359741211, "objective/train/value_min": -0.291748046875, "objective/train/value_reward_corr": 0.6474135841444115, "objective/train/value_std": 0.01617431640625, "objective/train/weight_avg": 1.0003470182418823, "objective/train/weighted_lm_loss": 1.7011173963546753, "objective/train/weights_max": 1.2405765056610107, "objective/train/weights_min": 0.3682652413845062, "theoretical_loss": 3.8295593568579465, "tokens_seen": 661913600 }, { "epoch": 0.2, "learning_rate": 0.0008074947841437972, "loss": 0.0814, "theoretical_loss": 3.8295593568579465, "tokens_seen": 661913600 }, { "epoch": 0.2, "learning_rate": 0.0008074145402022147, "loss": 0.0811, "theoretical_loss": 3.8294050138616402, "tokens_seen": 662175744 }, { "epoch": 0.2, "learning_rate": 0.0008073342962606324, "loss": 0.0806, "theoretical_loss": 3.8292507490557526, "tokens_seen": 662437888 }, { "epoch": 0.2, "learning_rate": 0.0008072540523190499, "loss": 0.0764, "theoretical_loss": 3.8290965623697537, "tokens_seen": 662700032 }, { "epoch": 0.2, "learning_rate": 0.0008071738083774676, "loss": 0.078, "theoretical_loss": 3.8289424537332053, "tokens_seen": 662962176 }, { "epoch": 0.2, "learning_rate": 0.0008070935644358851, "loss": 0.0825, "theoretical_loss": 3.82878842307576, "tokens_seen": 663224320 }, { "epoch": 0.2, "learning_rate": 0.0008070133204943026, "loss": 0.0794, "theoretical_loss": 3.828634470327162, "tokens_seen": 663486464 }, { "epoch": 0.2, "learning_rate": 0.0008069330765527203, "loss": 0.0805, "theoretical_loss": 3.8284805954172474, "tokens_seen": 663748608 }, { "epoch": 0.2, "learning_rate": 0.0008068528326111379, "loss": 0.081, "theoretical_loss": 3.828326798275941, "tokens_seen": 664010752 }, { "epoch": 0.2, "learning_rate": 0.0008067725886695555, "loss": 0.0793, "theoretical_loss": 3.82817307883326, "tokens_seen": 664272896 }, { "epoch": 0.2, "learning_rate": 0.0008066923447279731, "loss": 0.0795, "theoretical_loss": 3.8280194370193112, "tokens_seen": 664535040 }, { "epoch": 0.2, "learning_rate": 0.0008066121007863907, "loss": 0.0796, "theoretical_loss": 3.827865872764293, "tokens_seen": 664797184 }, { "epoch": 0.2, "learning_rate": 0.0008065318568448082, "loss": 0.0799, "theoretical_loss": 3.8277123859984936, "tokens_seen": 665059328 }, { "epoch": 0.2, "learning_rate": 0.0008064516129032258, "loss": 0.0811, "theoretical_loss": 3.8275589766522895, "tokens_seen": 665321472 }, { "epoch": 0.2, "learning_rate": 0.0008063713689616434, "loss": 0.0794, "theoretical_loss": 3.8274056446561504, "tokens_seen": 665583616 }, { "epoch": 0.2, "learning_rate": 0.0008062911250200609, "loss": 0.0768, "theoretical_loss": 3.827252389940633, "tokens_seen": 665845760 }, { "epoch": 0.2, "learning_rate": 0.0008062108810784786, "loss": 0.0787, "theoretical_loss": 3.827099212436386, "tokens_seen": 666107904 }, { "epoch": 0.2, "learning_rate": 0.0008061306371368961, "loss": 0.0801, "theoretical_loss": 3.826946112074145, "tokens_seen": 666370048 }, { "epoch": 0.2, "learning_rate": 0.0008060503931953138, "loss": 0.0789, "theoretical_loss": 3.826793088784737, "tokens_seen": 666632192 }, { "epoch": 0.2, "learning_rate": 0.0008059701492537314, "loss": 0.0816, "theoretical_loss": 3.826640142499077, "tokens_seen": 666894336 }, { "epoch": 0.2, "learning_rate": 0.0008058899053121489, "loss": 0.0782, "theoretical_loss": 3.8264872731481705, "tokens_seen": 667156480 }, { "epoch": 0.2, "learning_rate": 0.0008058096613705666, "loss": 0.0785, "theoretical_loss": 3.8263344806631103, "tokens_seen": 667418624 }, { "epoch": 0.2, "learning_rate": 0.0008057294174289841, "loss": 0.0802, "theoretical_loss": 3.8261817649750784, "tokens_seen": 667680768 }, { "epoch": 0.2, "learning_rate": 0.0008056491734874017, "loss": 0.0778, "theoretical_loss": 3.8260291260153463, "tokens_seen": 667942912 }, { "epoch": 0.2, "learning_rate": 0.0008055689295458193, "loss": 0.0796, "theoretical_loss": 3.8258765637152727, "tokens_seen": 668205056 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0008796442998573184, "objective/train/docs_used": 248744, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6819145679473877, "objective/train/original_loss": 1.6819145679473877, "objective/train/theoretical_loss": 3.8257240780063055, "objective/train/tokens_used": 688927200, "objective/train/value_avg": -0.007282257080078125, "objective/train/value_loss": 0.00022768820053897798, "objective/train/value_max": -0.00021314620971679688, "objective/train/value_min": -0.29150390625, "objective/train/value_reward_corr": 0.70498133906636, "objective/train/value_std": 0.01126861572265625, "objective/train/weight_avg": 1.0009862184524536, "objective/train/weighted_lm_loss": 1.6842213869094849, "objective/train/weights_max": 1.2005013227462769, "objective/train/weights_min": 0.37098827958106995, "theoretical_loss": 3.8257240780063055, "tokens_seen": 668467200 }, { "epoch": 0.2, "learning_rate": 0.0008054886856042369, "loss": 0.0805, "theoretical_loss": 3.8257240780063055, "tokens_seen": 668467200 }, { "epoch": 0.2, "learning_rate": 0.0008054084416626544, "loss": 0.0779, "theoretical_loss": 3.8255716688199803, "tokens_seen": 668729344 }, { "epoch": 0.2, "learning_rate": 0.0008053281977210722, "loss": 0.0788, "theoretical_loss": 3.825419336087921, "tokens_seen": 668991488 }, { "epoch": 0.2, "learning_rate": 0.0008052479537794897, "loss": 0.0786, "theoretical_loss": 3.825267079741839, "tokens_seen": 669253632 }, { "epoch": 0.2, "learning_rate": 0.0008051677098379072, "loss": 0.0792, "theoretical_loss": 3.825114899713533, "tokens_seen": 669515776 }, { "epoch": 0.2, "learning_rate": 0.0008050874658963249, "loss": 0.0808, "theoretical_loss": 3.8249627959348915, "tokens_seen": 669777920 }, { "epoch": 0.2, "learning_rate": 0.0008050072219547424, "loss": 0.0788, "theoretical_loss": 3.824810768337887, "tokens_seen": 670040064 }, { "epoch": 0.2, "learning_rate": 0.00080492697801316, "loss": 0.0798, "theoretical_loss": 3.8246588168545816, "tokens_seen": 670302208 }, { "epoch": 0.2, "learning_rate": 0.0008048467340715776, "loss": 0.0745, "theoretical_loss": 3.824506941417125, "tokens_seen": 670564352 }, { "epoch": 0.2, "learning_rate": 0.0008047664901299952, "loss": 0.0809, "theoretical_loss": 3.824355141957752, "tokens_seen": 670826496 }, { "epoch": 0.2, "learning_rate": 0.0008046862461884128, "loss": 0.0798, "theoretical_loss": 3.8242034184087847, "tokens_seen": 671088640 }, { "epoch": 0.2, "learning_rate": 0.0008046060022468304, "loss": 0.0772, "theoretical_loss": 3.824051770702633, "tokens_seen": 671350784 }, { "epoch": 0.2, "learning_rate": 0.000804525758305248, "loss": 0.0819, "theoretical_loss": 3.823900198771792, "tokens_seen": 671612928 }, { "epoch": 0.2, "learning_rate": 0.0008044455143636656, "loss": 0.0765, "theoretical_loss": 3.823748702548845, "tokens_seen": 671875072 }, { "epoch": 0.2, "learning_rate": 0.0008043652704220832, "loss": 0.0772, "theoretical_loss": 3.823597281966459, "tokens_seen": 672137216 }, { "epoch": 0.2, "learning_rate": 0.0008042850264805007, "loss": 0.0785, "theoretical_loss": 3.8234459369573894, "tokens_seen": 672399360 }, { "epoch": 0.2, "learning_rate": 0.0008042047825389184, "loss": 0.0792, "theoretical_loss": 3.8232946674544763, "tokens_seen": 672661504 }, { "epoch": 0.2, "learning_rate": 0.0008041245385973359, "loss": 0.0802, "theoretical_loss": 3.8231434733906458, "tokens_seen": 672923648 }, { "epoch": 0.2, "learning_rate": 0.0008040442946557534, "loss": 0.0799, "theoretical_loss": 3.822992354698911, "tokens_seen": 673185792 }, { "epoch": 0.2, "learning_rate": 0.0008039640507141711, "loss": 0.0783, "theoretical_loss": 3.822841311312368, "tokens_seen": 673447936 }, { "epoch": 0.2, "learning_rate": 0.0008038838067725887, "loss": 0.0791, "theoretical_loss": 3.8226903431642008, "tokens_seen": 673710080 }, { "epoch": 0.2, "learning_rate": 0.0008038035628310063, "loss": 0.0789, "theoretical_loss": 3.8225394501876764, "tokens_seen": 673972224 }, { "epoch": 0.2, "learning_rate": 0.0008037233188894239, "loss": 0.0798, "theoretical_loss": 3.822388632316149, "tokens_seen": 674234368 }, { "epoch": 0.2, "learning_rate": 0.0008036430749478415, "loss": 0.0776, "theoretical_loss": 3.822237889483057, "tokens_seen": 674496512 }, { "epoch": 0.2, "learning_rate": 0.000803562831006259, "loss": 0.0791, "theoretical_loss": 3.822087221621923, "tokens_seen": 674758656 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.00036516584805212915, "objective/train/docs_used": 251058, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5383315086364746, "objective/train/original_loss": 1.5383315086364746, "objective/train/theoretical_loss": 3.821936628666355, "objective/train/tokens_used": 695480800, "objective/train/value_avg": -0.0069732666015625, "objective/train/value_loss": 0.00019810259982477874, "objective/train/value_max": -0.0002269744873046875, "objective/train/value_min": -0.9658203125, "objective/train/value_reward_corr": 0.6702206529549529, "objective/train/value_std": 0.0112762451171875, "objective/train/weight_avg": 1.0004541873931885, "objective/train/weighted_lm_loss": 1.5389385223388672, "objective/train/weights_max": 1.1621463298797607, "objective/train/weights_min": 0.23926687240600586, "theoretical_loss": 3.821936628666355, "tokens_seen": 675020800 }, { "epoch": 0.2, "learning_rate": 0.0008034825870646766, "loss": 0.075, "theoretical_loss": 3.821936628666355, "tokens_seen": 675020800 }, { "epoch": 0.2, "learning_rate": 0.0008034023431230942, "loss": 0.0775, "theoretical_loss": 3.821786110550045, "tokens_seen": 675282944 }, { "epoch": 0.2, "learning_rate": 0.0008033220991815118, "loss": 0.0766, "theoretical_loss": 3.8216356672067704, "tokens_seen": 675545088 }, { "epoch": 0.2, "learning_rate": 0.0008032418552399294, "loss": 0.0791, "theoretical_loss": 3.821485298570391, "tokens_seen": 675807232 }, { "epoch": 0.2, "learning_rate": 0.000803161611298347, "loss": 0.0771, "theoretical_loss": 3.8213350045748533, "tokens_seen": 676069376 }, { "epoch": 0.2, "learning_rate": 0.0008030813673567647, "loss": 0.0784, "theoretical_loss": 3.821184785154186, "tokens_seen": 676331520 }, { "epoch": 0.21, "learning_rate": 0.0008030011234151822, "loss": 0.0789, "theoretical_loss": 3.8210346402425013, "tokens_seen": 676593664 }, { "epoch": 0.21, "learning_rate": 0.0008029208794735997, "loss": 0.0787, "theoretical_loss": 3.820884569773997, "tokens_seen": 676855808 }, { "epoch": 0.21, "learning_rate": 0.0008028406355320174, "loss": 0.0768, "theoretical_loss": 3.8207345736829526, "tokens_seen": 677117952 }, { "epoch": 0.21, "learning_rate": 0.0008027603915904349, "loss": 0.0778, "theoretical_loss": 3.820584651903732, "tokens_seen": 677380096 }, { "epoch": 0.21, "learning_rate": 0.0008026801476488525, "loss": 0.0782, "theoretical_loss": 3.820434804370782, "tokens_seen": 677642240 }, { "epoch": 0.21, "learning_rate": 0.0008025999037072701, "loss": 0.0778, "theoretical_loss": 3.820285031018633, "tokens_seen": 677904384 }, { "epoch": 0.21, "learning_rate": 0.0008025196597656877, "loss": 0.0785, "theoretical_loss": 3.8201353317818985, "tokens_seen": 678166528 }, { "epoch": 0.21, "learning_rate": 0.0008024394158241052, "loss": 0.0764, "theoretical_loss": 3.819985706595274, "tokens_seen": 678428672 }, { "epoch": 0.21, "learning_rate": 0.0008023591718825229, "loss": 0.0815, "theoretical_loss": 3.8198361553935385, "tokens_seen": 678690816 }, { "epoch": 0.21, "learning_rate": 0.0008022789279409405, "loss": 0.0812, "theoretical_loss": 3.8196866781115526, "tokens_seen": 678952960 }, { "epoch": 0.21, "learning_rate": 0.000802198683999358, "loss": 0.0828, "theoretical_loss": 3.8195372746842615, "tokens_seen": 679215104 }, { "epoch": 0.21, "learning_rate": 0.0008021184400577757, "loss": 0.0766, "theoretical_loss": 3.8193879450466905, "tokens_seen": 679477248 }, { "epoch": 0.21, "learning_rate": 0.0008020381961161932, "loss": 0.0797, "theoretical_loss": 3.819238689133948, "tokens_seen": 679739392 }, { "epoch": 0.21, "learning_rate": 0.0008019579521746109, "loss": 0.0793, "theoretical_loss": 3.819089506881225, "tokens_seen": 680001536 }, { "epoch": 0.21, "learning_rate": 0.0008018777082330284, "loss": 0.0789, "theoretical_loss": 3.8189403982237935, "tokens_seen": 680263680 }, { "epoch": 0.21, "learning_rate": 0.000801797464291446, "loss": 0.0772, "theoretical_loss": 3.818791363097008, "tokens_seen": 680525824 }, { "epoch": 0.21, "learning_rate": 0.0008017172203498636, "loss": 0.0787, "theoretical_loss": 3.818642401436304, "tokens_seen": 680787968 }, { "epoch": 0.21, "learning_rate": 0.0008016369764082812, "loss": 0.0791, "theoretical_loss": 3.8184935131771987, "tokens_seen": 681050112 }, { "epoch": 0.21, "learning_rate": 0.0008015567324666988, "loss": 0.0784, "theoretical_loss": 3.8183446982552915, "tokens_seen": 681312256 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0005019235541112721, "objective/train/docs_used": 253498, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.601072072982788, "objective/train/original_loss": 1.6010721921920776, "objective/train/theoretical_loss": 3.818195956606262, "objective/train/tokens_used": 702034400, "objective/train/value_avg": -0.006389617919921875, "objective/train/value_loss": 0.00024236510216724128, "objective/train/value_max": -0.0001366138458251953, "objective/train/value_min": -0.63427734375, "objective/train/value_reward_corr": 0.4653045941913805, "objective/train/value_std": 0.0096893310546875, "objective/train/weight_avg": 1.0006093978881836, "objective/train/weighted_lm_loss": 1.6022027730941772, "objective/train/weights_max": 1.7492560148239136, "objective/train/weights_min": 0.37081003189086914, "theoretical_loss": 3.818195956606262, "tokens_seen": 681574400 }, { "epoch": 0.21, "learning_rate": 0.0008014764885251164, "loss": 0.0804, "theoretical_loss": 3.818195956606262, "tokens_seen": 681574400 }, { "epoch": 0.21, "learning_rate": 0.000801396244583534, "loss": 0.0799, "theoretical_loss": 3.8180472881658707, "tokens_seen": 681836544 }, { "epoch": 0.21, "learning_rate": 0.0008013160006419515, "loss": 0.0805, "theoretical_loss": 3.817898692869961, "tokens_seen": 682098688 }, { "epoch": 0.21, "learning_rate": 0.0008012357567003692, "loss": 0.0809, "theoretical_loss": 3.817750170654455, "tokens_seen": 682360832 }, { "epoch": 0.21, "learning_rate": 0.0008011555127587867, "loss": 0.0793, "theoretical_loss": 3.8176017214553575, "tokens_seen": 682622976 }, { "epoch": 0.21, "learning_rate": 0.0008010752688172042, "loss": 0.0801, "theoretical_loss": 3.8174533452087513, "tokens_seen": 682885120 }, { "epoch": 0.21, "learning_rate": 0.0008009950248756219, "loss": 0.0771, "theoretical_loss": 3.817305041850802, "tokens_seen": 683147264 }, { "epoch": 0.21, "learning_rate": 0.0008009147809340394, "loss": 0.0804, "theoretical_loss": 3.817156811317754, "tokens_seen": 683409408 }, { "epoch": 0.21, "learning_rate": 0.0008008345369924572, "loss": 0.0799, "theoretical_loss": 3.8170086535459333, "tokens_seen": 683671552 }, { "epoch": 0.21, "learning_rate": 0.0008007542930508747, "loss": 0.0797, "theoretical_loss": 3.8168605684717454, "tokens_seen": 683933696 }, { "epoch": 0.21, "learning_rate": 0.0008006740491092923, "loss": 0.0796, "theoretical_loss": 3.816712556031675, "tokens_seen": 684195840 }, { "epoch": 0.21, "learning_rate": 0.0008005938051677099, "loss": 0.0788, "theoretical_loss": 3.816564616162287, "tokens_seen": 684457984 }, { "epoch": 0.21, "learning_rate": 0.0008005135612261274, "loss": 0.0776, "theoretical_loss": 3.8164167488002265, "tokens_seen": 684720128 }, { "epoch": 0.21, "learning_rate": 0.000800433317284545, "loss": 0.0795, "theoretical_loss": 3.816268953882218, "tokens_seen": 684982272 }, { "epoch": 0.21, "learning_rate": 0.0008003530733429626, "loss": 0.0775, "theoretical_loss": 3.8161212313450648, "tokens_seen": 685244416 }, { "epoch": 0.21, "learning_rate": 0.0008002728294013802, "loss": 0.079, "theoretical_loss": 3.81597358112565, "tokens_seen": 685506560 }, { "epoch": 0.21, "learning_rate": 0.0008001925854597977, "loss": 0.0796, "theoretical_loss": 3.815826003160935, "tokens_seen": 685768704 }, { "epoch": 0.21, "learning_rate": 0.0008001123415182155, "loss": 0.0809, "theoretical_loss": 3.815678497387962, "tokens_seen": 686030848 }, { "epoch": 0.21, "learning_rate": 0.000800032097576633, "loss": 0.0785, "theoretical_loss": 3.8155310637438506, "tokens_seen": 686292992 }, { "epoch": 0.21, "learning_rate": 0.0007999518536350505, "loss": 0.0787, "theoretical_loss": 3.8153837021657995, "tokens_seen": 686555136 }, { "epoch": 0.21, "learning_rate": 0.0007998716096934682, "loss": 0.0774, "theoretical_loss": 3.8152364125910863, "tokens_seen": 686817280 }, { "epoch": 0.21, "learning_rate": 0.0007997913657518857, "loss": 0.0779, "theoretical_loss": 3.8150891949570664, "tokens_seen": 687079424 }, { "epoch": 0.21, "learning_rate": 0.0007997111218103033, "loss": 0.078, "theoretical_loss": 3.814942049201175, "tokens_seen": 687341568 }, { "epoch": 0.21, "learning_rate": 0.0007996308778687209, "loss": 0.0812, "theoretical_loss": 3.8147949752609236, "tokens_seen": 687603712 }, { "epoch": 0.21, "learning_rate": 0.0007995506339271385, "loss": 0.0787, "theoretical_loss": 3.814647973073903, "tokens_seen": 687865856 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0018313382752239704, "objective/train/docs_used": 255827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5675907135009766, "objective/train/original_loss": 1.5675907135009766, "objective/train/theoretical_loss": 3.8145010425777826, "objective/train/tokens_used": 708588000, "objective/train/value_avg": -0.01007080078125, "objective/train/value_loss": 0.00023178788251243532, "objective/train/value_max": -0.0001499652862548828, "objective/train/value_min": -0.5947265625, "objective/train/value_reward_corr": 0.7783312114068648, "objective/train/value_std": 0.018707275390625, "objective/train/weight_avg": 1.0019445419311523, "objective/train/weighted_lm_loss": 1.5707687139511108, "objective/train/weights_max": 1.2117313146591187, "objective/train/weights_min": 0.6250301003456116, "theoretical_loss": 3.8145010425777826, "tokens_seen": 688128000 }, { "epoch": 0.21, "learning_rate": 0.0007994703899855562, "loss": 0.0784, "theoretical_loss": 3.8145010425777826, "tokens_seen": 688128000 }, { "epoch": 0.21, "learning_rate": 0.0007993901460439737, "loss": 0.0793, "theoretical_loss": 3.814354183710308, "tokens_seen": 688390144 }, { "epoch": 0.21, "learning_rate": 0.0007993099021023913, "loss": 0.0793, "theoretical_loss": 3.8142073964093046, "tokens_seen": 688652288 }, { "epoch": 0.21, "learning_rate": 0.0007992296581608089, "loss": 0.0781, "theoretical_loss": 3.8140606806126733, "tokens_seen": 688914432 }, { "epoch": 0.21, "learning_rate": 0.0007991494142192265, "loss": 0.0792, "theoretical_loss": 3.813914036258393, "tokens_seen": 689176576 }, { "epoch": 0.21, "learning_rate": 0.000799069170277644, "loss": 0.0786, "theoretical_loss": 3.813767463284522, "tokens_seen": 689438720 }, { "epoch": 0.21, "learning_rate": 0.0007989889263360617, "loss": 0.0793, "theoretical_loss": 3.8136209616291934, "tokens_seen": 689700864 }, { "epoch": 0.21, "learning_rate": 0.0007989086823944792, "loss": 0.0794, "theoretical_loss": 3.813474531230618, "tokens_seen": 689963008 }, { "epoch": 0.21, "learning_rate": 0.0007988284384528968, "loss": 0.0782, "theoretical_loss": 3.8133281720270835, "tokens_seen": 690225152 }, { "epoch": 0.21, "learning_rate": 0.0007987481945113144, "loss": 0.0802, "theoretical_loss": 3.8131818839569562, "tokens_seen": 690487296 }, { "epoch": 0.21, "learning_rate": 0.000798667950569732, "loss": 0.0804, "theoretical_loss": 3.8130356669586765, "tokens_seen": 690749440 }, { "epoch": 0.21, "learning_rate": 0.0007985877066281496, "loss": 0.0765, "theoretical_loss": 3.8128895209707627, "tokens_seen": 691011584 }, { "epoch": 0.21, "learning_rate": 0.0007985074626865672, "loss": 0.0817, "theoretical_loss": 3.81274344593181, "tokens_seen": 691273728 }, { "epoch": 0.21, "learning_rate": 0.0007984272187449848, "loss": 0.0773, "theoretical_loss": 3.8125974417804893, "tokens_seen": 691535872 }, { "epoch": 0.21, "learning_rate": 0.0007983469748034023, "loss": 0.0765, "theoretical_loss": 3.8124515084555477, "tokens_seen": 691798016 }, { "epoch": 0.21, "learning_rate": 0.00079826673086182, "loss": 0.0782, "theoretical_loss": 3.8123056458958087, "tokens_seen": 692060160 }, { "epoch": 0.21, "learning_rate": 0.0007981864869202375, "loss": 0.0776, "theoretical_loss": 3.812159854040172, "tokens_seen": 692322304 }, { "epoch": 0.21, "learning_rate": 0.0007981062429786551, "loss": 0.0793, "theoretical_loss": 3.8120141328276125, "tokens_seen": 692584448 }, { "epoch": 0.21, "learning_rate": 0.0007980259990370727, "loss": 0.0808, "theoretical_loss": 3.8118684821971813, "tokens_seen": 692846592 }, { "epoch": 0.21, "learning_rate": 0.0007979457550954902, "loss": 0.0803, "theoretical_loss": 3.8117229020880057, "tokens_seen": 693108736 }, { "epoch": 0.21, "learning_rate": 0.000797865511153908, "loss": 0.08, "theoretical_loss": 3.811577392439287, "tokens_seen": 693370880 }, { "epoch": 0.21, "learning_rate": 0.0007977852672123255, "loss": 0.08, "theoretical_loss": 3.8114319531903025, "tokens_seen": 693633024 }, { "epoch": 0.21, "learning_rate": 0.0007977050232707431, "loss": 0.0802, "theoretical_loss": 3.811286584280406, "tokens_seen": 693895168 }, { "epoch": 0.21, "learning_rate": 0.0007976247793291607, "loss": 0.0797, "theoretical_loss": 3.8111412856490245, "tokens_seen": 694157312 }, { "epoch": 0.21, "learning_rate": 0.0007975445353875782, "loss": 0.08, "theoretical_loss": 3.810996057235661, "tokens_seen": 694419456 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.001043860800564289, "objective/train/docs_used": 258341, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6668003797531128, "objective/train/original_loss": 1.6668004989624023, "objective/train/theoretical_loss": 3.810850898979894, "objective/train/tokens_used": 715141600, "objective/train/value_avg": -0.0081634521484375, "objective/train/value_loss": 0.00030596103169955313, "objective/train/value_max": -0.00021827220916748047, "objective/train/value_min": -0.765625, "objective/train/value_reward_corr": 0.5807898030559664, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.001173973083496, "objective/train/weighted_lm_loss": 1.668200969696045, "objective/train/weights_max": 1.4868050813674927, "objective/train/weights_min": 0.22793063521385193, "theoretical_loss": 3.810850898979894, "tokens_seen": 694681600 }, { "epoch": 0.21, "learning_rate": 0.0007974642914459958, "loss": 0.0797, "theoretical_loss": 3.810850898979894, "tokens_seen": 694681600 }, { "epoch": 0.21, "learning_rate": 0.0007973840475044134, "loss": 0.0806, "theoretical_loss": 3.810705810821375, "tokens_seen": 694943744 }, { "epoch": 0.21, "learning_rate": 0.000797303803562831, "loss": 0.0794, "theoretical_loss": 3.8105607926998326, "tokens_seen": 695205888 }, { "epoch": 0.21, "learning_rate": 0.0007972235596212485, "loss": 0.0782, "theoretical_loss": 3.810415844555067, "tokens_seen": 695468032 }, { "epoch": 0.21, "learning_rate": 0.0007971433156796663, "loss": 0.0775, "theoretical_loss": 3.8102709663269554, "tokens_seen": 695730176 }, { "epoch": 0.21, "learning_rate": 0.0007970630717380838, "loss": 0.0788, "theoretical_loss": 3.810126157955448, "tokens_seen": 695992320 }, { "epoch": 0.21, "learning_rate": 0.0007969828277965014, "loss": 0.0777, "theoretical_loss": 3.809981419380569, "tokens_seen": 696254464 }, { "epoch": 0.21, "learning_rate": 0.000796902583854919, "loss": 0.0796, "theoretical_loss": 3.809836750542418, "tokens_seen": 696516608 }, { "epoch": 0.21, "learning_rate": 0.0007968223399133365, "loss": 0.08, "theoretical_loss": 3.8096921513811663, "tokens_seen": 696778752 }, { "epoch": 0.21, "learning_rate": 0.0007967420959717542, "loss": 0.08, "theoretical_loss": 3.809547621837061, "tokens_seen": 697040896 }, { "epoch": 0.21, "learning_rate": 0.0007966618520301717, "loss": 0.0763, "theoretical_loss": 3.809403161850423, "tokens_seen": 697303040 }, { "epoch": 0.21, "learning_rate": 0.0007965816080885893, "loss": 0.0822, "theoretical_loss": 3.8092587713616446, "tokens_seen": 697565184 }, { "epoch": 0.21, "learning_rate": 0.000796501364147007, "loss": 0.0808, "theoretical_loss": 3.809114450311193, "tokens_seen": 697827328 }, { "epoch": 0.21, "learning_rate": 0.0007964211202054245, "loss": 0.0796, "theoretical_loss": 3.808970198639609, "tokens_seen": 698089472 }, { "epoch": 0.21, "learning_rate": 0.0007963408762638421, "loss": 0.0778, "theoretical_loss": 3.808826016287507, "tokens_seen": 698351616 }, { "epoch": 0.21, "learning_rate": 0.0007962606323222597, "loss": 0.0802, "theoretical_loss": 3.8086819031955725, "tokens_seen": 698613760 }, { "epoch": 0.21, "learning_rate": 0.0007961803883806773, "loss": 0.0786, "theoretical_loss": 3.8085378593045665, "tokens_seen": 698875904 }, { "epoch": 0.21, "learning_rate": 0.0007961001444390948, "loss": 0.0816, "theoretical_loss": 3.8083938845553202, "tokens_seen": 699138048 }, { "epoch": 0.21, "learning_rate": 0.0007960199004975125, "loss": 0.0778, "theoretical_loss": 3.80824997888874, "tokens_seen": 699400192 }, { "epoch": 0.21, "learning_rate": 0.00079593965655593, "loss": 0.0807, "theoretical_loss": 3.8081061422458036, "tokens_seen": 699662336 }, { "epoch": 0.21, "learning_rate": 0.0007958594126143476, "loss": 0.0784, "theoretical_loss": 3.8079623745675613, "tokens_seen": 699924480 }, { "epoch": 0.21, "learning_rate": 0.0007957791686727652, "loss": 0.0752, "theoretical_loss": 3.8078186757951364, "tokens_seen": 700186624 }, { "epoch": 0.21, "learning_rate": 0.0007956989247311828, "loss": 0.0805, "theoretical_loss": 3.8076750458697237, "tokens_seen": 700448768 }, { "epoch": 0.21, "learning_rate": 0.0007956186807896005, "loss": 0.0788, "theoretical_loss": 3.807531484732591, "tokens_seen": 700710912 }, { "epoch": 0.21, "learning_rate": 0.000795538436848018, "loss": 0.0793, "theoretical_loss": 3.8073879923250775, "tokens_seen": 700973056 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0004075298202224076, "objective/train/docs_used": 260260, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5826354026794434, "objective/train/original_loss": 1.5826352834701538, "objective/train/theoretical_loss": 3.8072445685885947, "objective/train/tokens_used": 721695200, "objective/train/value_avg": -0.00783538818359375, "objective/train/value_loss": 0.00022773307864554226, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.63134765625, "objective/train/value_reward_corr": 0.635312677935756, "objective/train/value_std": 0.01226806640625, "objective/train/weight_avg": 1.0005120038986206, "objective/train/weighted_lm_loss": 1.5832562446594238, "objective/train/weights_max": 1.4071308374404907, "objective/train/weights_min": 0.36930587887763977, "theoretical_loss": 3.8072445685885947, "tokens_seen": 701235200 }, { "epoch": 0.21, "learning_rate": 0.0007954581929064356, "loss": 0.0774, "theoretical_loss": 3.8072445685885947, "tokens_seen": 701235200 }, { "epoch": 0.21, "learning_rate": 0.0007953779489648532, "loss": 0.0784, "theoretical_loss": 3.807101213464625, "tokens_seen": 701497344 }, { "epoch": 0.21, "learning_rate": 0.0007952977050232708, "loss": 0.08, "theoretical_loss": 3.8069579268947242, "tokens_seen": 701759488 }, { "epoch": 0.21, "learning_rate": 0.0007952174610816883, "loss": 0.0812, "theoretical_loss": 3.806814708820519, "tokens_seen": 702021632 }, { "epoch": 0.21, "learning_rate": 0.0007951372171401059, "loss": 0.0758, "theoretical_loss": 3.806671559183706, "tokens_seen": 702283776 }, { "epoch": 0.21, "learning_rate": 0.0007950569731985235, "loss": 0.0778, "theoretical_loss": 3.806528477926056, "tokens_seen": 702545920 }, { "epoch": 0.21, "learning_rate": 0.000794976729256941, "loss": 0.081, "theoretical_loss": 3.806385464989409, "tokens_seen": 702808064 }, { "epoch": 0.21, "learning_rate": 0.0007948964853153588, "loss": 0.0806, "theoretical_loss": 3.806242520315676, "tokens_seen": 703070208 }, { "epoch": 0.21, "learning_rate": 0.0007948162413737763, "loss": 0.0789, "theoretical_loss": 3.806099643846841, "tokens_seen": 703332352 }, { "epoch": 0.21, "learning_rate": 0.0007947359974321939, "loss": 0.0789, "theoretical_loss": 3.8059568355249564, "tokens_seen": 703594496 }, { "epoch": 0.21, "learning_rate": 0.0007946557534906115, "loss": 0.0786, "theoretical_loss": 3.8058140952921478, "tokens_seen": 703856640 }, { "epoch": 0.21, "learning_rate": 0.000794575509549029, "loss": 0.0794, "theoretical_loss": 3.805671423090609, "tokens_seen": 704118784 }, { "epoch": 0.21, "learning_rate": 0.0007944952656074467, "loss": 0.0779, "theoretical_loss": 3.805528818862607, "tokens_seen": 704380928 }, { "epoch": 0.21, "learning_rate": 0.0007944150216658642, "loss": 0.0777, "theoretical_loss": 3.8053862825504776, "tokens_seen": 704643072 }, { "epoch": 0.21, "learning_rate": 0.0007943347777242818, "loss": 0.0805, "theoretical_loss": 3.8052438140966265, "tokens_seen": 704905216 }, { "epoch": 0.21, "learning_rate": 0.0007942545337826995, "loss": 0.08, "theoretical_loss": 3.8051014134435315, "tokens_seen": 705167360 }, { "epoch": 0.21, "learning_rate": 0.0007941742898411171, "loss": 0.0751, "theoretical_loss": 3.804959080533739, "tokens_seen": 705429504 }, { "epoch": 0.21, "learning_rate": 0.0007940940458995346, "loss": 0.0788, "theoretical_loss": 3.8048168153098656, "tokens_seen": 705691648 }, { "epoch": 0.21, "learning_rate": 0.0007940138019579522, "loss": 0.0784, "theoretical_loss": 3.8046746177145985, "tokens_seen": 705953792 }, { "epoch": 0.21, "learning_rate": 0.0007939335580163698, "loss": 0.0796, "theoretical_loss": 3.804532487690694, "tokens_seen": 706215936 }, { "epoch": 0.21, "learning_rate": 0.0007938533140747873, "loss": 0.078, "theoretical_loss": 3.8043904251809786, "tokens_seen": 706478080 }, { "epoch": 0.21, "learning_rate": 0.000793773070133205, "loss": 0.0793, "theoretical_loss": 3.8042484301283475, "tokens_seen": 706740224 }, { "epoch": 0.21, "learning_rate": 0.0007936928261916225, "loss": 0.0805, "theoretical_loss": 3.8041065024757668, "tokens_seen": 707002368 }, { "epoch": 0.21, "learning_rate": 0.0007936125822500401, "loss": 0.0788, "theoretical_loss": 3.8039646421662705, "tokens_seen": 707264512 }, { "epoch": 0.21, "learning_rate": 0.0007935323383084578, "loss": 0.078, "theoretical_loss": 3.8038228491429624, "tokens_seen": 707526656 }, { "epoch": 0.21, "objective/train/advantage_avg": -0.0002181452582590282, "objective/train/docs_used": 262675, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6911776065826416, "objective/train/original_loss": 1.6911776065826416, "objective/train/theoretical_loss": 3.8036811233490164, "objective/train/tokens_used": 728248800, "objective/train/value_avg": -0.0099334716796875, "objective/train/value_loss": 0.0003097241569776088, "objective/train/value_max": -0.00015723705291748047, "objective/train/value_min": -0.83837890625, "objective/train/value_reward_corr": 0.6940126679848521, "objective/train/value_std": 0.0159912109375, "objective/train/weight_avg": 0.9999237656593323, "objective/train/weighted_lm_loss": 1.6909995079040527, "objective/train/weights_max": 1.2376219034194946, "objective/train/weights_min": 0.3753642737865448, "theoretical_loss": 3.8036811233490164, "tokens_seen": 707788800 }, { "epoch": 0.21, "learning_rate": 0.0007934520943668753, "loss": 0.08, "theoretical_loss": 3.8036811233490164, "tokens_seen": 707788800 }, { "epoch": 0.21, "learning_rate": 0.0007933718504252929, "loss": 0.0775, "theoretical_loss": 3.803539464727673, "tokens_seen": 708050944 }, { "epoch": 0.21, "learning_rate": 0.0007932916064837105, "loss": 0.0812, "theoretical_loss": 3.803397873222244, "tokens_seen": 708313088 }, { "epoch": 0.21, "learning_rate": 0.0007932113625421281, "loss": 0.08, "theoretical_loss": 3.8032563487761095, "tokens_seen": 708575232 }, { "epoch": 0.21, "learning_rate": 0.0007931311186005457, "loss": 0.0772, "theoretical_loss": 3.8031148913327177, "tokens_seen": 708837376 }, { "epoch": 0.21, "learning_rate": 0.0007930508746589633, "loss": 0.0804, "theoretical_loss": 3.8029735008355843, "tokens_seen": 709099520 }, { "epoch": 0.21, "learning_rate": 0.0007929706307173808, "loss": 0.0799, "theoretical_loss": 3.8028321772282965, "tokens_seen": 709361664 }, { "epoch": 0.22, "learning_rate": 0.0007928903867757984, "loss": 0.0789, "theoretical_loss": 3.8026909204545065, "tokens_seen": 709623808 }, { "epoch": 0.22, "learning_rate": 0.000792810142834216, "loss": 0.0811, "theoretical_loss": 3.8025497304579376, "tokens_seen": 709885952 }, { "epoch": 0.22, "learning_rate": 0.0007927298988926336, "loss": 0.0782, "theoretical_loss": 3.80240860718238, "tokens_seen": 710148096 }, { "epoch": 0.22, "learning_rate": 0.0007926496549510513, "loss": 0.0827, "theoretical_loss": 3.802267550571691, "tokens_seen": 710410240 }, { "epoch": 0.22, "learning_rate": 0.0007925694110094688, "loss": 0.0818, "theoretical_loss": 3.802126560569798, "tokens_seen": 710672384 }, { "epoch": 0.22, "learning_rate": 0.0007924891670678864, "loss": 0.0812, "theoretical_loss": 3.801985637120694, "tokens_seen": 710934528 }, { "epoch": 0.22, "learning_rate": 0.000792408923126304, "loss": 0.0785, "theoretical_loss": 3.801844780168441, "tokens_seen": 711196672 }, { "epoch": 0.22, "learning_rate": 0.0007923286791847216, "loss": 0.0815, "theoretical_loss": 3.8017039896571685, "tokens_seen": 711458816 }, { "epoch": 0.22, "learning_rate": 0.0007922484352431391, "loss": 0.0784, "theoretical_loss": 3.8015632655310734, "tokens_seen": 711720960 }, { "epoch": 0.22, "learning_rate": 0.0007921681913015567, "loss": 0.08, "theoretical_loss": 3.8014226077344198, "tokens_seen": 711983104 }, { "epoch": 0.22, "learning_rate": 0.0007920879473599743, "loss": 0.0775, "theoretical_loss": 3.8012820162115393, "tokens_seen": 712245248 }, { "epoch": 0.22, "learning_rate": 0.0007920077034183918, "loss": 0.077, "theoretical_loss": 3.801141490906831, "tokens_seen": 712507392 }, { "epoch": 0.22, "learning_rate": 0.0007919274594768096, "loss": 0.0777, "theoretical_loss": 3.80100103176476, "tokens_seen": 712769536 }, { "epoch": 0.22, "learning_rate": 0.0007918472155352271, "loss": 0.0814, "theoretical_loss": 3.8008606387298594, "tokens_seen": 713031680 }, { "epoch": 0.22, "learning_rate": 0.0007917669715936448, "loss": 0.0764, "theoretical_loss": 3.80072031174673, "tokens_seen": 713293824 }, { "epoch": 0.22, "learning_rate": 0.0007916867276520623, "loss": 0.0802, "theoretical_loss": 3.800580050760036, "tokens_seen": 713555968 }, { "epoch": 0.22, "learning_rate": 0.0007916064837104798, "loss": 0.0763, "theoretical_loss": 3.800439855714512, "tokens_seen": 713818112 }, { "epoch": 0.22, "learning_rate": 0.0007915262397688975, "loss": 0.0798, "theoretical_loss": 3.8002997265549574, "tokens_seen": 714080256 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0012540040770545602, "objective/train/docs_used": 265036, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5773979425430298, "objective/train/original_loss": 1.5773980617523193, "objective/train/theoretical_loss": 3.8001596632262387, "objective/train/tokens_used": 734802400, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.00023382111976388842, "objective/train/value_max": -0.00015115737915039062, "objective/train/value_min": -0.39892578125, "objective/train/value_reward_corr": 0.6356383073725016, "objective/train/value_std": 0.01374053955078125, "objective/train/weight_avg": 1.001361608505249, "objective/train/weighted_lm_loss": 1.57895028591156, "objective/train/weights_max": 1.2423335313796997, "objective/train/weights_min": 0.3686169385910034, "theoretical_loss": 3.8001596632262387, "tokens_seen": 714342400 }, { "epoch": 0.22, "learning_rate": 0.000791445995827315, "loss": 0.0799, "theoretical_loss": 3.8001596632262387, "tokens_seen": 714342400 }, { "epoch": 0.22, "learning_rate": 0.0007913657518857326, "loss": 0.0776, "theoretical_loss": 3.8000196656732874, "tokens_seen": 714604544 }, { "epoch": 0.22, "learning_rate": 0.0007912855079441503, "loss": 0.0778, "theoretical_loss": 3.7998797338411032, "tokens_seen": 714866688 }, { "epoch": 0.22, "learning_rate": 0.0007912052640025679, "loss": 0.0794, "theoretical_loss": 3.7997398676747496, "tokens_seen": 715128832 }, { "epoch": 0.22, "learning_rate": 0.0007911250200609854, "loss": 0.0795, "theoretical_loss": 3.7996000671193593, "tokens_seen": 715390976 }, { "epoch": 0.22, "learning_rate": 0.000791044776119403, "loss": 0.0774, "theoretical_loss": 3.7994603321201277, "tokens_seen": 715653120 }, { "epoch": 0.22, "learning_rate": 0.0007909645321778206, "loss": 0.0785, "theoretical_loss": 3.7993206626223177, "tokens_seen": 715915264 }, { "epoch": 0.22, "learning_rate": 0.0007908842882362381, "loss": 0.0787, "theoretical_loss": 3.799181058571258, "tokens_seen": 716177408 }, { "epoch": 0.22, "learning_rate": 0.0007908040442946558, "loss": 0.0754, "theoretical_loss": 3.7990415199123424, "tokens_seen": 716439552 }, { "epoch": 0.22, "learning_rate": 0.0007907238003530733, "loss": 0.0804, "theoretical_loss": 3.79890204659103, "tokens_seen": 716701696 }, { "epoch": 0.22, "learning_rate": 0.000790643556411491, "loss": 0.0781, "theoretical_loss": 3.7987626385528466, "tokens_seen": 716963840 }, { "epoch": 0.22, "learning_rate": 0.0007905633124699085, "loss": 0.0815, "theoretical_loss": 3.798623295743382, "tokens_seen": 717225984 }, { "epoch": 0.22, "learning_rate": 0.000790483068528326, "loss": 0.0817, "theoretical_loss": 3.798484018108291, "tokens_seen": 717488128 }, { "epoch": 0.22, "learning_rate": 0.0007904028245867438, "loss": 0.078, "theoretical_loss": 3.7983448055932953, "tokens_seen": 717750272 }, { "epoch": 0.22, "learning_rate": 0.0007903225806451613, "loss": 0.0819, "theoretical_loss": 3.79820565814418, "tokens_seen": 718012416 }, { "epoch": 0.22, "learning_rate": 0.0007902423367035789, "loss": 0.0782, "theoretical_loss": 3.798066575706795, "tokens_seen": 718274560 }, { "epoch": 0.22, "learning_rate": 0.0007901620927619965, "loss": 0.0779, "theoretical_loss": 3.797927558227056, "tokens_seen": 718536704 }, { "epoch": 0.22, "learning_rate": 0.0007900818488204141, "loss": 0.077, "theoretical_loss": 3.7977886056509433, "tokens_seen": 718798848 }, { "epoch": 0.22, "learning_rate": 0.0007900016048788316, "loss": 0.078, "theoretical_loss": 3.797649717924502, "tokens_seen": 719060992 }, { "epoch": 0.22, "learning_rate": 0.0007899213609372492, "loss": 0.0789, "theoretical_loss": 3.797510894993839, "tokens_seen": 719323136 }, { "epoch": 0.22, "learning_rate": 0.0007898411169956668, "loss": 0.078, "theoretical_loss": 3.79737213680513, "tokens_seen": 719585280 }, { "epoch": 0.22, "learning_rate": 0.0007897608730540844, "loss": 0.0787, "theoretical_loss": 3.797233443304612, "tokens_seen": 719847424 }, { "epoch": 0.22, "learning_rate": 0.0007896806291125021, "loss": 0.0801, "theoretical_loss": 3.7970948144385868, "tokens_seen": 720109568 }, { "epoch": 0.22, "learning_rate": 0.0007896003851709196, "loss": 0.0761, "theoretical_loss": 3.796956250153421, "tokens_seen": 720371712 }, { "epoch": 0.22, "learning_rate": 0.0007895201412293372, "loss": 0.0796, "theoretical_loss": 3.796817750395544, "tokens_seen": 720633856 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0021576914004981518, "objective/train/docs_used": 267624, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.453436255455017, "objective/train/original_loss": 1.4534361362457275, "objective/train/theoretical_loss": 3.7966793151114504, "objective/train/tokens_used": 741356000, "objective/train/value_avg": -0.00920867919921875, "objective/train/value_loss": 0.0003623893717303872, "objective/train/value_max": -0.0002148151397705078, "objective/train/value_min": -0.8974609375, "objective/train/value_reward_corr": 0.59584870633317, "objective/train/value_std": 0.0142364501953125, "objective/train/weight_avg": 1.0023139715194702, "objective/train/weighted_lm_loss": 1.456945538520813, "objective/train/weights_max": 1.6503322124481201, "objective/train/weights_min": 0.2744547724723816, "theoretical_loss": 3.7966793151114504, "tokens_seen": 720896000 }, { "epoch": 0.22, "learning_rate": 0.0007894398972877548, "loss": 0.0765, "theoretical_loss": 3.7966793151114504, "tokens_seen": 720896000 }, { "epoch": 0.22, "learning_rate": 0.0007893596533461724, "loss": 0.0766, "theoretical_loss": 3.796540944247698, "tokens_seen": 721158144 }, { "epoch": 0.22, "learning_rate": 0.00078927940940459, "loss": 0.0778, "theoretical_loss": 3.796402637750908, "tokens_seen": 721420288 }, { "epoch": 0.22, "learning_rate": 0.0007891991654630075, "loss": 0.0806, "theoretical_loss": 3.796264395567766, "tokens_seen": 721682432 }, { "epoch": 0.22, "learning_rate": 0.0007891189215214251, "loss": 0.0784, "theoretical_loss": 3.7961262176450195, "tokens_seen": 721944576 }, { "epoch": 0.22, "learning_rate": 0.0007890386775798428, "loss": 0.0797, "theoretical_loss": 3.795988103929482, "tokens_seen": 722206720 }, { "epoch": 0.22, "learning_rate": 0.0007889584336382604, "loss": 0.0758, "theoretical_loss": 3.7958500543680276, "tokens_seen": 722468864 }, { "epoch": 0.22, "learning_rate": 0.0007888781896966779, "loss": 0.0788, "theoretical_loss": 3.795712068907596, "tokens_seen": 722731008 }, { "epoch": 0.22, "learning_rate": 0.0007887979457550956, "loss": 0.0822, "theoretical_loss": 3.795574147495188, "tokens_seen": 722993152 }, { "epoch": 0.22, "learning_rate": 0.0007887177018135131, "loss": 0.0806, "theoretical_loss": 3.795436290077868, "tokens_seen": 723255296 }, { "epoch": 0.22, "learning_rate": 0.0007886374578719306, "loss": 0.0809, "theoretical_loss": 3.795298496602765, "tokens_seen": 723517440 }, { "epoch": 0.22, "learning_rate": 0.0007885572139303483, "loss": 0.0776, "theoretical_loss": 3.795160767017068, "tokens_seen": 723779584 }, { "epoch": 0.22, "learning_rate": 0.0007884769699887658, "loss": 0.0785, "theoretical_loss": 3.795023101268031, "tokens_seen": 724041728 }, { "epoch": 0.22, "learning_rate": 0.0007883967260471834, "loss": 0.0826, "theoretical_loss": 3.7948854993029695, "tokens_seen": 724303872 }, { "epoch": 0.22, "learning_rate": 0.000788316482105601, "loss": 0.0766, "theoretical_loss": 3.7947479610692616, "tokens_seen": 724566016 }, { "epoch": 0.22, "learning_rate": 0.0007882362381640187, "loss": 0.0783, "theoretical_loss": 3.794610486514348, "tokens_seen": 724828160 }, { "epoch": 0.22, "learning_rate": 0.0007881559942224363, "loss": 0.0788, "theoretical_loss": 3.7944730755857323, "tokens_seen": 725090304 }, { "epoch": 0.22, "learning_rate": 0.0007880757502808538, "loss": 0.0772, "theoretical_loss": 3.794335728230979, "tokens_seen": 725352448 }, { "epoch": 0.22, "learning_rate": 0.0007879955063392714, "loss": 0.0779, "theoretical_loss": 3.7941984443977157, "tokens_seen": 725614592 }, { "epoch": 0.22, "learning_rate": 0.000787915262397689, "loss": 0.0761, "theoretical_loss": 3.7940612240336327, "tokens_seen": 725876736 }, { "epoch": 0.22, "learning_rate": 0.0007878350184561066, "loss": 0.0785, "theoretical_loss": 3.793924067086481, "tokens_seen": 726138880 }, { "epoch": 0.22, "learning_rate": 0.0007877547745145241, "loss": 0.078, "theoretical_loss": 3.793786973504073, "tokens_seen": 726401024 }, { "epoch": 0.22, "learning_rate": 0.0007876745305729418, "loss": 0.077, "theoretical_loss": 3.7936499432342847, "tokens_seen": 726663168 }, { "epoch": 0.22, "learning_rate": 0.0007875942866313593, "loss": 0.0779, "theoretical_loss": 3.7935129762250526, "tokens_seen": 726925312 }, { "epoch": 0.22, "learning_rate": 0.0007875140426897769, "loss": 0.0811, "theoretical_loss": 3.7933760724243752, "tokens_seen": 727187456 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.001988545060157776, "objective/train/docs_used": 269904, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5989689826965332, "objective/train/original_loss": 1.5989689826965332, "objective/train/theoretical_loss": 3.7932392317803116, "objective/train/tokens_used": 747909600, "objective/train/value_avg": -0.0074920654296875, "objective/train/value_loss": 0.00017211709928233176, "objective/train/value_max": -0.0002397298812866211, "objective/train/value_min": -0.310302734375, "objective/train/value_reward_corr": 0.5813086882183613, "objective/train/value_std": 0.0099639892578125, "objective/train/weight_avg": 1.0020697116851807, "objective/train/weighted_lm_loss": 1.6021634340286255, "objective/train/weights_max": 1.3142492771148682, "objective/train/weights_min": 0.36833229660987854, "theoretical_loss": 3.7932392317803116, "tokens_seen": 727449600 }, { "epoch": 0.22, "learning_rate": 0.0007874337987481946, "loss": 0.0795, "theoretical_loss": 3.7932392317803116, "tokens_seen": 727449600 }, { "epoch": 0.22, "learning_rate": 0.0007873535548066121, "loss": 0.077, "theoretical_loss": 3.7931024542409837, "tokens_seen": 727711744 }, { "epoch": 0.22, "learning_rate": 0.0007872733108650297, "loss": 0.0782, "theoretical_loss": 3.7929657397545733, "tokens_seen": 727973888 }, { "epoch": 0.22, "learning_rate": 0.0007871930669234473, "loss": 0.0766, "theoretical_loss": 3.792829088269324, "tokens_seen": 728236032 }, { "epoch": 0.22, "learning_rate": 0.0007871128229818649, "loss": 0.0768, "theoretical_loss": 3.792692499733541, "tokens_seen": 728498176 }, { "epoch": 0.22, "learning_rate": 0.0007870325790402824, "loss": 0.0784, "theoretical_loss": 3.7925559740955896, "tokens_seen": 728760320 }, { "epoch": 0.22, "learning_rate": 0.0007869523350987, "loss": 0.0775, "theoretical_loss": 3.7924195113038968, "tokens_seen": 729022464 }, { "epoch": 0.22, "learning_rate": 0.0007868720911571176, "loss": 0.0786, "theoretical_loss": 3.7922831113069493, "tokens_seen": 729284608 }, { "epoch": 0.22, "learning_rate": 0.0007867918472155353, "loss": 0.0761, "theoretical_loss": 3.792146774053296, "tokens_seen": 729546752 }, { "epoch": 0.22, "learning_rate": 0.0007867116032739529, "loss": 0.0767, "theoretical_loss": 3.792010499491545, "tokens_seen": 729808896 }, { "epoch": 0.22, "learning_rate": 0.0007866313593323704, "loss": 0.0752, "theoretical_loss": 3.7918742875703657, "tokens_seen": 730071040 }, { "epoch": 0.22, "learning_rate": 0.0007865511153907881, "loss": 0.0762, "theoretical_loss": 3.7917381382384883, "tokens_seen": 730333184 }, { "epoch": 0.22, "learning_rate": 0.0007864708714492056, "loss": 0.0767, "theoretical_loss": 3.791602051444703, "tokens_seen": 730595328 }, { "epoch": 0.22, "learning_rate": 0.0007863906275076232, "loss": 0.075, "theoretical_loss": 3.791466027137859, "tokens_seen": 730857472 }, { "epoch": 0.22, "learning_rate": 0.0007863103835660408, "loss": 0.0781, "theoretical_loss": 3.7913300652668678, "tokens_seen": 731119616 }, { "epoch": 0.22, "learning_rate": 0.0007862301396244583, "loss": 0.0776, "theoretical_loss": 3.7911941657807002, "tokens_seen": 731381760 }, { "epoch": 0.22, "learning_rate": 0.0007861498956828759, "loss": 0.0752, "theoretical_loss": 3.7910583286283854, "tokens_seen": 731643904 }, { "epoch": 0.22, "learning_rate": 0.0007860696517412936, "loss": 0.0783, "theoretical_loss": 3.7909225537590157, "tokens_seen": 731906048 }, { "epoch": 0.22, "learning_rate": 0.0007859894077997112, "loss": 0.0766, "theoretical_loss": 3.790786841121739, "tokens_seen": 732168192 }, { "epoch": 0.22, "learning_rate": 0.0007859091638581287, "loss": 0.0765, "theoretical_loss": 3.7906511906657676, "tokens_seen": 732430336 }, { "epoch": 0.22, "learning_rate": 0.0007858289199165464, "loss": 0.0764, "theoretical_loss": 3.7905156023403697, "tokens_seen": 732692480 }, { "epoch": 0.22, "learning_rate": 0.0007857486759749639, "loss": 0.0781, "theoretical_loss": 3.7903800760948743, "tokens_seen": 732954624 }, { "epoch": 0.22, "learning_rate": 0.0007856684320333814, "loss": 0.0776, "theoretical_loss": 3.790244611878671, "tokens_seen": 733216768 }, { "epoch": 0.22, "learning_rate": 0.0007855881880917991, "loss": 0.0778, "theoretical_loss": 3.790109209641206, "tokens_seen": 733478912 }, { "epoch": 0.22, "learning_rate": 0.0007855079441502166, "loss": 0.0768, "theoretical_loss": 3.7899738693319875, "tokens_seen": 733741056 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.001371144549921155, "objective/train/docs_used": 272255, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6625961065292358, "objective/train/original_loss": 1.6625961065292358, "objective/train/theoretical_loss": 3.7898385909005814, "objective/train/tokens_used": 754463200, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00022098013141658157, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.287353515625, "objective/train/value_reward_corr": 0.5914789303006973, "objective/train/value_std": 0.0107879638671875, "objective/train/weight_avg": 1.0014703273773193, "objective/train/weighted_lm_loss": 1.6649521589279175, "objective/train/weights_max": 1.150571584701538, "objective/train/weights_min": 0.3686802387237549, "theoretical_loss": 3.7898385909005814, "tokens_seen": 734003200 }, { "epoch": 0.22, "learning_rate": 0.0007854277002086343, "loss": 0.08, "theoretical_loss": 3.7898385909005814, "tokens_seen": 734003200 }, { "epoch": 0.22, "learning_rate": 0.0007853474562670519, "loss": 0.0773, "theoretical_loss": 3.7897033742966135, "tokens_seen": 734265344 }, { "epoch": 0.22, "learning_rate": 0.0007852672123254695, "loss": 0.0774, "theoretical_loss": 3.789568219469767, "tokens_seen": 734527488 }, { "epoch": 0.22, "learning_rate": 0.0007851869683838871, "loss": 0.0798, "theoretical_loss": 3.789433126369786, "tokens_seen": 734789632 }, { "epoch": 0.22, "learning_rate": 0.0007851067244423046, "loss": 0.0765, "theoretical_loss": 3.7892980949464716, "tokens_seen": 735051776 }, { "epoch": 0.22, "learning_rate": 0.0007850264805007222, "loss": 0.0779, "theoretical_loss": 3.7891631251496856, "tokens_seen": 735313920 }, { "epoch": 0.22, "learning_rate": 0.0007849462365591398, "loss": 0.0772, "theoretical_loss": 3.7890282169293465, "tokens_seen": 735576064 }, { "epoch": 0.22, "learning_rate": 0.0007848659926175574, "loss": 0.0762, "theoretical_loss": 3.7888933702354324, "tokens_seen": 735838208 }, { "epoch": 0.22, "learning_rate": 0.0007847857486759749, "loss": 0.0792, "theoretical_loss": 3.7887585850179786, "tokens_seen": 736100352 }, { "epoch": 0.22, "learning_rate": 0.0007847055047343926, "loss": 0.0784, "theoretical_loss": 3.788623861227081, "tokens_seen": 736362496 }, { "epoch": 0.22, "learning_rate": 0.0007846252607928101, "loss": 0.0751, "theoretical_loss": 3.7884891988128926, "tokens_seen": 736624640 }, { "epoch": 0.22, "learning_rate": 0.0007845450168512277, "loss": 0.0778, "theoretical_loss": 3.7883545977256228, "tokens_seen": 736886784 }, { "epoch": 0.22, "learning_rate": 0.0007844647729096454, "loss": 0.0781, "theoretical_loss": 3.7882200579155416, "tokens_seen": 737148928 }, { "epoch": 0.22, "learning_rate": 0.0007843845289680629, "loss": 0.0788, "theoretical_loss": 3.788085579332977, "tokens_seen": 737411072 }, { "epoch": 0.22, "learning_rate": 0.0007843042850264806, "loss": 0.0772, "theoretical_loss": 3.787951161928312, "tokens_seen": 737673216 }, { "epoch": 0.22, "learning_rate": 0.0007842240410848981, "loss": 0.0782, "theoretical_loss": 3.7878168056519916, "tokens_seen": 737935360 }, { "epoch": 0.22, "learning_rate": 0.0007841437971433157, "loss": 0.0766, "theoretical_loss": 3.787682510454515, "tokens_seen": 738197504 }, { "epoch": 0.22, "learning_rate": 0.0007840635532017333, "loss": 0.0777, "theoretical_loss": 3.7875482762864405, "tokens_seen": 738459648 }, { "epoch": 0.22, "learning_rate": 0.0007839833092601508, "loss": 0.0757, "theoretical_loss": 3.787414103098384, "tokens_seen": 738721792 }, { "epoch": 0.22, "learning_rate": 0.0007839030653185684, "loss": 0.0765, "theoretical_loss": 3.7872799908410193, "tokens_seen": 738983936 }, { "epoch": 0.22, "learning_rate": 0.0007838228213769861, "loss": 0.0761, "theoretical_loss": 3.787145939465076, "tokens_seen": 739246080 }, { "epoch": 0.22, "learning_rate": 0.0007837425774354037, "loss": 0.0771, "theoretical_loss": 3.7870119489213425, "tokens_seen": 739508224 }, { "epoch": 0.22, "learning_rate": 0.0007836623334938212, "loss": 0.0794, "theoretical_loss": 3.786878019160664, "tokens_seen": 739770368 }, { "epoch": 0.22, "learning_rate": 0.0007835820895522389, "loss": 0.0761, "theoretical_loss": 3.7867441501339427, "tokens_seen": 740032512 }, { "epoch": 0.22, "learning_rate": 0.0007835018456106564, "loss": 0.0781, "theoretical_loss": 3.7866103417921373, "tokens_seen": 740294656 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0015989760868251324, "objective/train/docs_used": 274540, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6382532119750977, "objective/train/original_loss": 1.6382529735565186, "objective/train/theoretical_loss": 3.786476594086265, "objective/train/tokens_used": 761016800, "objective/train/value_avg": -0.0094757080078125, "objective/train/value_loss": 0.0003717961080837995, "objective/train/value_max": -0.00016605854034423828, "objective/train/value_min": -0.59716796875, "objective/train/value_reward_corr": 0.6324069891531287, "objective/train/value_std": 0.0156707763671875, "objective/train/weight_avg": 1.0017673969268799, "objective/train/weighted_lm_loss": 1.6408437490463257, "objective/train/weights_max": 1.6781408786773682, "objective/train/weights_min": 0.37242892384529114, "theoretical_loss": 3.786476594086265, "tokens_seen": 740556800 }, { "epoch": 0.22, "learning_rate": 0.0007834216016690739, "loss": 0.0818, "theoretical_loss": 3.786476594086265, "tokens_seen": 740556800 }, { "epoch": 0.22, "learning_rate": 0.0007833413577274916, "loss": 0.076, "theoretical_loss": 3.7863429069673984, "tokens_seen": 740818944 }, { "epoch": 0.22, "learning_rate": 0.0007832611137859091, "loss": 0.0773, "theoretical_loss": 3.7862092803866663, "tokens_seen": 741081088 }, { "epoch": 0.22, "learning_rate": 0.0007831808698443267, "loss": 0.0743, "theoretical_loss": 3.786075714295257, "tokens_seen": 741343232 }, { "epoch": 0.22, "learning_rate": 0.0007831006259027444, "loss": 0.0797, "theoretical_loss": 3.7859422086444123, "tokens_seen": 741605376 }, { "epoch": 0.22, "learning_rate": 0.000783020381961162, "loss": 0.0777, "theoretical_loss": 3.7858087633854325, "tokens_seen": 741867520 }, { "epoch": 0.22, "learning_rate": 0.0007829401380195796, "loss": 0.077, "theoretical_loss": 3.785675378469673, "tokens_seen": 742129664 }, { "epoch": 0.22, "learning_rate": 0.0007828598940779972, "loss": 0.0771, "theoretical_loss": 3.7855420538485474, "tokens_seen": 742391808 }, { "epoch": 0.23, "learning_rate": 0.0007827796501364147, "loss": 0.0786, "theoretical_loss": 3.7854087894735233, "tokens_seen": 742653952 }, { "epoch": 0.23, "learning_rate": 0.0007826994061948323, "loss": 0.0749, "theoretical_loss": 3.7852755852961257, "tokens_seen": 742916096 }, { "epoch": 0.23, "learning_rate": 0.0007826191622532499, "loss": 0.0789, "theoretical_loss": 3.785142441267936, "tokens_seen": 743178240 }, { "epoch": 0.23, "learning_rate": 0.0007825389183116674, "loss": 0.0776, "theoretical_loss": 3.7850093573405905, "tokens_seen": 743440384 }, { "epoch": 0.23, "learning_rate": 0.0007824586743700851, "loss": 0.076, "theoretical_loss": 3.7848763334657827, "tokens_seen": 743702528 }, { "epoch": 0.23, "learning_rate": 0.0007823784304285027, "loss": 0.0805, "theoretical_loss": 3.7847433695952617, "tokens_seen": 743964672 }, { "epoch": 0.23, "learning_rate": 0.0007822981864869203, "loss": 0.0783, "theoretical_loss": 3.7846104656808306, "tokens_seen": 744226816 }, { "epoch": 0.23, "learning_rate": 0.0007822179425453379, "loss": 0.0757, "theoretical_loss": 3.7844776216743505, "tokens_seen": 744488960 }, { "epoch": 0.23, "learning_rate": 0.0007821376986037554, "loss": 0.077, "theoretical_loss": 3.784344837527737, "tokens_seen": 744751104 }, { "epoch": 0.23, "learning_rate": 0.000782057454662173, "loss": 0.0759, "theoretical_loss": 3.784212113192961, "tokens_seen": 745013248 }, { "epoch": 0.23, "learning_rate": 0.0007819772107205906, "loss": 0.0773, "theoretical_loss": 3.7840794486220495, "tokens_seen": 745275392 }, { "epoch": 0.23, "learning_rate": 0.0007818969667790082, "loss": 0.0777, "theoretical_loss": 3.783946843767084, "tokens_seen": 745537536 }, { "epoch": 0.23, "learning_rate": 0.0007818167228374257, "loss": 0.0785, "theoretical_loss": 3.783814298580203, "tokens_seen": 745799680 }, { "epoch": 0.23, "learning_rate": 0.0007817364788958434, "loss": 0.0772, "theoretical_loss": 3.7836818130135974, "tokens_seen": 746061824 }, { "epoch": 0.23, "learning_rate": 0.000781656234954261, "loss": 0.0756, "theoretical_loss": 3.783549387019515, "tokens_seen": 746323968 }, { "epoch": 0.23, "learning_rate": 0.0007815759910126786, "loss": 0.0779, "theoretical_loss": 3.7834170205502584, "tokens_seen": 746586112 }, { "epoch": 0.23, "learning_rate": 0.0007814957470710962, "loss": 0.0764, "theoretical_loss": 3.783284713558186, "tokens_seen": 746848256 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.001808889559470117, "objective/train/docs_used": 277055, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6275434494018555, "objective/train/original_loss": 1.627543330192566, "objective/train/theoretical_loss": 3.783152465995708, "objective/train/tokens_used": 767570400, "objective/train/value_avg": -0.01079559326171875, "objective/train/value_loss": 0.0003174249141011387, "objective/train/value_max": -0.00019109249114990234, "objective/train/value_min": -0.4189453125, "objective/train/value_reward_corr": 0.6689077552958134, "objective/train/value_std": 0.01580810546875, "objective/train/weight_avg": 1.001956582069397, "objective/train/weighted_lm_loss": 1.6307531595230103, "objective/train/weights_max": 1.1777127981185913, "objective/train/weights_min": 0.3695334792137146, "theoretical_loss": 3.783152465995708, "tokens_seen": 747110400 }, { "epoch": 0.23, "learning_rate": 0.0007814155031295137, "loss": 0.079, "theoretical_loss": 3.783152465995708, "tokens_seen": 747110400 }, { "epoch": 0.23, "learning_rate": 0.0007813352591879314, "loss": 0.077, "theoretical_loss": 3.7830202778152935, "tokens_seen": 747372544 }, { "epoch": 0.23, "learning_rate": 0.0007812550152463489, "loss": 0.0748, "theoretical_loss": 3.7828881489694632, "tokens_seen": 747634688 }, { "epoch": 0.23, "learning_rate": 0.0007811747713047665, "loss": 0.0756, "theoretical_loss": 3.7827560794107926, "tokens_seen": 747896832 }, { "epoch": 0.23, "learning_rate": 0.0007810945273631841, "loss": 0.0765, "theoretical_loss": 3.7826240690919137, "tokens_seen": 748158976 }, { "epoch": 0.23, "learning_rate": 0.0007810142834216016, "loss": 0.0792, "theoretical_loss": 3.7824921179655115, "tokens_seen": 748421120 }, { "epoch": 0.23, "learning_rate": 0.0007809340394800192, "loss": 0.0773, "theoretical_loss": 3.782360225984325, "tokens_seen": 748683264 }, { "epoch": 0.23, "learning_rate": 0.0007808537955384369, "loss": 0.0769, "theoretical_loss": 3.782228393101149, "tokens_seen": 748945408 }, { "epoch": 0.23, "learning_rate": 0.0007807735515968545, "loss": 0.0775, "theoretical_loss": 3.78209661926883, "tokens_seen": 749207552 }, { "epoch": 0.23, "learning_rate": 0.000780693307655272, "loss": 0.0771, "theoretical_loss": 3.781964904440271, "tokens_seen": 749469696 }, { "epoch": 0.23, "learning_rate": 0.0007806130637136897, "loss": 0.0795, "theoretical_loss": 3.7818332485684283, "tokens_seen": 749731840 }, { "epoch": 0.23, "learning_rate": 0.0007805328197721072, "loss": 0.0772, "theoretical_loss": 3.781701651606311, "tokens_seen": 749993984 }, { "epoch": 0.23, "learning_rate": 0.0007804525758305248, "loss": 0.0793, "theoretical_loss": 3.7815701135069846, "tokens_seen": 750256128 }, { "epoch": 0.23, "learning_rate": 0.0007803723318889424, "loss": 0.0765, "theoretical_loss": 3.7814386342235653, "tokens_seen": 750518272 }, { "epoch": 0.23, "learning_rate": 0.0007802920879473599, "loss": 0.0726, "theoretical_loss": 3.7813072137092254, "tokens_seen": 750780416 }, { "epoch": 0.23, "learning_rate": 0.0007802118440057776, "loss": 0.0749, "theoretical_loss": 3.7811758519171894, "tokens_seen": 751042560 }, { "epoch": 0.23, "learning_rate": 0.0007801316000641952, "loss": 0.0788, "theoretical_loss": 3.781044548800736, "tokens_seen": 751304704 }, { "epoch": 0.23, "learning_rate": 0.0007800513561226128, "loss": 0.0794, "theoretical_loss": 3.7809133043131973, "tokens_seen": 751566848 }, { "epoch": 0.23, "learning_rate": 0.0007799711121810304, "loss": 0.0784, "theoretical_loss": 3.7807821184079584, "tokens_seen": 751828992 }, { "epoch": 0.23, "learning_rate": 0.000779890868239448, "loss": 0.0775, "theoretical_loss": 3.780650991038459, "tokens_seen": 752091136 }, { "epoch": 0.23, "learning_rate": 0.0007798106242978655, "loss": 0.0773, "theoretical_loss": 3.7805199221581893, "tokens_seen": 752353280 }, { "epoch": 0.23, "learning_rate": 0.0007797303803562831, "loss": 0.0796, "theoretical_loss": 3.7803889117206957, "tokens_seen": 752615424 }, { "epoch": 0.23, "learning_rate": 0.0007796501364147007, "loss": 0.0788, "theoretical_loss": 3.7802579596795756, "tokens_seen": 752877568 }, { "epoch": 0.23, "learning_rate": 0.0007795698924731182, "loss": 0.0753, "theoretical_loss": 3.7801270659884807, "tokens_seen": 753139712 }, { "epoch": 0.23, "learning_rate": 0.0007794896485315359, "loss": 0.0779, "theoretical_loss": 3.7799962306011143, "tokens_seen": 753401856 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.000318468373734504, "objective/train/docs_used": 279492, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5279823541641235, "objective/train/original_loss": 1.527982473373413, "objective/train/theoretical_loss": 3.779865453471234, "objective/train/tokens_used": 774124000, "objective/train/value_avg": -0.009857177734375, "objective/train/value_loss": 0.0003417794650886208, "objective/train/value_max": -0.0001767873764038086, "objective/train/value_min": -0.51904296875, "objective/train/value_reward_corr": 0.737366687695921, "objective/train/value_std": 0.01702880859375, "objective/train/weight_avg": 1.0004727840423584, "objective/train/weighted_lm_loss": 1.528694748878479, "objective/train/weights_max": 1.1768189668655396, "objective/train/weights_min": 0.3686802387237549, "theoretical_loss": 3.779865453471234, "tokens_seen": 753664000 }, { "epoch": 0.23, "learning_rate": 0.0007794094045899534, "loss": 0.079, "theoretical_loss": 3.779865453471234, "tokens_seen": 753664000 }, { "epoch": 0.23, "learning_rate": 0.0007793291606483711, "loss": 0.0776, "theoretical_loss": 3.7797347345526484, "tokens_seen": 753926144 }, { "epoch": 0.23, "learning_rate": 0.0007792489167067887, "loss": 0.0775, "theoretical_loss": 3.7796040737992205, "tokens_seen": 754188288 }, { "epoch": 0.23, "learning_rate": 0.0007791686727652062, "loss": 0.0782, "theoretical_loss": 3.7794734711648648, "tokens_seen": 754450432 }, { "epoch": 0.23, "learning_rate": 0.0007790884288236239, "loss": 0.0791, "theoretical_loss": 3.779342926603549, "tokens_seen": 754712576 }, { "epoch": 0.23, "learning_rate": 0.0007790081848820414, "loss": 0.0767, "theoretical_loss": 3.7792124400692924, "tokens_seen": 754974720 }, { "epoch": 0.23, "learning_rate": 0.000778927940940459, "loss": 0.0761, "theoretical_loss": 3.7790820115161674, "tokens_seen": 755236864 }, { "epoch": 0.23, "learning_rate": 0.0007788476969988766, "loss": 0.079, "theoretical_loss": 3.778951640898298, "tokens_seen": 755499008 }, { "epoch": 0.23, "learning_rate": 0.0007787674530572942, "loss": 0.0779, "theoretical_loss": 3.7788213281698617, "tokens_seen": 755761152 }, { "epoch": 0.23, "learning_rate": 0.0007786872091157117, "loss": 0.0758, "theoretical_loss": 3.778691073285086, "tokens_seen": 756023296 }, { "epoch": 0.23, "learning_rate": 0.0007786069651741294, "loss": 0.0796, "theoretical_loss": 3.7785608761982523, "tokens_seen": 756285440 }, { "epoch": 0.23, "learning_rate": 0.000778526721232547, "loss": 0.0763, "theoretical_loss": 3.7784307368636934, "tokens_seen": 756547584 }, { "epoch": 0.23, "learning_rate": 0.0007784464772909645, "loss": 0.0754, "theoretical_loss": 3.7783006552357934, "tokens_seen": 756809728 }, { "epoch": 0.23, "learning_rate": 0.0007783662333493822, "loss": 0.0791, "theoretical_loss": 3.7781706312689893, "tokens_seen": 757071872 }, { "epoch": 0.23, "learning_rate": 0.0007782859894077997, "loss": 0.0778, "theoretical_loss": 3.7780406649177696, "tokens_seen": 757334016 }, { "epoch": 0.23, "learning_rate": 0.0007782057454662173, "loss": 0.0771, "theoretical_loss": 3.777910756136673, "tokens_seen": 757596160 }, { "epoch": 0.23, "learning_rate": 0.0007781255015246349, "loss": 0.0779, "theoretical_loss": 3.777780904880292, "tokens_seen": 757858304 }, { "epoch": 0.23, "learning_rate": 0.0007780452575830524, "loss": 0.0791, "theoretical_loss": 3.777651111103269, "tokens_seen": 758120448 }, { "epoch": 0.23, "learning_rate": 0.0007779650136414702, "loss": 0.0767, "theoretical_loss": 3.777521374760298, "tokens_seen": 758382592 }, { "epoch": 0.23, "learning_rate": 0.0007778847696998877, "loss": 0.0794, "theoretical_loss": 3.7773916958061253, "tokens_seen": 758644736 }, { "epoch": 0.23, "learning_rate": 0.0007778045257583053, "loss": 0.081, "theoretical_loss": 3.777262074195548, "tokens_seen": 758906880 }, { "epoch": 0.23, "learning_rate": 0.0007777242818167229, "loss": 0.079, "theoretical_loss": 3.777132509883413, "tokens_seen": 759169024 }, { "epoch": 0.23, "learning_rate": 0.0007776440378751405, "loss": 0.0786, "theoretical_loss": 3.7770030028246215, "tokens_seen": 759431168 }, { "epoch": 0.23, "learning_rate": 0.000777563793933558, "loss": 0.0776, "theoretical_loss": 3.7768735529741226, "tokens_seen": 759693312 }, { "epoch": 0.23, "learning_rate": 0.0007774835499919756, "loss": 0.0756, "theoretical_loss": 3.776744160286918, "tokens_seen": 759955456 }, { "epoch": 0.23, "objective/train/advantage_avg": -0.00012553480337373912, "objective/train/docs_used": 281753, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.536434531211853, "objective/train/original_loss": 1.5364346504211426, "objective/train/theoretical_loss": 3.77661482471806, "objective/train/tokens_used": 780677600, "objective/train/value_avg": -0.009521484375, "objective/train/value_loss": 0.0005207026842981577, "objective/train/value_max": -0.00010311603546142578, "objective/train/value_min": -0.755859375, "objective/train/value_reward_corr": 0.614135921117643, "objective/train/value_std": 0.01593017578125, "objective/train/weight_avg": 1.0001031160354614, "objective/train/weighted_lm_loss": 1.5369855165481567, "objective/train/weights_max": 1.5545350313186646, "objective/train/weights_min": 0.26852282881736755, "theoretical_loss": 3.77661482471806, "tokens_seen": 760217600 }, { "epoch": 0.23, "learning_rate": 0.0007774033060503932, "loss": 0.0804, "theoretical_loss": 3.77661482471806, "tokens_seen": 760217600 }, { "epoch": 0.23, "learning_rate": 0.0007773230621088107, "loss": 0.0797, "theoretical_loss": 3.776485546222651, "tokens_seen": 760479744 }, { "epoch": 0.23, "learning_rate": 0.0007772428181672284, "loss": 0.0783, "theoretical_loss": 3.776356324755847, "tokens_seen": 760741888 }, { "epoch": 0.23, "learning_rate": 0.000777162574225646, "loss": 0.0766, "theoretical_loss": 3.7762271602728497, "tokens_seen": 761004032 }, { "epoch": 0.23, "learning_rate": 0.0007770823302840636, "loss": 0.0784, "theoretical_loss": 3.7760980527289156, "tokens_seen": 761266176 }, { "epoch": 0.23, "learning_rate": 0.0007770020863424812, "loss": 0.0756, "theoretical_loss": 3.77596900207935, "tokens_seen": 761528320 }, { "epoch": 0.23, "learning_rate": 0.0007769218424008987, "loss": 0.076, "theoretical_loss": 3.775840008279509, "tokens_seen": 761790464 }, { "epoch": 0.23, "learning_rate": 0.0007768415984593163, "loss": 0.0785, "theoretical_loss": 3.7757110712847997, "tokens_seen": 762052608 }, { "epoch": 0.23, "learning_rate": 0.0007767613545177339, "loss": 0.0791, "theoretical_loss": 3.775582191050678, "tokens_seen": 762314752 }, { "epoch": 0.23, "learning_rate": 0.0007766811105761515, "loss": 0.0781, "theoretical_loss": 3.775453367532651, "tokens_seen": 762576896 }, { "epoch": 0.23, "learning_rate": 0.0007766008666345691, "loss": 0.0781, "theoretical_loss": 3.775324600686276, "tokens_seen": 762839040 }, { "epoch": 0.23, "learning_rate": 0.0007765206226929867, "loss": 0.0774, "theoretical_loss": 3.7751958904671614, "tokens_seen": 763101184 }, { "epoch": 0.23, "learning_rate": 0.0007764403787514042, "loss": 0.0785, "theoretical_loss": 3.7750672368309623, "tokens_seen": 763363328 }, { "epoch": 0.23, "learning_rate": 0.000776360134809822, "loss": 0.0812, "theoretical_loss": 3.7749386397333873, "tokens_seen": 763625472 }, { "epoch": 0.23, "learning_rate": 0.0007762798908682395, "loss": 0.0782, "theoretical_loss": 3.774810099130193, "tokens_seen": 763887616 }, { "epoch": 0.23, "learning_rate": 0.000776199646926657, "loss": 0.0785, "theoretical_loss": 3.7746816149771862, "tokens_seen": 764149760 }, { "epoch": 0.23, "learning_rate": 0.0007761194029850747, "loss": 0.0772, "theoretical_loss": 3.7745531872302234, "tokens_seen": 764411904 }, { "epoch": 0.23, "learning_rate": 0.0007760391590434922, "loss": 0.078, "theoretical_loss": 3.774424815845211, "tokens_seen": 764674048 }, { "epoch": 0.23, "learning_rate": 0.0007759589151019098, "loss": 0.0778, "theoretical_loss": 3.774296500778105, "tokens_seen": 764936192 }, { "epoch": 0.23, "learning_rate": 0.0007758786711603274, "loss": 0.0783, "theoretical_loss": 3.77416824198491, "tokens_seen": 765198336 }, { "epoch": 0.23, "learning_rate": 0.000775798427218745, "loss": 0.0762, "theoretical_loss": 3.7740400394216813, "tokens_seen": 765460480 }, { "epoch": 0.23, "learning_rate": 0.0007757181832771625, "loss": 0.081, "theoretical_loss": 3.7739118930445223, "tokens_seen": 765722624 }, { "epoch": 0.23, "learning_rate": 0.0007756379393355802, "loss": 0.0798, "theoretical_loss": 3.7737838028095867, "tokens_seen": 765984768 }, { "epoch": 0.23, "learning_rate": 0.0007755576953939978, "loss": 0.0771, "theoretical_loss": 3.773655768673077, "tokens_seen": 766246912 }, { "epoch": 0.23, "learning_rate": 0.0007754774514524153, "loss": 0.0804, "theoretical_loss": 3.7735277905912445, "tokens_seen": 766509056 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0010230033658444881, "objective/train/docs_used": 283988, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5113848447799683, "objective/train/original_loss": 1.5113848447799683, "objective/train/theoretical_loss": 3.773399868520391, "objective/train/tokens_used": 787231200, "objective/train/value_avg": -0.009429931640625, "objective/train/value_loss": 0.00033447632449679077, "objective/train/value_max": -0.00022518634796142578, "objective/train/value_min": -0.59326171875, "objective/train/value_reward_corr": 0.6839028954132548, "objective/train/value_std": 0.0179901123046875, "objective/train/weight_avg": 1.0011742115020752, "objective/train/weighted_lm_loss": 1.5128288269042969, "objective/train/weights_max": 1.5810500383377075, "objective/train/weights_min": 0.370164155960083, "theoretical_loss": 3.773399868520391, "tokens_seen": 766771200 }, { "epoch": 0.23, "learning_rate": 0.000775397207510833, "loss": 0.0784, "theoretical_loss": 3.773399868520391, "tokens_seen": 766771200 }, { "epoch": 0.23, "learning_rate": 0.0007753169635692505, "loss": 0.0784, "theoretical_loss": 3.7732720024168644, "tokens_seen": 767033344 }, { "epoch": 0.23, "learning_rate": 0.0007752367196276682, "loss": 0.0771, "theoretical_loss": 3.773144192237065, "tokens_seen": 767295488 }, { "epoch": 0.23, "learning_rate": 0.0007751564756860857, "loss": 0.078, "theoretical_loss": 3.7730164379374402, "tokens_seen": 767557632 }, { "epoch": 0.23, "learning_rate": 0.0007750762317445032, "loss": 0.0775, "theoretical_loss": 3.772888739474485, "tokens_seen": 767819776 }, { "epoch": 0.23, "learning_rate": 0.000774995987802921, "loss": 0.0768, "theoretical_loss": 3.772761096804745, "tokens_seen": 768081920 }, { "epoch": 0.23, "learning_rate": 0.0007749157438613385, "loss": 0.0777, "theoretical_loss": 3.7726335098848143, "tokens_seen": 768344064 }, { "epoch": 0.23, "learning_rate": 0.0007748354999197561, "loss": 0.0773, "theoretical_loss": 3.772505978671334, "tokens_seen": 768606208 }, { "epoch": 0.23, "learning_rate": 0.0007747552559781737, "loss": 0.081, "theoretical_loss": 3.772378503120996, "tokens_seen": 768868352 }, { "epoch": 0.23, "learning_rate": 0.0007746750120365913, "loss": 0.0785, "theoretical_loss": 3.7722510831905387, "tokens_seen": 769130496 }, { "epoch": 0.23, "learning_rate": 0.0007745947680950088, "loss": 0.0791, "theoretical_loss": 3.7721237188367494, "tokens_seen": 769392640 }, { "epoch": 0.23, "learning_rate": 0.0007745145241534264, "loss": 0.0773, "theoretical_loss": 3.771996410016464, "tokens_seen": 769654784 }, { "epoch": 0.23, "learning_rate": 0.000774434280211844, "loss": 0.0756, "theoretical_loss": 3.7718691566865665, "tokens_seen": 769916928 }, { "epoch": 0.23, "learning_rate": 0.0007743540362702615, "loss": 0.0781, "theoretical_loss": 3.7717419588039887, "tokens_seen": 770179072 }, { "epoch": 0.23, "learning_rate": 0.0007742737923286792, "loss": 0.0779, "theoretical_loss": 3.7716148163257115, "tokens_seen": 770441216 }, { "epoch": 0.23, "learning_rate": 0.0007741935483870968, "loss": 0.0748, "theoretical_loss": 3.7714877292087623, "tokens_seen": 770703360 }, { "epoch": 0.23, "learning_rate": 0.0007741133044455145, "loss": 0.0772, "theoretical_loss": 3.7713606974102167, "tokens_seen": 770965504 }, { "epoch": 0.23, "learning_rate": 0.000774033060503932, "loss": 0.0807, "theoretical_loss": 3.7712337208872, "tokens_seen": 771227648 }, { "epoch": 0.23, "learning_rate": 0.0007739528165623495, "loss": 0.0764, "theoretical_loss": 3.7711067995968826, "tokens_seen": 771489792 }, { "epoch": 0.23, "learning_rate": 0.0007738725726207672, "loss": 0.0797, "theoretical_loss": 3.770979933496485, "tokens_seen": 771751936 }, { "epoch": 0.23, "learning_rate": 0.0007737923286791847, "loss": 0.0769, "theoretical_loss": 3.770853122543274, "tokens_seen": 772014080 }, { "epoch": 0.23, "learning_rate": 0.0007737120847376023, "loss": 0.0752, "theoretical_loss": 3.770726366694564, "tokens_seen": 772276224 }, { "epoch": 0.23, "learning_rate": 0.0007736318407960199, "loss": 0.0788, "theoretical_loss": 3.7705996659077172, "tokens_seen": 772538368 }, { "epoch": 0.23, "learning_rate": 0.0007735515968544375, "loss": 0.0765, "theoretical_loss": 3.770473020140143, "tokens_seen": 772800512 }, { "epoch": 0.23, "learning_rate": 0.000773471352912855, "loss": 0.0816, "theoretical_loss": 3.770346429349299, "tokens_seen": 773062656 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0005200718878768384, "objective/train/docs_used": 286255, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5496317148208618, "objective/train/original_loss": 1.5496317148208618, "objective/train/theoretical_loss": 3.7702198934926896, "objective/train/tokens_used": 793784800, "objective/train/value_avg": -0.00848388671875, "objective/train/value_loss": 0.0003523084451444447, "objective/train/value_max": -0.0001302957534790039, "objective/train/value_min": -0.4052734375, "objective/train/value_reward_corr": 0.5911265747381873, "objective/train/value_std": 0.01352691650390625, "objective/train/weight_avg": 1.000672698020935, "objective/train/weighted_lm_loss": 1.550767421722412, "objective/train/weights_max": 1.2783743143081665, "objective/train/weights_min": 0.36907142400741577, "theoretical_loss": 3.7702198934926896, "tokens_seen": 773324800 }, { "epoch": 0.23, "learning_rate": 0.0007733911089712728, "loss": 0.0771, "theoretical_loss": 3.7702198934926896, "tokens_seen": 773324800 }, { "epoch": 0.23, "learning_rate": 0.0007733108650296903, "loss": 0.0765, "theoretical_loss": 3.7700934125278653, "tokens_seen": 773586944 }, { "epoch": 0.23, "learning_rate": 0.0007732306210881078, "loss": 0.0802, "theoretical_loss": 3.7699669864124266, "tokens_seen": 773849088 }, { "epoch": 0.23, "learning_rate": 0.0007731503771465255, "loss": 0.0774, "theoretical_loss": 3.769840615104018, "tokens_seen": 774111232 }, { "epoch": 0.23, "learning_rate": 0.000773070133204943, "loss": 0.0806, "theoretical_loss": 3.7697142985603325, "tokens_seen": 774373376 }, { "epoch": 0.23, "learning_rate": 0.0007729898892633606, "loss": 0.0798, "theoretical_loss": 3.76958803673911, "tokens_seen": 774635520 }, { "epoch": 0.23, "learning_rate": 0.0007729096453217782, "loss": 0.0797, "theoretical_loss": 3.7694618295981375, "tokens_seen": 774897664 }, { "epoch": 0.23, "learning_rate": 0.0007728294013801958, "loss": 0.0777, "theoretical_loss": 3.769335677095248, "tokens_seen": 775159808 }, { "epoch": 0.23, "learning_rate": 0.0007727491574386135, "loss": 0.0804, "theoretical_loss": 3.769209579188323, "tokens_seen": 775421952 }, { "epoch": 0.24, "learning_rate": 0.000772668913497031, "loss": 0.0778, "theoretical_loss": 3.7690835358352883, "tokens_seen": 775684096 }, { "epoch": 0.24, "learning_rate": 0.0007725886695554486, "loss": 0.0804, "theoretical_loss": 3.7689575469941183, "tokens_seen": 775946240 }, { "epoch": 0.24, "learning_rate": 0.0007725084256138662, "loss": 0.079, "theoretical_loss": 3.768831612622833, "tokens_seen": 776208384 }, { "epoch": 0.24, "learning_rate": 0.0007724281816722838, "loss": 0.0773, "theoretical_loss": 3.7687057326794986, "tokens_seen": 776470528 }, { "epoch": 0.24, "learning_rate": 0.0007723479377307013, "loss": 0.0805, "theoretical_loss": 3.768579907122229, "tokens_seen": 776732672 }, { "epoch": 0.24, "learning_rate": 0.000772267693789119, "loss": 0.078, "theoretical_loss": 3.768454135909183, "tokens_seen": 776994816 }, { "epoch": 0.24, "learning_rate": 0.0007721874498475365, "loss": 0.078, "theoretical_loss": 3.768328418998567, "tokens_seen": 777256960 }, { "epoch": 0.24, "learning_rate": 0.000772107205905954, "loss": 0.0777, "theoretical_loss": 3.7682027563486327, "tokens_seen": 777519104 }, { "epoch": 0.24, "learning_rate": 0.0007720269619643718, "loss": 0.0805, "theoretical_loss": 3.768077147917678, "tokens_seen": 777781248 }, { "epoch": 0.24, "learning_rate": 0.0007719467180227893, "loss": 0.0771, "theoretical_loss": 3.7679515936640477, "tokens_seen": 778043392 }, { "epoch": 0.24, "learning_rate": 0.0007718664740812069, "loss": 0.0757, "theoretical_loss": 3.7678260935461316, "tokens_seen": 778305536 }, { "epoch": 0.24, "learning_rate": 0.0007717862301396245, "loss": 0.074, "theoretical_loss": 3.767700647522366, "tokens_seen": 778567680 }, { "epoch": 0.24, "learning_rate": 0.0007717059861980421, "loss": 0.0735, "theoretical_loss": 3.7675752555512334, "tokens_seen": 778829824 }, { "epoch": 0.24, "learning_rate": 0.0007716257422564597, "loss": 0.0782, "theoretical_loss": 3.7674499175912617, "tokens_seen": 779091968 }, { "epoch": 0.24, "learning_rate": 0.0007715454983148772, "loss": 0.0774, "theoretical_loss": 3.767324633601024, "tokens_seen": 779354112 }, { "epoch": 0.24, "learning_rate": 0.0007714652543732948, "loss": 0.0786, "theoretical_loss": 3.7671994035391405, "tokens_seen": 779616256 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0016698199324309826, "objective/train/docs_used": 288522, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6363639831542969, "objective/train/original_loss": 1.6363639831542969, "objective/train/theoretical_loss": 3.767074227364275, "objective/train/tokens_used": 800338400, "objective/train/value_avg": -0.00970458984375, "objective/train/value_loss": 0.0003810957132373005, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.76025390625, "objective/train/value_reward_corr": 0.7845589972125679, "objective/train/value_std": 0.0241241455078125, "objective/train/weight_avg": 1.0018428564071655, "objective/train/weighted_lm_loss": 1.6393061876296997, "objective/train/weights_max": 1.5092625617980957, "objective/train/weights_min": 0.36901721358299255, "theoretical_loss": 3.767074227364275, "tokens_seen": 779878400 }, { "epoch": 0.24, "learning_rate": 0.0007713850104317124, "loss": 0.0804, "theoretical_loss": 3.767074227364275, "tokens_seen": 779878400 }, { "epoch": 0.24, "learning_rate": 0.00077130476649013, "loss": 0.0804, "theoretical_loss": 3.7669491050351396, "tokens_seen": 780140544 }, { "epoch": 0.24, "learning_rate": 0.0007712245225485476, "loss": 0.0792, "theoretical_loss": 3.7668240365104895, "tokens_seen": 780402688 }, { "epoch": 0.24, "learning_rate": 0.0007711442786069653, "loss": 0.0796, "theoretical_loss": 3.7666990217491265, "tokens_seen": 780664832 }, { "epoch": 0.24, "learning_rate": 0.0007710640346653828, "loss": 0.0777, "theoretical_loss": 3.7665740607098974, "tokens_seen": 780926976 }, { "epoch": 0.24, "learning_rate": 0.0007709837907238003, "loss": 0.077, "theoretical_loss": 3.7664491533516946, "tokens_seen": 781189120 }, { "epoch": 0.24, "learning_rate": 0.000770903546782218, "loss": 0.0771, "theoretical_loss": 3.766324299633455, "tokens_seen": 781451264 }, { "epoch": 0.24, "learning_rate": 0.0007708233028406355, "loss": 0.0786, "theoretical_loss": 3.766199499514162, "tokens_seen": 781713408 }, { "epoch": 0.24, "learning_rate": 0.0007707430588990531, "loss": 0.0753, "theoretical_loss": 3.7660747529528424, "tokens_seen": 781975552 }, { "epoch": 0.24, "learning_rate": 0.0007706628149574707, "loss": 0.0789, "theoretical_loss": 3.76595005990857, "tokens_seen": 782237696 }, { "epoch": 0.24, "learning_rate": 0.0007705825710158883, "loss": 0.0751, "theoretical_loss": 3.7658254203404615, "tokens_seen": 782499840 }, { "epoch": 0.24, "learning_rate": 0.0007705023270743058, "loss": 0.0771, "theoretical_loss": 3.7657008342076796, "tokens_seen": 782761984 }, { "epoch": 0.24, "learning_rate": 0.0007704220831327236, "loss": 0.076, "theoretical_loss": 3.765576301469433, "tokens_seen": 783024128 }, { "epoch": 0.24, "learning_rate": 0.0007703418391911411, "loss": 0.0775, "theoretical_loss": 3.7654518220849726, "tokens_seen": 783286272 }, { "epoch": 0.24, "learning_rate": 0.0007702615952495587, "loss": 0.0768, "theoretical_loss": 3.7653273960135962, "tokens_seen": 783548416 }, { "epoch": 0.24, "learning_rate": 0.0007701813513079763, "loss": 0.0809, "theoretical_loss": 3.765203023214645, "tokens_seen": 783810560 }, { "epoch": 0.24, "learning_rate": 0.0007701011073663938, "loss": 0.0754, "theoretical_loss": 3.7650787036475055, "tokens_seen": 784072704 }, { "epoch": 0.24, "learning_rate": 0.0007700208634248115, "loss": 0.0765, "theoretical_loss": 3.764954437271608, "tokens_seen": 784334848 }, { "epoch": 0.24, "learning_rate": 0.000769940619483229, "loss": 0.0718, "theoretical_loss": 3.7648302240464284, "tokens_seen": 784596992 }, { "epoch": 0.24, "learning_rate": 0.0007698603755416466, "loss": 0.0762, "theoretical_loss": 3.764706063931486, "tokens_seen": 784859136 }, { "epoch": 0.24, "learning_rate": 0.0007697801316000643, "loss": 0.0783, "theoretical_loss": 3.764581956886345, "tokens_seen": 785121280 }, { "epoch": 0.24, "learning_rate": 0.0007696998876584818, "loss": 0.0777, "theoretical_loss": 3.7644579028706135, "tokens_seen": 785383424 }, { "epoch": 0.24, "learning_rate": 0.0007696196437168994, "loss": 0.0756, "theoretical_loss": 3.764333901843944, "tokens_seen": 785645568 }, { "epoch": 0.24, "learning_rate": 0.000769539399775317, "loss": 0.0771, "theoretical_loss": 3.764209953766033, "tokens_seen": 785907712 }, { "epoch": 0.24, "learning_rate": 0.0007694591558337346, "loss": 0.0753, "theoretical_loss": 3.7640860585966207, "tokens_seen": 786169856 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0009356484515592456, "objective/train/docs_used": 290810, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.523367166519165, "objective/train/original_loss": 1.523367166519165, "objective/train/theoretical_loss": 3.763962216295493, "objective/train/tokens_used": 806892000, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.0002046849112957716, "objective/train/value_max": -0.0001838207244873047, "objective/train/value_min": -0.347900390625, "objective/train/value_reward_corr": 0.6702344519054119, "objective/train/value_std": 0.01255035400390625, "objective/train/weight_avg": 1.0010277032852173, "objective/train/weighted_lm_loss": 1.5253008604049683, "objective/train/weights_max": 1.1150742769241333, "objective/train/weights_min": 0.3685617744922638, "theoretical_loss": 3.763962216295493, "tokens_seen": 786432000 }, { "epoch": 0.24, "learning_rate": 0.0007693789118921521, "loss": 0.0757, "theoretical_loss": 3.763962216295493, "tokens_seen": 786432000 }, { "epoch": 0.24, "learning_rate": 0.0007692986679505698, "loss": 0.0761, "theoretical_loss": 3.7638384268224776, "tokens_seen": 786694144 }, { "epoch": 0.24, "learning_rate": 0.0007692184240089873, "loss": 0.0773, "theoretical_loss": 3.7637146901374474, "tokens_seen": 786956288 }, { "epoch": 0.24, "learning_rate": 0.0007691381800674048, "loss": 0.0785, "theoretical_loss": 3.7635910062003193, "tokens_seen": 787218432 }, { "epoch": 0.24, "learning_rate": 0.0007690579361258225, "loss": 0.0765, "theoretical_loss": 3.7634673749710523, "tokens_seen": 787480576 }, { "epoch": 0.24, "learning_rate": 0.00076897769218424, "loss": 0.0753, "theoretical_loss": 3.763343796409651, "tokens_seen": 787742720 }, { "epoch": 0.24, "learning_rate": 0.0007688974482426578, "loss": 0.076, "theoretical_loss": 3.7632202704761637, "tokens_seen": 788004864 }, { "epoch": 0.24, "learning_rate": 0.0007688172043010753, "loss": 0.0756, "theoretical_loss": 3.7630967971306797, "tokens_seen": 788267008 }, { "epoch": 0.24, "learning_rate": 0.0007687369603594929, "loss": 0.0767, "theoretical_loss": 3.762973376333335, "tokens_seen": 788529152 }, { "epoch": 0.24, "learning_rate": 0.0007686567164179105, "loss": 0.0775, "theoretical_loss": 3.7628500080443077, "tokens_seen": 788791296 }, { "epoch": 0.24, "learning_rate": 0.000768576472476328, "loss": 0.077, "theoretical_loss": 3.7627266922238185, "tokens_seen": 789053440 }, { "epoch": 0.24, "learning_rate": 0.0007684962285347456, "loss": 0.0768, "theoretical_loss": 3.762603428832133, "tokens_seen": 789315584 }, { "epoch": 0.24, "learning_rate": 0.0007684159845931632, "loss": 0.0751, "theoretical_loss": 3.7624802178295584, "tokens_seen": 789577728 }, { "epoch": 0.24, "learning_rate": 0.0007683357406515808, "loss": 0.0778, "theoretical_loss": 3.762357059176447, "tokens_seen": 789839872 }, { "epoch": 0.24, "learning_rate": 0.0007682554967099984, "loss": 0.0773, "theoretical_loss": 3.762233952833193, "tokens_seen": 790102016 }, { "epoch": 0.24, "learning_rate": 0.0007681752527684161, "loss": 0.0798, "theoretical_loss": 3.7621108987602336, "tokens_seen": 790364160 }, { "epoch": 0.24, "learning_rate": 0.0007680950088268336, "loss": 0.0764, "theoretical_loss": 3.76198789691805, "tokens_seen": 790626304 }, { "epoch": 0.24, "learning_rate": 0.0007680147648852511, "loss": 0.0798, "theoretical_loss": 3.7618649472671652, "tokens_seen": 790888448 }, { "epoch": 0.24, "learning_rate": 0.0007679345209436688, "loss": 0.0781, "theoretical_loss": 3.761742049768146, "tokens_seen": 791150592 }, { "epoch": 0.24, "learning_rate": 0.0007678542770020863, "loss": 0.0789, "theoretical_loss": 3.761619204381602, "tokens_seen": 791412736 }, { "epoch": 0.24, "learning_rate": 0.000767774033060504, "loss": 0.0751, "theoretical_loss": 3.7614964110681846, "tokens_seen": 791674880 }, { "epoch": 0.24, "learning_rate": 0.0007676937891189215, "loss": 0.0769, "theoretical_loss": 3.761373669788589, "tokens_seen": 791937024 }, { "epoch": 0.24, "learning_rate": 0.0007676135451773391, "loss": 0.0765, "theoretical_loss": 3.7612509805035526, "tokens_seen": 792199168 }, { "epoch": 0.24, "learning_rate": 0.0007675333012357568, "loss": 0.075, "theoretical_loss": 3.761128343173856, "tokens_seen": 792461312 }, { "epoch": 0.24, "learning_rate": 0.0007674530572941743, "loss": 0.0755, "theoretical_loss": 3.7610057577603215, "tokens_seen": 792723456 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0012367293238639832, "objective/train/docs_used": 293128, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5767234563827515, "objective/train/original_loss": 1.576723575592041, "objective/train/theoretical_loss": 3.7608832242238144, "objective/train/tokens_used": 813445600, "objective/train/value_avg": -0.0086669921875, "objective/train/value_loss": 0.00022169639123603702, "objective/train/value_max": -0.0002472400665283203, "objective/train/value_min": -0.58544921875, "objective/train/value_reward_corr": 0.6488604890647144, "objective/train/value_std": 0.0127410888671875, "objective/train/weight_avg": 1.001336932182312, "objective/train/weighted_lm_loss": 1.5789175033569336, "objective/train/weights_max": 1.3074339628219604, "objective/train/weights_min": 0.36864086985588074, "theoretical_loss": 3.7608832242238144, "tokens_seen": 792985600 }, { "epoch": 0.24, "learning_rate": 0.0007673728133525919, "loss": 0.0782, "theoretical_loss": 3.7608832242238144, "tokens_seen": 792985600 }, { "epoch": 0.24, "learning_rate": 0.0007672925694110095, "loss": 0.0778, "theoretical_loss": 3.7607607425252416, "tokens_seen": 793247744 }, { "epoch": 0.24, "learning_rate": 0.0007672123254694271, "loss": 0.0788, "theoretical_loss": 3.7606383126255536, "tokens_seen": 793509888 }, { "epoch": 0.24, "learning_rate": 0.0007671320815278446, "loss": 0.0792, "theoretical_loss": 3.760515934485743, "tokens_seen": 793772032 }, { "epoch": 0.24, "learning_rate": 0.0007670518375862623, "loss": 0.0776, "theoretical_loss": 3.760393608066843, "tokens_seen": 794034176 }, { "epoch": 0.24, "learning_rate": 0.0007669715936446798, "loss": 0.0759, "theoretical_loss": 3.760271333329932, "tokens_seen": 794296320 }, { "epoch": 0.24, "learning_rate": 0.0007668913497030974, "loss": 0.0779, "theoretical_loss": 3.7601491102361275, "tokens_seen": 794558464 }, { "epoch": 0.24, "learning_rate": 0.000766811105761515, "loss": 0.0776, "theoretical_loss": 3.7600269387465914, "tokens_seen": 794820608 }, { "epoch": 0.24, "learning_rate": 0.0007667308618199326, "loss": 0.078, "theoretical_loss": 3.759904818822525, "tokens_seen": 795082752 }, { "epoch": 0.24, "learning_rate": 0.0007666506178783502, "loss": 0.0787, "theoretical_loss": 3.759782750425175, "tokens_seen": 795344896 }, { "epoch": 0.24, "learning_rate": 0.0007665703739367678, "loss": 0.076, "theoretical_loss": 3.759660733515826, "tokens_seen": 795607040 }, { "epoch": 0.24, "learning_rate": 0.0007664901299951854, "loss": 0.0765, "theoretical_loss": 3.7595387680558088, "tokens_seen": 795869184 }, { "epoch": 0.24, "learning_rate": 0.000766409886053603, "loss": 0.0791, "theoretical_loss": 3.759416854006492, "tokens_seen": 796131328 }, { "epoch": 0.24, "learning_rate": 0.0007663296421120206, "loss": 0.0793, "theoretical_loss": 3.7592949913292886, "tokens_seen": 796393472 }, { "epoch": 0.24, "learning_rate": 0.0007662493981704381, "loss": 0.0786, "theoretical_loss": 3.759173179985652, "tokens_seen": 796655616 }, { "epoch": 0.24, "learning_rate": 0.0007661691542288557, "loss": 0.0747, "theoretical_loss": 3.7590514199370775, "tokens_seen": 796917760 }, { "epoch": 0.24, "learning_rate": 0.0007660889102872733, "loss": 0.0735, "theoretical_loss": 3.758929711145101, "tokens_seen": 797179904 }, { "epoch": 0.24, "learning_rate": 0.0007660086663456909, "loss": 0.0756, "theoretical_loss": 3.758808053571302, "tokens_seen": 797442048 }, { "epoch": 0.24, "learning_rate": 0.0007659284224041086, "loss": 0.0805, "theoretical_loss": 3.7586864471772996, "tokens_seen": 797704192 }, { "epoch": 0.24, "learning_rate": 0.0007658481784625261, "loss": 0.0771, "theoretical_loss": 3.758564891924755, "tokens_seen": 797966336 }, { "epoch": 0.24, "learning_rate": 0.0007657679345209437, "loss": 0.0792, "theoretical_loss": 3.758443387775371, "tokens_seen": 798228480 }, { "epoch": 0.24, "learning_rate": 0.0007656876905793613, "loss": 0.0762, "theoretical_loss": 3.7583219346908905, "tokens_seen": 798490624 }, { "epoch": 0.24, "learning_rate": 0.0007656074466377788, "loss": 0.077, "theoretical_loss": 3.758200532633099, "tokens_seen": 798752768 }, { "epoch": 0.24, "learning_rate": 0.0007655272026961964, "loss": 0.0767, "theoretical_loss": 3.7580791815638213, "tokens_seen": 799014912 }, { "epoch": 0.24, "learning_rate": 0.000765446958754614, "loss": 0.0782, "theoretical_loss": 3.7579578814449253, "tokens_seen": 799277056 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0012315761996433139, "objective/train/docs_used": 295724, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5393258333206177, "objective/train/original_loss": 1.5393258333206177, "objective/train/theoretical_loss": 3.7578366322383188, "objective/train/tokens_used": 819999200, "objective/train/value_avg": -0.007904052734375, "objective/train/value_loss": 0.00038409262197092175, "objective/train/value_max": -0.00012433528900146484, "objective/train/value_min": -0.87158203125, "objective/train/value_reward_corr": 0.6161805024099198, "objective/train/value_std": 0.0164794921875, "objective/train/weight_avg": 1.0014045238494873, "objective/train/weighted_lm_loss": 1.5412237644195557, "objective/train/weights_max": 1.616646409034729, "objective/train/weights_min": 0.3682824373245239, "theoretical_loss": 3.7578366322383188, "tokens_seen": 799539200 }, { "epoch": 0.24, "learning_rate": 0.0007653667148130316, "loss": 0.0778, "theoretical_loss": 3.7578366322383188, "tokens_seen": 799539200 }, { "epoch": 0.24, "learning_rate": 0.0007652864708714491, "loss": 0.0762, "theoretical_loss": 3.7577154339059504, "tokens_seen": 799801344 }, { "epoch": 0.24, "learning_rate": 0.0007652062269298669, "loss": 0.0768, "theoretical_loss": 3.7575942864098106, "tokens_seen": 800063488 }, { "epoch": 0.24, "learning_rate": 0.0007651259829882844, "loss": 0.0755, "theoretical_loss": 3.75747318971193, "tokens_seen": 800325632 }, { "epoch": 0.24, "learning_rate": 0.000765045739046702, "loss": 0.0757, "theoretical_loss": 3.7573521437743795, "tokens_seen": 800587776 }, { "epoch": 0.24, "learning_rate": 0.0007649654951051196, "loss": 0.0775, "theoretical_loss": 3.7572311485592715, "tokens_seen": 800849920 }, { "epoch": 0.24, "learning_rate": 0.0007648852511635371, "loss": 0.0754, "theoretical_loss": 3.7571102040287596, "tokens_seen": 801112064 }, { "epoch": 0.24, "learning_rate": 0.0007648050072219548, "loss": 0.0803, "theoretical_loss": 3.7569893101450367, "tokens_seen": 801374208 }, { "epoch": 0.24, "learning_rate": 0.0007647247632803723, "loss": 0.0804, "theoretical_loss": 3.756868466870337, "tokens_seen": 801636352 }, { "epoch": 0.24, "learning_rate": 0.0007646445193387899, "loss": 0.078, "theoretical_loss": 3.7567476741669346, "tokens_seen": 801898496 }, { "epoch": 0.24, "learning_rate": 0.0007645642753972076, "loss": 0.0778, "theoretical_loss": 3.756626931997145, "tokens_seen": 802160640 }, { "epoch": 0.24, "learning_rate": 0.0007644840314556251, "loss": 0.0745, "theoretical_loss": 3.7565062403233234, "tokens_seen": 802422784 }, { "epoch": 0.24, "learning_rate": 0.0007644037875140427, "loss": 0.0742, "theoretical_loss": 3.7563855991078654, "tokens_seen": 802684928 }, { "epoch": 0.24, "learning_rate": 0.0007643235435724603, "loss": 0.0725, "theoretical_loss": 3.7562650083132074, "tokens_seen": 802947072 }, { "epoch": 0.24, "learning_rate": 0.0007642432996308779, "loss": 0.0782, "theoretical_loss": 3.756144467901825, "tokens_seen": 803209216 }, { "epoch": 0.24, "learning_rate": 0.0007641630556892954, "loss": 0.0777, "theoretical_loss": 3.756023977836235, "tokens_seen": 803471360 }, { "epoch": 0.24, "learning_rate": 0.0007640828117477131, "loss": 0.0767, "theoretical_loss": 3.755903538078994, "tokens_seen": 803733504 }, { "epoch": 0.24, "learning_rate": 0.0007640025678061306, "loss": 0.0803, "theoretical_loss": 3.7557831485926982, "tokens_seen": 803995648 }, { "epoch": 0.24, "learning_rate": 0.0007639223238645483, "loss": 0.0786, "theoretical_loss": 3.7556628093399835, "tokens_seen": 804257792 }, { "epoch": 0.24, "learning_rate": 0.0007638420799229659, "loss": 0.0802, "theoretical_loss": 3.7555425202835275, "tokens_seen": 804519936 }, { "epoch": 0.24, "learning_rate": 0.0007637618359813834, "loss": 0.0736, "theoretical_loss": 3.7554222813860463, "tokens_seen": 804782080 }, { "epoch": 0.24, "learning_rate": 0.0007636815920398011, "loss": 0.0769, "theoretical_loss": 3.7553020926102954, "tokens_seen": 805044224 }, { "epoch": 0.24, "learning_rate": 0.0007636013480982186, "loss": 0.0786, "theoretical_loss": 3.755181953919071, "tokens_seen": 805306368 }, { "epoch": 0.24, "learning_rate": 0.0007635211041566362, "loss": 0.0769, "theoretical_loss": 3.755061865275209, "tokens_seen": 805568512 }, { "epoch": 0.24, "learning_rate": 0.0007634408602150538, "loss": 0.0779, "theoretical_loss": 3.754941826641584, "tokens_seen": 805830656 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0008438312797807157, "objective/train/docs_used": 297892, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4582135677337646, "objective/train/original_loss": 1.458213448524475, "objective/train/theoretical_loss": 3.754821837981112, "objective/train/tokens_used": 826552800, "objective/train/value_avg": -0.005859375, "objective/train/value_loss": 0.0003289504675194621, "objective/train/value_max": -0.00010973215103149414, "objective/train/value_min": -0.73828125, "objective/train/value_reward_corr": 0.4976893790214529, "objective/train/value_std": 0.0099029541015625, "objective/train/weight_avg": 1.0009607076644897, "objective/train/weighted_lm_loss": 1.4596952199935913, "objective/train/weights_max": 1.2155424356460571, "objective/train/weights_min": 0.05370466783642769, "theoretical_loss": 3.754821837981112, "tokens_seen": 806092800 }, { "epoch": 0.24, "learning_rate": 0.0007633606162734714, "loss": 0.0753, "theoretical_loss": 3.754821837981112, "tokens_seen": 806092800 }, { "epoch": 0.24, "learning_rate": 0.0007632803723318889, "loss": 0.0759, "theoretical_loss": 3.7547018992567462, "tokens_seen": 806354944 }, { "epoch": 0.24, "learning_rate": 0.0007632001283903065, "loss": 0.079, "theoretical_loss": 3.7545820104314815, "tokens_seen": 806617088 }, { "epoch": 0.24, "learning_rate": 0.0007631198844487241, "loss": 0.0746, "theoretical_loss": 3.7544621714683517, "tokens_seen": 806879232 }, { "epoch": 0.24, "learning_rate": 0.0007630396405071417, "loss": 0.0763, "theoretical_loss": 3.754342382330428, "tokens_seen": 807141376 }, { "epoch": 0.24, "learning_rate": 0.0007629593965655594, "loss": 0.0746, "theoretical_loss": 3.7542226429808236, "tokens_seen": 807403520 }, { "epoch": 0.24, "learning_rate": 0.0007628791526239769, "loss": 0.0742, "theoretical_loss": 3.7541029533826893, "tokens_seen": 807665664 }, { "epoch": 0.24, "learning_rate": 0.0007627989086823945, "loss": 0.0753, "theoretical_loss": 3.7539833134992158, "tokens_seen": 807927808 }, { "epoch": 0.24, "learning_rate": 0.0007627186647408121, "loss": 0.0783, "theoretical_loss": 3.753863723293634, "tokens_seen": 808189952 }, { "epoch": 0.24, "learning_rate": 0.0007626384207992296, "loss": 0.0766, "theoretical_loss": 3.7537441827292106, "tokens_seen": 808452096 }, { "epoch": 0.25, "learning_rate": 0.0007625581768576473, "loss": 0.077, "theoretical_loss": 3.753624691769255, "tokens_seen": 808714240 }, { "epoch": 0.25, "learning_rate": 0.0007624779329160648, "loss": 0.079, "theoretical_loss": 3.7535052503771142, "tokens_seen": 808976384 }, { "epoch": 0.25, "learning_rate": 0.0007623976889744824, "loss": 0.0756, "theoretical_loss": 3.7533858585161735, "tokens_seen": 809238528 }, { "epoch": 0.25, "learning_rate": 0.0007623174450329001, "loss": 0.0796, "theoretical_loss": 3.753266516149858, "tokens_seen": 809500672 }, { "epoch": 0.25, "learning_rate": 0.0007622372010913177, "loss": 0.0766, "theoretical_loss": 3.7531472232416316, "tokens_seen": 809762816 }, { "epoch": 0.25, "learning_rate": 0.0007621569571497352, "loss": 0.0736, "theoretical_loss": 3.7530279797549957, "tokens_seen": 810024960 }, { "epoch": 0.25, "learning_rate": 0.0007620767132081528, "loss": 0.0748, "theoretical_loss": 3.752908785653492, "tokens_seen": 810287104 }, { "epoch": 0.25, "learning_rate": 0.0007619964692665704, "loss": 0.0756, "theoretical_loss": 3.7527896409007004, "tokens_seen": 810549248 }, { "epoch": 0.25, "learning_rate": 0.0007619162253249879, "loss": 0.078, "theoretical_loss": 3.7526705454602394, "tokens_seen": 810811392 }, { "epoch": 0.25, "learning_rate": 0.0007618359813834056, "loss": 0.077, "theoretical_loss": 3.752551499295766, "tokens_seen": 811073536 }, { "epoch": 0.25, "learning_rate": 0.0007617557374418231, "loss": 0.0766, "theoretical_loss": 3.7524325023709757, "tokens_seen": 811335680 }, { "epoch": 0.25, "learning_rate": 0.0007616754935002407, "loss": 0.0736, "theoretical_loss": 3.7523135546496023, "tokens_seen": 811597824 }, { "epoch": 0.25, "learning_rate": 0.0007615952495586584, "loss": 0.0772, "theoretical_loss": 3.7521946560954182, "tokens_seen": 811859968 }, { "epoch": 0.25, "learning_rate": 0.0007615150056170759, "loss": 0.0767, "theoretical_loss": 3.7520758066722344, "tokens_seen": 812122112 }, { "epoch": 0.25, "learning_rate": 0.0007614347616754936, "loss": 0.075, "theoretical_loss": 3.7519570063438996, "tokens_seen": 812384256 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0014068076852709055, "objective/train/docs_used": 300102, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5233573913574219, "objective/train/original_loss": 1.5233573913574219, "objective/train/theoretical_loss": 3.7518382550743024, "objective/train/tokens_used": 833106400, "objective/train/value_avg": -0.006847381591796875, "objective/train/value_loss": 0.00014167482731863856, "objective/train/value_max": -0.0001150369644165039, "objective/train/value_min": -0.31005859375, "objective/train/value_reward_corr": 0.5786646527156373, "objective/train/value_std": 0.01003265380859375, "objective/train/weight_avg": 1.0014715194702148, "objective/train/weighted_lm_loss": 1.5256245136260986, "objective/train/weights_max": 1.1971921920776367, "objective/train/weights_min": 0.36875900626182556, "theoretical_loss": 3.7518382550743024, "tokens_seen": 812646400 }, { "epoch": 0.25, "learning_rate": 0.0007613545177339111, "loss": 0.0763, "theoretical_loss": 3.7518382550743024, "tokens_seen": 812646400 }, { "epoch": 0.25, "learning_rate": 0.0007612742737923287, "loss": 0.0777, "theoretical_loss": 3.7517195528273666, "tokens_seen": 812908544 }, { "epoch": 0.25, "learning_rate": 0.0007611940298507463, "loss": 0.0758, "theoretical_loss": 3.751600899567057, "tokens_seen": 813170688 }, { "epoch": 0.25, "learning_rate": 0.0007611137859091639, "loss": 0.0724, "theoretical_loss": 3.7514822952573743, "tokens_seen": 813432832 }, { "epoch": 0.25, "learning_rate": 0.0007610335419675814, "loss": 0.0772, "theoretical_loss": 3.7513637398623603, "tokens_seen": 813694976 }, { "epoch": 0.25, "learning_rate": 0.0007609532980259991, "loss": 0.0756, "theoretical_loss": 3.751245233346091, "tokens_seen": 813957120 }, { "epoch": 0.25, "learning_rate": 0.0007608730540844167, "loss": 0.0768, "theoretical_loss": 3.7511267756726823, "tokens_seen": 814219264 }, { "epoch": 0.25, "learning_rate": 0.0007607928101428342, "loss": 0.0783, "theoretical_loss": 3.7510083668062886, "tokens_seen": 814481408 }, { "epoch": 0.25, "learning_rate": 0.0007607125662012519, "loss": 0.075, "theoretical_loss": 3.750890006711101, "tokens_seen": 814743552 }, { "epoch": 0.25, "learning_rate": 0.0007606323222596694, "loss": 0.075, "theoretical_loss": 3.7507716953513492, "tokens_seen": 815005696 }, { "epoch": 0.25, "learning_rate": 0.000760552078318087, "loss": 0.0784, "theoretical_loss": 3.7506534326912995, "tokens_seen": 815267840 }, { "epoch": 0.25, "learning_rate": 0.0007604718343765046, "loss": 0.0769, "theoretical_loss": 3.7505352186952567, "tokens_seen": 815529984 }, { "epoch": 0.25, "learning_rate": 0.0007603915904349222, "loss": 0.0738, "theoretical_loss": 3.7504170533275634, "tokens_seen": 815792128 }, { "epoch": 0.25, "learning_rate": 0.0007603113464933397, "loss": 0.0732, "theoretical_loss": 3.7502989365526, "tokens_seen": 816054272 }, { "epoch": 0.25, "learning_rate": 0.0007602311025517573, "loss": 0.0762, "theoretical_loss": 3.7501808683347826, "tokens_seen": 816316416 }, { "epoch": 0.25, "learning_rate": 0.000760150858610175, "loss": 0.0751, "theoretical_loss": 3.7500628486385668, "tokens_seen": 816578560 }, { "epoch": 0.25, "learning_rate": 0.0007600706146685926, "loss": 0.0744, "theoretical_loss": 3.7499448774284447, "tokens_seen": 816840704 }, { "epoch": 0.25, "learning_rate": 0.0007599903707270102, "loss": 0.0771, "theoretical_loss": 3.749826954668946, "tokens_seen": 817102848 }, { "epoch": 0.25, "learning_rate": 0.0007599101267854277, "loss": 0.077, "theoretical_loss": 3.7497090803246387, "tokens_seen": 817364992 }, { "epoch": 0.25, "learning_rate": 0.0007598298828438454, "loss": 0.0749, "theoretical_loss": 3.7495912543601246, "tokens_seen": 817627136 }, { "epoch": 0.25, "learning_rate": 0.0007597496389022629, "loss": 0.0775, "theoretical_loss": 3.7494734767400475, "tokens_seen": 817889280 }, { "epoch": 0.25, "learning_rate": 0.0007596693949606804, "loss": 0.0766, "theoretical_loss": 3.7493557474290853, "tokens_seen": 818151424 }, { "epoch": 0.25, "learning_rate": 0.0007595891510190981, "loss": 0.0743, "theoretical_loss": 3.7492380663919533, "tokens_seen": 818413568 }, { "epoch": 0.25, "learning_rate": 0.0007595089070775156, "loss": 0.0762, "theoretical_loss": 3.7491204335934043, "tokens_seen": 818675712 }, { "epoch": 0.25, "learning_rate": 0.0007594286631359332, "loss": 0.076, "theoretical_loss": 3.7490028489982286, "tokens_seen": 818937856 }, { "epoch": 0.25, "objective/train/advantage_avg": -2.009487616305705e-05, "objective/train/docs_used": 302476, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5615952014923096, "objective/train/original_loss": 1.5615952014923096, "objective/train/theoretical_loss": 3.7488853125712525, "objective/train/tokens_used": 839660000, "objective/train/value_avg": -0.0078582763671875, "objective/train/value_loss": 0.00040608173003420234, "objective/train/value_max": -0.0001323223114013672, "objective/train/value_min": -0.56494140625, "objective/train/value_reward_corr": 0.58299361157949, "objective/train/value_std": 0.01151275634765625, "objective/train/weight_avg": 1.0001566410064697, "objective/train/weighted_lm_loss": 1.563400387763977, "objective/train/weights_max": 1.6039460897445679, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 3.7488853125712525, "tokens_seen": 819200000 }, { "epoch": 0.25, "learning_rate": 0.0007593484191943509, "loss": 0.0727, "theoretical_loss": 3.7488853125712525, "tokens_seen": 819200000 }, { "epoch": 0.25, "learning_rate": 0.0007592681752527685, "loss": 0.0738, "theoretical_loss": 3.7487678242773406, "tokens_seen": 819462144 }, { "epoch": 0.25, "learning_rate": 0.000759187931311186, "loss": 0.0772, "theoretical_loss": 3.748650384081392, "tokens_seen": 819724288 }, { "epoch": 0.25, "learning_rate": 0.0007591076873696036, "loss": 0.0771, "theoretical_loss": 3.7485329919483448, "tokens_seen": 819986432 }, { "epoch": 0.25, "learning_rate": 0.0007590274434280212, "loss": 0.0746, "theoretical_loss": 3.7484156478431734, "tokens_seen": 820248576 }, { "epoch": 0.25, "learning_rate": 0.0007589471994864387, "loss": 0.075, "theoretical_loss": 3.748298351730888, "tokens_seen": 820510720 }, { "epoch": 0.25, "learning_rate": 0.0007588669555448564, "loss": 0.0757, "theoretical_loss": 3.748181103576537, "tokens_seen": 820772864 }, { "epoch": 0.25, "learning_rate": 0.0007587867116032739, "loss": 0.0773, "theoretical_loss": 3.7480639033452032, "tokens_seen": 821035008 }, { "epoch": 0.25, "learning_rate": 0.0007587064676616916, "loss": 0.078, "theoretical_loss": 3.747946751002009, "tokens_seen": 821297152 }, { "epoch": 0.25, "learning_rate": 0.0007586262237201092, "loss": 0.0784, "theoretical_loss": 3.747829646512109, "tokens_seen": 821559296 }, { "epoch": 0.25, "learning_rate": 0.0007585459797785267, "loss": 0.0763, "theoretical_loss": 3.747712589840699, "tokens_seen": 821821440 }, { "epoch": 0.25, "learning_rate": 0.0007584657358369444, "loss": 0.0751, "theoretical_loss": 3.7475955809530084, "tokens_seen": 822083584 }, { "epoch": 0.25, "learning_rate": 0.0007583854918953619, "loss": 0.0731, "theoretical_loss": 3.747478619814303, "tokens_seen": 822345728 }, { "epoch": 0.25, "learning_rate": 0.0007583052479537795, "loss": 0.0792, "theoretical_loss": 3.7473617063898863, "tokens_seen": 822607872 }, { "epoch": 0.25, "learning_rate": 0.0007582250040121971, "loss": 0.0767, "theoretical_loss": 3.747244840645097, "tokens_seen": 822870016 }, { "epoch": 0.25, "learning_rate": 0.0007581447600706147, "loss": 0.0751, "theoretical_loss": 3.7471280225453096, "tokens_seen": 823132160 }, { "epoch": 0.25, "learning_rate": 0.0007580645161290322, "loss": 0.0764, "theoretical_loss": 3.747011252055936, "tokens_seen": 823394304 }, { "epoch": 0.25, "learning_rate": 0.0007579842721874498, "loss": 0.075, "theoretical_loss": 3.746894529142424, "tokens_seen": 823656448 }, { "epoch": 0.25, "learning_rate": 0.0007579040282458675, "loss": 0.0762, "theoretical_loss": 3.746777853770256, "tokens_seen": 823918592 }, { "epoch": 0.25, "learning_rate": 0.000757823784304285, "loss": 0.0751, "theoretical_loss": 3.746661225904953, "tokens_seen": 824180736 }, { "epoch": 0.25, "learning_rate": 0.0007577435403627027, "loss": 0.0776, "theoretical_loss": 3.746544645512069, "tokens_seen": 824442880 }, { "epoch": 0.25, "learning_rate": 0.0007576632964211202, "loss": 0.0753, "theoretical_loss": 3.7464281125571963, "tokens_seen": 824705024 }, { "epoch": 0.25, "learning_rate": 0.0007575830524795379, "loss": 0.0762, "theoretical_loss": 3.7463116270059618, "tokens_seen": 824967168 }, { "epoch": 0.25, "learning_rate": 0.0007575028085379554, "loss": 0.0748, "theoretical_loss": 3.7461951888240286, "tokens_seen": 825229312 }, { "epoch": 0.25, "learning_rate": 0.000757422564596373, "loss": 0.0769, "theoretical_loss": 3.7460787979770958, "tokens_seen": 825491456 }, { "epoch": 0.25, "objective/train/advantage_avg": -6.741825927747414e-05, "objective/train/docs_used": 304950, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5155413150787354, "objective/train/original_loss": 1.5155411958694458, "objective/train/theoretical_loss": 3.745962454430897, "objective/train/tokens_used": 846213600, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.0001769889786373824, "objective/train/value_max": -0.00011682510375976562, "objective/train/value_min": -0.30322265625, "objective/train/value_reward_corr": 0.7321892783075874, "objective/train/value_std": 0.01239776611328125, "objective/train/weight_avg": 1.000016212463379, "objective/train/weighted_lm_loss": 1.5158941745758057, "objective/train/weights_max": 1.2777711153030396, "objective/train/weights_min": 0.37355300784111023, "theoretical_loss": 3.745962454430897, "tokens_seen": 825753600 }, { "epoch": 0.25, "learning_rate": 0.0007573423206547906, "loss": 0.0764, "theoretical_loss": 3.745962454430897, "tokens_seen": 825753600 }, { "epoch": 0.25, "learning_rate": 0.0007572620767132081, "loss": 0.0785, "theoretical_loss": 3.745846158151204, "tokens_seen": 826015744 }, { "epoch": 0.25, "learning_rate": 0.0007571818327716257, "loss": 0.0765, "theoretical_loss": 3.7457299091038214, "tokens_seen": 826277888 }, { "epoch": 0.25, "learning_rate": 0.0007571015888300434, "loss": 0.0748, "theoretical_loss": 3.745613707254591, "tokens_seen": 826540032 }, { "epoch": 0.25, "learning_rate": 0.000757021344888461, "loss": 0.0763, "theoretical_loss": 3.7454975525693897, "tokens_seen": 826802176 }, { "epoch": 0.25, "learning_rate": 0.0007569411009468785, "loss": 0.076, "theoretical_loss": 3.7453814450141305, "tokens_seen": 827064320 }, { "epoch": 0.25, "learning_rate": 0.0007568608570052962, "loss": 0.0806, "theoretical_loss": 3.7452653845547603, "tokens_seen": 827326464 }, { "epoch": 0.25, "learning_rate": 0.0007567806130637137, "loss": 0.0798, "theoretical_loss": 3.745149371157263, "tokens_seen": 827588608 }, { "epoch": 0.25, "learning_rate": 0.0007567003691221312, "loss": 0.0778, "theoretical_loss": 3.7450334047876574, "tokens_seen": 827850752 }, { "epoch": 0.25, "learning_rate": 0.0007566201251805489, "loss": 0.0779, "theoretical_loss": 3.744917485411997, "tokens_seen": 828112896 }, { "epoch": 0.25, "learning_rate": 0.0007565398812389664, "loss": 0.0803, "theoretical_loss": 3.744801612996371, "tokens_seen": 828375040 }, { "epoch": 0.25, "learning_rate": 0.000756459637297384, "loss": 0.0766, "theoretical_loss": 3.744685787506903, "tokens_seen": 828637184 }, { "epoch": 0.25, "learning_rate": 0.0007563793933558017, "loss": 0.0775, "theoretical_loss": 3.7445700089097533, "tokens_seen": 828899328 }, { "epoch": 0.25, "learning_rate": 0.0007562991494142193, "loss": 0.0764, "theoretical_loss": 3.7444542771711165, "tokens_seen": 829161472 }, { "epoch": 0.25, "learning_rate": 0.0007562189054726369, "loss": 0.0745, "theoretical_loss": 3.744338592257222, "tokens_seen": 829423616 }, { "epoch": 0.25, "learning_rate": 0.0007561386615310544, "loss": 0.076, "theoretical_loss": 3.744222954134334, "tokens_seen": 829685760 }, { "epoch": 0.25, "learning_rate": 0.000756058417589472, "loss": 0.0747, "theoretical_loss": 3.7441073627687524, "tokens_seen": 829947904 }, { "epoch": 0.25, "learning_rate": 0.0007559781736478896, "loss": 0.0769, "theoretical_loss": 3.743991818126812, "tokens_seen": 830210048 }, { "epoch": 0.25, "learning_rate": 0.0007558979297063072, "loss": 0.0761, "theoretical_loss": 3.7438763201748815, "tokens_seen": 830472192 }, { "epoch": 0.25, "learning_rate": 0.0007558176857647247, "loss": 0.0765, "theoretical_loss": 3.743760868879365, "tokens_seen": 830734336 }, { "epoch": 0.25, "learning_rate": 0.0007557374418231424, "loss": 0.0748, "theoretical_loss": 3.743645464206702, "tokens_seen": 830996480 }, { "epoch": 0.25, "learning_rate": 0.00075565719788156, "loss": 0.0757, "theoretical_loss": 3.743530106123365, "tokens_seen": 831258624 }, { "epoch": 0.25, "learning_rate": 0.0007555769539399775, "loss": 0.0756, "theoretical_loss": 3.7434147945958642, "tokens_seen": 831520768 }, { "epoch": 0.25, "learning_rate": 0.0007554967099983952, "loss": 0.0766, "theoretical_loss": 3.7432995295907405, "tokens_seen": 831782912 }, { "epoch": 0.25, "learning_rate": 0.0007554164660568127, "loss": 0.0781, "theoretical_loss": 3.7431843110745726, "tokens_seen": 832045056 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0015261891530826688, "objective/train/docs_used": 307306, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4608768224716187, "objective/train/original_loss": 1.4608770608901978, "objective/train/theoretical_loss": 3.743069139013972, "objective/train/tokens_used": 852767200, "objective/train/value_avg": -0.009246826171875, "objective/train/value_loss": 0.00019339752907399088, "objective/train/value_max": -0.00014889240264892578, "objective/train/value_min": -0.43701171875, "objective/train/value_reward_corr": 0.673779683354758, "objective/train/value_std": 0.0131378173828125, "objective/train/weight_avg": 1.0016170740127563, "objective/train/weighted_lm_loss": 1.4626826047897339, "objective/train/weights_max": 1.353039264678955, "objective/train/weights_min": 0.37851035594940186, "theoretical_loss": 3.743069139013972, "tokens_seen": 832307200 }, { "epoch": 0.25, "learning_rate": 0.0007553362221152303, "loss": 0.075, "theoretical_loss": 3.743069139013972, "tokens_seen": 832307200 }, { "epoch": 0.25, "learning_rate": 0.0007552559781736479, "loss": 0.076, "theoretical_loss": 3.742954013375586, "tokens_seen": 832569344 }, { "epoch": 0.25, "learning_rate": 0.0007551757342320655, "loss": 0.0768, "theoretical_loss": 3.742838934126094, "tokens_seen": 832831488 }, { "epoch": 0.25, "learning_rate": 0.000755095490290483, "loss": 0.0764, "theoretical_loss": 3.742723901232213, "tokens_seen": 833093632 }, { "epoch": 0.25, "learning_rate": 0.0007550152463489006, "loss": 0.0764, "theoretical_loss": 3.742608914660692, "tokens_seen": 833355776 }, { "epoch": 0.25, "learning_rate": 0.0007549350024073182, "loss": 0.078, "theoretical_loss": 3.742493974378314, "tokens_seen": 833617920 }, { "epoch": 0.25, "learning_rate": 0.0007548547584657359, "loss": 0.0751, "theoretical_loss": 3.742379080351899, "tokens_seen": 833880064 }, { "epoch": 0.25, "learning_rate": 0.0007547745145241535, "loss": 0.079, "theoretical_loss": 3.7422642325482975, "tokens_seen": 834142208 }, { "epoch": 0.25, "learning_rate": 0.000754694270582571, "loss": 0.0808, "theoretical_loss": 3.742149430934398, "tokens_seen": 834404352 }, { "epoch": 0.25, "learning_rate": 0.0007546140266409887, "loss": 0.0806, "theoretical_loss": 3.7420346754771208, "tokens_seen": 834666496 }, { "epoch": 0.25, "learning_rate": 0.0007545337826994062, "loss": 0.0763, "theoretical_loss": 3.7419199661434197, "tokens_seen": 834928640 }, { "epoch": 0.25, "learning_rate": 0.0007544535387578238, "loss": 0.078, "theoretical_loss": 3.7418053029002842, "tokens_seen": 835190784 }, { "epoch": 0.25, "learning_rate": 0.0007543732948162414, "loss": 0.0738, "theoretical_loss": 3.7416906857147367, "tokens_seen": 835452928 }, { "epoch": 0.25, "learning_rate": 0.0007542930508746589, "loss": 0.0758, "theoretical_loss": 3.741576114553835, "tokens_seen": 835715072 }, { "epoch": 0.25, "learning_rate": 0.0007542128069330765, "loss": 0.0775, "theoretical_loss": 3.7414615893846683, "tokens_seen": 835977216 }, { "epoch": 0.25, "learning_rate": 0.0007541325629914942, "loss": 0.0766, "theoretical_loss": 3.741347110174362, "tokens_seen": 836239360 }, { "epoch": 0.25, "learning_rate": 0.0007540523190499118, "loss": 0.0775, "theoretical_loss": 3.741232676890074, "tokens_seen": 836501504 }, { "epoch": 0.25, "learning_rate": 0.0007539720751083293, "loss": 0.0782, "theoretical_loss": 3.7411182894989965, "tokens_seen": 836763648 }, { "epoch": 0.25, "learning_rate": 0.000753891831166747, "loss": 0.0778, "theoretical_loss": 3.7410039479683546, "tokens_seen": 837025792 }, { "epoch": 0.25, "learning_rate": 0.0007538115872251645, "loss": 0.0758, "theoretical_loss": 3.740889652265408, "tokens_seen": 837287936 }, { "epoch": 0.25, "learning_rate": 0.0007537313432835821, "loss": 0.0772, "theoretical_loss": 3.7407754023574507, "tokens_seen": 837550080 }, { "epoch": 0.25, "learning_rate": 0.0007536510993419997, "loss": 0.0767, "theoretical_loss": 3.7406611982118076, "tokens_seen": 837812224 }, { "epoch": 0.25, "learning_rate": 0.0007535708554004172, "loss": 0.0768, "theoretical_loss": 3.74054703979584, "tokens_seen": 838074368 }, { "epoch": 0.25, "learning_rate": 0.000753490611458835, "loss": 0.0782, "theoretical_loss": 3.7404329270769403, "tokens_seen": 838336512 }, { "epoch": 0.25, "learning_rate": 0.0007534103675172525, "loss": 0.0758, "theoretical_loss": 3.740318860022537, "tokens_seen": 838598656 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0015410750638693571, "objective/train/docs_used": 309674, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.561414361000061, "objective/train/original_loss": 1.5614142417907715, "objective/train/theoretical_loss": 3.7402048386000892, "objective/train/tokens_used": 859320800, "objective/train/value_avg": -0.01053619384765625, "objective/train/value_loss": 0.0005841842503286898, "objective/train/value_max": -0.0002846717834472656, "objective/train/value_min": -0.48974609375, "objective/train/value_reward_corr": 0.5773049027811739, "objective/train/value_std": 0.016204833984375, "objective/train/weight_avg": 1.0017683506011963, "objective/train/weighted_lm_loss": 1.563459038734436, "objective/train/weights_max": 1.4500267505645752, "objective/train/weights_min": 0.05458702892065048, "theoretical_loss": 3.7402048386000892, "tokens_seen": 838860800 }, { "epoch": 0.25, "learning_rate": 0.0007533301235756701, "loss": 0.0754, "theoretical_loss": 3.7402048386000892, "tokens_seen": 838860800 }, { "epoch": 0.25, "learning_rate": 0.0007532498796340877, "loss": 0.0739, "theoretical_loss": 3.740090862777091, "tokens_seen": 839122944 }, { "epoch": 0.25, "learning_rate": 0.0007531696356925052, "loss": 0.0778, "theoretical_loss": 3.7399769325210697, "tokens_seen": 839385088 }, { "epoch": 0.25, "learning_rate": 0.0007530893917509228, "loss": 0.0771, "theoretical_loss": 3.7398630477995853, "tokens_seen": 839647232 }, { "epoch": 0.25, "learning_rate": 0.0007530091478093404, "loss": 0.0748, "theoretical_loss": 3.7397492085802315, "tokens_seen": 839909376 }, { "epoch": 0.25, "learning_rate": 0.000752928903867758, "loss": 0.0764, "theoretical_loss": 3.739635414830635, "tokens_seen": 840171520 }, { "epoch": 0.25, "learning_rate": 0.0007528486599261755, "loss": 0.076, "theoretical_loss": 3.7395216665184554, "tokens_seen": 840433664 }, { "epoch": 0.25, "learning_rate": 0.0007527684159845932, "loss": 0.0792, "theoretical_loss": 3.739407963611386, "tokens_seen": 840695808 }, { "epoch": 0.25, "learning_rate": 0.0007526881720430108, "loss": 0.074, "theoretical_loss": 3.739294306077152, "tokens_seen": 840957952 }, { "epoch": 0.25, "learning_rate": 0.0007526079281014283, "loss": 0.0785, "theoretical_loss": 3.7391806938835126, "tokens_seen": 841220096 }, { "epoch": 0.26, "learning_rate": 0.000752527684159846, "loss": 0.0755, "theoretical_loss": 3.7390671269982603, "tokens_seen": 841482240 }, { "epoch": 0.26, "learning_rate": 0.0007524474402182635, "loss": 0.0761, "theoretical_loss": 3.7389536053892187, "tokens_seen": 841744384 }, { "epoch": 0.26, "learning_rate": 0.0007523671962766812, "loss": 0.0741, "theoretical_loss": 3.738840129024246, "tokens_seen": 842006528 }, { "epoch": 0.26, "learning_rate": 0.0007522869523350987, "loss": 0.0771, "theoretical_loss": 3.738726697871233, "tokens_seen": 842268672 }, { "epoch": 0.26, "learning_rate": 0.0007522067083935163, "loss": 0.0795, "theoretical_loss": 3.738613311898103, "tokens_seen": 842530816 }, { "epoch": 0.26, "learning_rate": 0.0007521264644519339, "loss": 0.0762, "theoretical_loss": 3.7384999710728106, "tokens_seen": 842792960 }, { "epoch": 0.26, "learning_rate": 0.0007520462205103514, "loss": 0.0768, "theoretical_loss": 3.738386675363346, "tokens_seen": 843055104 }, { "epoch": 0.26, "learning_rate": 0.000751965976568769, "loss": 0.0763, "theoretical_loss": 3.738273424737729, "tokens_seen": 843317248 }, { "epoch": 0.26, "learning_rate": 0.0007518857326271867, "loss": 0.0792, "theoretical_loss": 3.7381602191640146, "tokens_seen": 843579392 }, { "epoch": 0.26, "learning_rate": 0.0007518054886856043, "loss": 0.0769, "theoretical_loss": 3.738047058610289, "tokens_seen": 843841536 }, { "epoch": 0.26, "learning_rate": 0.0007517252447440218, "loss": 0.0767, "theoretical_loss": 3.7379339430446707, "tokens_seen": 844103680 }, { "epoch": 0.26, "learning_rate": 0.0007516450008024395, "loss": 0.075, "theoretical_loss": 3.7378208724353117, "tokens_seen": 844365824 }, { "epoch": 0.26, "learning_rate": 0.000751564756860857, "loss": 0.0776, "theoretical_loss": 3.7377078467503955, "tokens_seen": 844627968 }, { "epoch": 0.26, "learning_rate": 0.0007514845129192746, "loss": 0.0779, "theoretical_loss": 3.737594865958138, "tokens_seen": 844890112 }, { "epoch": 0.26, "learning_rate": 0.0007514042689776922, "loss": 0.0759, "theoretical_loss": 3.7374819300267883, "tokens_seen": 845152256 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0018314362969249487, "objective/train/docs_used": 312167, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4999068975448608, "objective/train/original_loss": 1.4999067783355713, "objective/train/theoretical_loss": 3.7373690389246272, "objective/train/tokens_used": 865874400, "objective/train/value_avg": -0.00806427001953125, "objective/train/value_loss": 0.00013757370470557362, "objective/train/value_max": -0.0001323223114013672, "objective/train/value_min": -0.31982421875, "objective/train/value_reward_corr": 0.6737398459252089, "objective/train/value_std": 0.01104736328125, "objective/train/weight_avg": 1.0018991231918335, "objective/train/weighted_lm_loss": 1.5027458667755127, "objective/train/weights_max": 1.1910135746002197, "objective/train/weights_min": 0.6367841958999634, "theoretical_loss": 3.7373690389246272, "tokens_seen": 845414400 }, { "epoch": 0.26, "learning_rate": 0.0007513240250361097, "loss": 0.0768, "theoretical_loss": 3.7373690389246272, "tokens_seen": 845414400 }, { "epoch": 0.26, "learning_rate": 0.0007512437810945275, "loss": 0.0783, "theoretical_loss": 3.737256192619967, "tokens_seen": 845676544 }, { "epoch": 0.26, "learning_rate": 0.000751163537152945, "loss": 0.0751, "theoretical_loss": 3.737143391081154, "tokens_seen": 845938688 }, { "epoch": 0.26, "learning_rate": 0.0007510832932113626, "loss": 0.076, "theoretical_loss": 3.7370306342765653, "tokens_seen": 846200832 }, { "epoch": 0.26, "learning_rate": 0.0007510030492697802, "loss": 0.0736, "theoretical_loss": 3.73691792217461, "tokens_seen": 846462976 }, { "epoch": 0.26, "learning_rate": 0.0007509228053281978, "loss": 0.0778, "theoretical_loss": 3.7368052547437305, "tokens_seen": 846725120 }, { "epoch": 0.26, "learning_rate": 0.0007508425613866153, "loss": 0.077, "theoretical_loss": 3.7366926319524003, "tokens_seen": 846987264 }, { "epoch": 0.26, "learning_rate": 0.0007507623174450329, "loss": 0.0742, "theoretical_loss": 3.736580053769125, "tokens_seen": 847249408 }, { "epoch": 0.26, "learning_rate": 0.0007506820735034505, "loss": 0.0727, "theoretical_loss": 3.736467520162442, "tokens_seen": 847511552 }, { "epoch": 0.26, "learning_rate": 0.000750601829561868, "loss": 0.0769, "theoretical_loss": 3.736355031100922, "tokens_seen": 847773696 }, { "epoch": 0.26, "learning_rate": 0.0007505215856202858, "loss": 0.0763, "theoretical_loss": 3.7362425865531654, "tokens_seen": 848035840 }, { "epoch": 0.26, "learning_rate": 0.0007504413416787033, "loss": 0.0773, "theoretical_loss": 3.736130186487806, "tokens_seen": 848297984 }, { "epoch": 0.26, "learning_rate": 0.0007503610977371209, "loss": 0.0748, "theoretical_loss": 3.736017830873508, "tokens_seen": 848560128 }, { "epoch": 0.26, "learning_rate": 0.0007502808537955385, "loss": 0.0769, "theoretical_loss": 3.7359055196789694, "tokens_seen": 848822272 }, { "epoch": 0.26, "learning_rate": 0.000750200609853956, "loss": 0.0747, "theoretical_loss": 3.7357932528729183, "tokens_seen": 849084416 }, { "epoch": 0.26, "learning_rate": 0.0007501203659123736, "loss": 0.0785, "theoretical_loss": 3.7356810304241144, "tokens_seen": 849346560 }, { "epoch": 0.26, "learning_rate": 0.0007500401219707912, "loss": 0.0781, "theoretical_loss": 3.73556885230135, "tokens_seen": 849608704 }, { "epoch": 0.26, "learning_rate": 0.0007499598780292088, "loss": 0.0755, "theoretical_loss": 3.735456718473449, "tokens_seen": 849870848 }, { "epoch": 0.26, "learning_rate": 0.0007498796340876264, "loss": 0.0716, "theoretical_loss": 3.7353446289092647, "tokens_seen": 850132992 }, { "epoch": 0.26, "learning_rate": 0.000749799390146044, "loss": 0.0779, "theoretical_loss": 3.7352325835776856, "tokens_seen": 850395136 }, { "epoch": 0.26, "learning_rate": 0.0007497191462044616, "loss": 0.0772, "theoretical_loss": 3.7351205824476277, "tokens_seen": 850657280 }, { "epoch": 0.26, "learning_rate": 0.0007496389022628792, "loss": 0.079, "theoretical_loss": 3.7350086254880415, "tokens_seen": 850919424 }, { "epoch": 0.26, "learning_rate": 0.0007495586583212968, "loss": 0.0742, "theoretical_loss": 3.734896712667907, "tokens_seen": 851181568 }, { "epoch": 0.26, "learning_rate": 0.0007494784143797143, "loss": 0.0753, "theoretical_loss": 3.734784843956236, "tokens_seen": 851443712 }, { "epoch": 0.26, "learning_rate": 0.000749398170438132, "loss": 0.0745, "theoretical_loss": 3.7346730193220727, "tokens_seen": 851705856 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0010687694884836674, "objective/train/docs_used": 314587, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.526440143585205, "objective/train/original_loss": 1.526440143585205, "objective/train/theoretical_loss": 3.7345612387344906, "objective/train/tokens_used": 872428000, "objective/train/value_avg": -0.00872802734375, "objective/train/value_loss": 0.0003840262070298195, "objective/train/value_max": -0.0002779960632324219, "objective/train/value_min": -0.62109375, "objective/train/value_reward_corr": 0.6480027416305001, "objective/train/value_std": 0.016021728515625, "objective/train/weight_avg": 1.0012387037277222, "objective/train/weighted_lm_loss": 1.527116060256958, "objective/train/weights_max": 1.4146990776062012, "objective/train/weights_min": 0.370330810546875, "theoretical_loss": 3.7345612387344906, "tokens_seen": 851968000 }, { "epoch": 0.26, "learning_rate": 0.0007493179264965495, "loss": 0.0755, "theoretical_loss": 3.7345612387344906, "tokens_seen": 851968000 }, { "epoch": 0.26, "learning_rate": 0.0007492376825549671, "loss": 0.078, "theoretical_loss": 3.734449502162596, "tokens_seen": 852230144 }, { "epoch": 0.26, "learning_rate": 0.0007491574386133847, "loss": 0.0772, "theoretical_loss": 3.7343378095755257, "tokens_seen": 852492288 }, { "epoch": 0.26, "learning_rate": 0.0007490771946718022, "loss": 0.0743, "theoretical_loss": 3.7342261609424483, "tokens_seen": 852754432 }, { "epoch": 0.26, "learning_rate": 0.0007489969507302198, "loss": 0.0787, "theoretical_loss": 3.7341145562325613, "tokens_seen": 853016576 }, { "epoch": 0.26, "learning_rate": 0.0007489167067886375, "loss": 0.0761, "theoretical_loss": 3.734002995415096, "tokens_seen": 853278720 }, { "epoch": 0.26, "learning_rate": 0.0007488364628470551, "loss": 0.0771, "theoretical_loss": 3.7338914784593134, "tokens_seen": 853540864 }, { "epoch": 0.26, "learning_rate": 0.0007487562189054726, "loss": 0.0754, "theoretical_loss": 3.733780005334505, "tokens_seen": 853803008 }, { "epoch": 0.26, "learning_rate": 0.0007486759749638903, "loss": 0.0786, "theoretical_loss": 3.733668576009995, "tokens_seen": 854065152 }, { "epoch": 0.26, "learning_rate": 0.0007485957310223078, "loss": 0.0764, "theoretical_loss": 3.733557190455136, "tokens_seen": 854327296 }, { "epoch": 0.26, "learning_rate": 0.0007485154870807254, "loss": 0.074, "theoretical_loss": 3.733445848639313, "tokens_seen": 854589440 }, { "epoch": 0.26, "learning_rate": 0.000748435243139143, "loss": 0.0753, "theoretical_loss": 3.733334550531942, "tokens_seen": 854851584 }, { "epoch": 0.26, "learning_rate": 0.0007483549991975605, "loss": 0.0776, "theoretical_loss": 3.7332232961024694, "tokens_seen": 855113728 }, { "epoch": 0.26, "learning_rate": 0.0007482747552559783, "loss": 0.0758, "theoretical_loss": 3.7331120853203714, "tokens_seen": 855375872 }, { "epoch": 0.26, "learning_rate": 0.0007481945113143958, "loss": 0.0749, "theoretical_loss": 3.733000918155156, "tokens_seen": 855638016 }, { "epoch": 0.26, "learning_rate": 0.0007481142673728134, "loss": 0.0725, "theoretical_loss": 3.7328897945763617, "tokens_seen": 855900160 }, { "epoch": 0.26, "learning_rate": 0.000748034023431231, "loss": 0.078, "theoretical_loss": 3.7327787145535574, "tokens_seen": 856162304 }, { "epoch": 0.26, "learning_rate": 0.0007479537794896486, "loss": 0.0736, "theoretical_loss": 3.732667678056342, "tokens_seen": 856424448 }, { "epoch": 0.26, "learning_rate": 0.0007478735355480661, "loss": 0.0729, "theoretical_loss": 3.732556685054346, "tokens_seen": 856686592 }, { "epoch": 0.26, "learning_rate": 0.0007477932916064837, "loss": 0.0744, "theoretical_loss": 3.7324457355172296, "tokens_seen": 856948736 }, { "epoch": 0.26, "learning_rate": 0.0007477130476649013, "loss": 0.0746, "theoretical_loss": 3.7323348294146843, "tokens_seen": 857210880 }, { "epoch": 0.26, "learning_rate": 0.0007476328037233188, "loss": 0.0757, "theoretical_loss": 3.73222396671643, "tokens_seen": 857473024 }, { "epoch": 0.26, "learning_rate": 0.0007475525597817365, "loss": 0.0766, "theoretical_loss": 3.73211314739222, "tokens_seen": 857735168 }, { "epoch": 0.26, "learning_rate": 0.000747472315840154, "loss": 0.0748, "theoretical_loss": 3.732002371411835, "tokens_seen": 857997312 }, { "epoch": 0.26, "learning_rate": 0.0007473920718985718, "loss": 0.0738, "theoretical_loss": 3.7318916387450876, "tokens_seen": 858259456 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0009312600013799965, "objective/train/docs_used": 317142, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5528024435043335, "objective/train/original_loss": 1.5528024435043335, "objective/train/theoretical_loss": 3.7317809493618204, "objective/train/tokens_used": 878981600, "objective/train/value_avg": -0.00759124755859375, "objective/train/value_loss": 0.0003197310143150389, "objective/train/value_max": -0.00022339820861816406, "objective/train/value_min": -0.65625, "objective/train/value_reward_corr": 0.5907966960002585, "objective/train/value_std": 0.01306915283203125, "objective/train/weight_avg": 1.0010712146759033, "objective/train/weighted_lm_loss": 1.5542237758636475, "objective/train/weights_max": 1.8819797039031982, "objective/train/weights_min": 0.3681640923023224, "theoretical_loss": 3.7317809493618204, "tokens_seen": 858521600 }, { "epoch": 0.26, "learning_rate": 0.0007473118279569893, "loss": 0.0775, "theoretical_loss": 3.7317809493618204, "tokens_seen": 858521600 }, { "epoch": 0.26, "learning_rate": 0.0007472315840154068, "loss": 0.0751, "theoretical_loss": 3.7316703032319056, "tokens_seen": 858783744 }, { "epoch": 0.26, "learning_rate": 0.0007471513400738245, "loss": 0.0786, "theoretical_loss": 3.7315597003252474, "tokens_seen": 859045888 }, { "epoch": 0.26, "learning_rate": 0.000747071096132242, "loss": 0.0784, "theoretical_loss": 3.731449140611777, "tokens_seen": 859308032 }, { "epoch": 0.26, "learning_rate": 0.0007469908521906596, "loss": 0.0748, "theoretical_loss": 3.7313386240614577, "tokens_seen": 859570176 }, { "epoch": 0.26, "learning_rate": 0.0007469106082490772, "loss": 0.0761, "theoretical_loss": 3.7312281506442835, "tokens_seen": 859832320 }, { "epoch": 0.26, "learning_rate": 0.0007468303643074948, "loss": 0.0757, "theoretical_loss": 3.7311177203302766, "tokens_seen": 860094464 }, { "epoch": 0.26, "learning_rate": 0.0007467501203659124, "loss": 0.0766, "theoretical_loss": 3.7310073330894906, "tokens_seen": 860356608 }, { "epoch": 0.26, "learning_rate": 0.00074666987642433, "loss": 0.0739, "theoretical_loss": 3.730896988892008, "tokens_seen": 860618752 }, { "epoch": 0.26, "learning_rate": 0.0007465896324827476, "loss": 0.0808, "theoretical_loss": 3.7307866877079414, "tokens_seen": 860880896 }, { "epoch": 0.26, "learning_rate": 0.0007465093885411651, "loss": 0.0756, "theoretical_loss": 3.730676429507435, "tokens_seen": 861143040 }, { "epoch": 0.26, "learning_rate": 0.0007464291445995828, "loss": 0.0736, "theoretical_loss": 3.730566214260659, "tokens_seen": 861405184 }, { "epoch": 0.26, "learning_rate": 0.0007463489006580003, "loss": 0.0759, "theoretical_loss": 3.730456041937817, "tokens_seen": 861667328 }, { "epoch": 0.26, "learning_rate": 0.0007462686567164179, "loss": 0.0766, "theoretical_loss": 3.730345912509141, "tokens_seen": 861929472 }, { "epoch": 0.26, "learning_rate": 0.0007461884127748355, "loss": 0.074, "theoretical_loss": 3.7302358259448924, "tokens_seen": 862191616 }, { "epoch": 0.26, "learning_rate": 0.000746108168833253, "loss": 0.0776, "theoretical_loss": 3.730125782215362, "tokens_seen": 862453760 }, { "epoch": 0.26, "learning_rate": 0.0007460279248916708, "loss": 0.0766, "theoretical_loss": 3.730015781290872, "tokens_seen": 862715904 }, { "epoch": 0.26, "learning_rate": 0.0007459476809500883, "loss": 0.0783, "theoretical_loss": 3.729905823141771, "tokens_seen": 862978048 }, { "epoch": 0.26, "learning_rate": 0.0007458674370085059, "loss": 0.0739, "theoretical_loss": 3.729795907738441, "tokens_seen": 863240192 }, { "epoch": 0.26, "learning_rate": 0.0007457871930669235, "loss": 0.0746, "theoretical_loss": 3.729686035051291, "tokens_seen": 863502336 }, { "epoch": 0.26, "learning_rate": 0.0007457069491253411, "loss": 0.0761, "theoretical_loss": 3.7295762050507593, "tokens_seen": 863764480 }, { "epoch": 0.26, "learning_rate": 0.0007456267051837586, "loss": 0.0754, "theoretical_loss": 3.7294664177073145, "tokens_seen": 864026624 }, { "epoch": 0.26, "learning_rate": 0.0007455464612421762, "loss": 0.0773, "theoretical_loss": 3.7293566729914547, "tokens_seen": 864288768 }, { "epoch": 0.26, "learning_rate": 0.0007454662173005938, "loss": 0.0756, "theoretical_loss": 3.7292469708737066, "tokens_seen": 864550912 }, { "epoch": 0.26, "learning_rate": 0.0007453859733590113, "loss": 0.0764, "theoretical_loss": 3.729137311324627, "tokens_seen": 864813056 }, { "epoch": 0.26, "objective/train/advantage_avg": -0.00015239574713632464, "objective/train/docs_used": 319421, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4767508506774902, "objective/train/original_loss": 1.4767508506774902, "objective/train/theoretical_loss": 3.7290276943148015, "objective/train/tokens_used": 885535200, "objective/train/value_avg": -0.00782012939453125, "objective/train/value_loss": 0.0005899901734665036, "objective/train/value_max": -0.00014889240264892578, "objective/train/value_min": -0.95166015625, "objective/train/value_reward_corr": 0.5327929289583835, "objective/train/value_std": 0.0201263427734375, "objective/train/weight_avg": 1.0001168251037598, "objective/train/weighted_lm_loss": 1.4773733615875244, "objective/train/weights_max": 2.5354816913604736, "objective/train/weights_min": 0.23555737733840942, "theoretical_loss": 3.7290276943148015, "tokens_seen": 865075200 }, { "epoch": 0.26, "learning_rate": 0.000745305729417429, "loss": 0.073, "theoretical_loss": 3.7290276943148015, "tokens_seen": 865075200 }, { "epoch": 0.26, "learning_rate": 0.0007452254854758466, "loss": 0.0777, "theoretical_loss": 3.7289181198148458, "tokens_seen": 865337344 }, { "epoch": 0.26, "learning_rate": 0.0007451452415342642, "loss": 0.0771, "theoretical_loss": 3.7288085877954025, "tokens_seen": 865599488 }, { "epoch": 0.26, "learning_rate": 0.0007450649975926818, "loss": 0.0765, "theoretical_loss": 3.728699098227146, "tokens_seen": 865861632 }, { "epoch": 0.26, "learning_rate": 0.0007449847536510994, "loss": 0.0757, "theoretical_loss": 3.728589651080779, "tokens_seen": 866123776 }, { "epoch": 0.26, "learning_rate": 0.000744904509709517, "loss": 0.0753, "theoretical_loss": 3.728480246327032, "tokens_seen": 866385920 }, { "epoch": 0.26, "learning_rate": 0.0007448242657679345, "loss": 0.0744, "theoretical_loss": 3.7283708839366656, "tokens_seen": 866648064 }, { "epoch": 0.26, "learning_rate": 0.0007447440218263521, "loss": 0.0766, "theoretical_loss": 3.72826156388047, "tokens_seen": 866910208 }, { "epoch": 0.26, "learning_rate": 0.0007446637778847697, "loss": 0.0762, "theoretical_loss": 3.728152286129263, "tokens_seen": 867172352 }, { "epoch": 0.26, "learning_rate": 0.0007445835339431873, "loss": 0.0744, "theoretical_loss": 3.728043050653893, "tokens_seen": 867434496 }, { "epoch": 0.26, "learning_rate": 0.0007445032900016049, "loss": 0.0797, "theoretical_loss": 3.7279338574252354, "tokens_seen": 867696640 }, { "epoch": 0.26, "learning_rate": 0.0007444230460600226, "loss": 0.0779, "theoretical_loss": 3.7278247064141956, "tokens_seen": 867958784 }, { "epoch": 0.26, "learning_rate": 0.0007443428021184401, "loss": 0.0771, "theoretical_loss": 3.7277155975917076, "tokens_seen": 868220928 }, { "epoch": 0.26, "learning_rate": 0.0007442625581768576, "loss": 0.0779, "theoretical_loss": 3.7276065309287345, "tokens_seen": 868483072 }, { "epoch": 0.26, "learning_rate": 0.0007441823142352753, "loss": 0.0774, "theoretical_loss": 3.727497506396267, "tokens_seen": 868745216 }, { "epoch": 0.26, "learning_rate": 0.0007441020702936928, "loss": 0.075, "theoretical_loss": 3.7273885239653266, "tokens_seen": 869007360 }, { "epoch": 0.26, "learning_rate": 0.0007440218263521104, "loss": 0.0752, "theoretical_loss": 3.727279583606961, "tokens_seen": 869269504 }, { "epoch": 0.26, "learning_rate": 0.000743941582410528, "loss": 0.0763, "theoretical_loss": 3.727170685292248, "tokens_seen": 869531648 }, { "epoch": 0.26, "learning_rate": 0.0007438613384689456, "loss": 0.0769, "theoretical_loss": 3.7270618289922943, "tokens_seen": 869793792 }, { "epoch": 0.26, "learning_rate": 0.0007437810945273631, "loss": 0.0752, "theoretical_loss": 3.7269530146782337, "tokens_seen": 870055936 }, { "epoch": 0.26, "learning_rate": 0.0007437008505857808, "loss": 0.0808, "theoretical_loss": 3.72684424232123, "tokens_seen": 870318080 }, { "epoch": 0.26, "learning_rate": 0.0007436206066441984, "loss": 0.077, "theoretical_loss": 3.7267355118924748, "tokens_seen": 870580224 }, { "epoch": 0.26, "learning_rate": 0.000743540362702616, "loss": 0.0752, "theoretical_loss": 3.726626823363188, "tokens_seen": 870842368 }, { "epoch": 0.26, "learning_rate": 0.0007434601187610336, "loss": 0.0754, "theoretical_loss": 3.7265181767046176, "tokens_seen": 871104512 }, { "epoch": 0.26, "learning_rate": 0.0007433798748194511, "loss": 0.0772, "theoretical_loss": 3.726409571888042, "tokens_seen": 871366656 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0006801082054153085, "objective/train/docs_used": 321822, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.6108815670013428, "objective/train/original_loss": 1.6108815670013428, "objective/train/theoretical_loss": 3.7263010088847652, "objective/train/tokens_used": 892088800, "objective/train/value_avg": -0.00757598876953125, "objective/train/value_loss": 0.00016946658433880657, "objective/train/value_max": -0.00016224384307861328, "objective/train/value_min": -0.28515625, "objective/train/value_reward_corr": 0.7011885448576807, "objective/train/value_std": 0.01305389404296875, "objective/train/weight_avg": 1.0007609128952026, "objective/train/weighted_lm_loss": 1.6116539239883423, "objective/train/weights_max": 1.1948398351669312, "objective/train/weights_min": 0.39612501859664917, "theoretical_loss": 3.7263010088847652, "tokens_seen": 871628800 }, { "epoch": 0.26, "learning_rate": 0.0007432996308778688, "loss": 0.0783, "theoretical_loss": 3.7263010088847652, "tokens_seen": 871628800 }, { "epoch": 0.26, "learning_rate": 0.0007432193869362863, "loss": 0.0751, "theoretical_loss": 3.726192487666121, "tokens_seen": 871890944 }, { "epoch": 0.26, "learning_rate": 0.0007431391429947038, "loss": 0.0762, "theoretical_loss": 3.7260840082034714, "tokens_seen": 872153088 }, { "epoch": 0.26, "learning_rate": 0.0007430588990531216, "loss": 0.0758, "theoretical_loss": 3.7259755704682065, "tokens_seen": 872415232 }, { "epoch": 0.26, "learning_rate": 0.0007429786551115391, "loss": 0.0765, "theoretical_loss": 3.7258671744317446, "tokens_seen": 872677376 }, { "epoch": 0.26, "learning_rate": 0.0007428984111699567, "loss": 0.0767, "theoretical_loss": 3.725758820065531, "tokens_seen": 872939520 }, { "epoch": 0.26, "learning_rate": 0.0007428181672283743, "loss": 0.0759, "theoretical_loss": 3.725650507341042, "tokens_seen": 873201664 }, { "epoch": 0.26, "learning_rate": 0.0007427379232867919, "loss": 0.0773, "theoretical_loss": 3.7255422362297788, "tokens_seen": 873463808 }, { "epoch": 0.26, "learning_rate": 0.0007426576793452094, "loss": 0.0758, "theoretical_loss": 3.7254340067032725, "tokens_seen": 873725952 }, { "epoch": 0.26, "learning_rate": 0.000742577435403627, "loss": 0.0765, "theoretical_loss": 3.7253258187330816, "tokens_seen": 873988096 }, { "epoch": 0.26, "learning_rate": 0.0007424971914620446, "loss": 0.0761, "theoretical_loss": 3.7252176722907926, "tokens_seen": 874250240 }, { "epoch": 0.27, "learning_rate": 0.0007424169475204621, "loss": 0.0786, "theoretical_loss": 3.725109567348021, "tokens_seen": 874512384 }, { "epoch": 0.27, "learning_rate": 0.0007423367035788799, "loss": 0.0777, "theoretical_loss": 3.725001503876408, "tokens_seen": 874774528 }, { "epoch": 0.27, "learning_rate": 0.0007422564596372974, "loss": 0.0753, "theoretical_loss": 3.7248934818476247, "tokens_seen": 875036672 }, { "epoch": 0.27, "learning_rate": 0.0007421762156957151, "loss": 0.0756, "theoretical_loss": 3.7247855012333693, "tokens_seen": 875298816 }, { "epoch": 0.27, "learning_rate": 0.0007420959717541326, "loss": 0.0758, "theoretical_loss": 3.7246775620053665, "tokens_seen": 875560960 }, { "epoch": 0.27, "learning_rate": 0.0007420157278125502, "loss": 0.0766, "theoretical_loss": 3.724569664135372, "tokens_seen": 875823104 }, { "epoch": 0.27, "learning_rate": 0.0007419354838709678, "loss": 0.0761, "theoretical_loss": 3.7244618075951657, "tokens_seen": 876085248 }, { "epoch": 0.27, "learning_rate": 0.0007418552399293853, "loss": 0.0779, "theoretical_loss": 3.7243539923565576, "tokens_seen": 876347392 }, { "epoch": 0.27, "learning_rate": 0.0007417749959878029, "loss": 0.0773, "theoretical_loss": 3.724246218391384, "tokens_seen": 876609536 }, { "epoch": 0.27, "learning_rate": 0.0007416947520462205, "loss": 0.0779, "theoretical_loss": 3.7241384856715096, "tokens_seen": 876871680 }, { "epoch": 0.27, "learning_rate": 0.0007416145081046381, "loss": 0.0754, "theoretical_loss": 3.724030794168826, "tokens_seen": 877133824 }, { "epoch": 0.27, "learning_rate": 0.0007415342641630557, "loss": 0.0752, "theoretical_loss": 3.723923143855253, "tokens_seen": 877395968 }, { "epoch": 0.27, "learning_rate": 0.0007414540202214734, "loss": 0.0744, "theoretical_loss": 3.723815534702738, "tokens_seen": 877658112 }, { "epoch": 0.27, "learning_rate": 0.0007413737762798909, "loss": 0.0784, "theoretical_loss": 3.7237079666832553, "tokens_seen": 877920256 }, { "epoch": 0.27, "objective/train/advantage_avg": 9.239621431333944e-05, "objective/train/docs_used": 324331, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5191644430160522, "objective/train/original_loss": 1.5191644430160522, "objective/train/theoretical_loss": 3.7236004397688065, "objective/train/tokens_used": 898642400, "objective/train/value_avg": -0.01021575927734375, "objective/train/value_loss": 0.0004930080031044781, "objective/train/value_max": -0.0001596212387084961, "objective/train/value_min": -0.9365234375, "objective/train/value_reward_corr": 0.6092888427252324, "objective/train/value_std": 0.017242431640625, "objective/train/weight_avg": 1.0003056526184082, "objective/train/weighted_lm_loss": 1.51966392993927, "objective/train/weights_max": 1.5377529859542847, "objective/train/weights_min": 0.36856982111930847, "theoretical_loss": 3.7236004397688065, "tokens_seen": 878182400 }, { "epoch": 0.27, "learning_rate": 0.0007412935323383084, "loss": 0.0779, "theoretical_loss": 3.7236004397688065, "tokens_seen": 878182400 }, { "epoch": 0.27, "learning_rate": 0.0007412132883967261, "loss": 0.0754, "theoretical_loss": 3.723492953931421, "tokens_seen": 878444544 }, { "epoch": 0.27, "learning_rate": 0.0007411330444551436, "loss": 0.0758, "theoretical_loss": 3.7233855091431565, "tokens_seen": 878706688 }, { "epoch": 0.27, "learning_rate": 0.0007410528005135613, "loss": 0.0759, "theoretical_loss": 3.723278105376096, "tokens_seen": 878968832 }, { "epoch": 0.27, "learning_rate": 0.0007409725565719788, "loss": 0.0747, "theoretical_loss": 3.723170742602351, "tokens_seen": 879230976 }, { "epoch": 0.27, "learning_rate": 0.0007408923126303964, "loss": 0.0772, "theoretical_loss": 3.7230634207940607, "tokens_seen": 879493120 }, { "epoch": 0.27, "learning_rate": 0.0007408120686888141, "loss": 0.0771, "theoretical_loss": 3.7229561399233906, "tokens_seen": 879755264 }, { "epoch": 0.27, "learning_rate": 0.0007407318247472316, "loss": 0.0789, "theoretical_loss": 3.7228488999625338, "tokens_seen": 880017408 }, { "epoch": 0.27, "learning_rate": 0.0007406515808056492, "loss": 0.0773, "theoretical_loss": 3.722741700883711, "tokens_seen": 880279552 }, { "epoch": 0.27, "learning_rate": 0.0007405713368640668, "loss": 0.0755, "theoretical_loss": 3.7226345426591694, "tokens_seen": 880541696 }, { "epoch": 0.27, "learning_rate": 0.0007404910929224844, "loss": 0.073, "theoretical_loss": 3.722527425261183, "tokens_seen": 880803840 }, { "epoch": 0.27, "learning_rate": 0.0007404108489809019, "loss": 0.0782, "theoretical_loss": 3.7224203486620535, "tokens_seen": 881065984 }, { "epoch": 0.27, "learning_rate": 0.0007403306050393196, "loss": 0.0775, "theoretical_loss": 3.7223133128341104, "tokens_seen": 881328128 }, { "epoch": 0.27, "learning_rate": 0.0007402503610977371, "loss": 0.0751, "theoretical_loss": 3.722206317749708, "tokens_seen": 881590272 }, { "epoch": 0.27, "learning_rate": 0.0007401701171561546, "loss": 0.0777, "theoretical_loss": 3.722099363381229, "tokens_seen": 881852416 }, { "epoch": 0.27, "learning_rate": 0.0007400898732145724, "loss": 0.0767, "theoretical_loss": 3.7219924497010837, "tokens_seen": 882114560 }, { "epoch": 0.27, "learning_rate": 0.0007400096292729899, "loss": 0.0767, "theoretical_loss": 3.721885576681708, "tokens_seen": 882376704 }, { "epoch": 0.27, "learning_rate": 0.0007399293853314075, "loss": 0.0778, "theoretical_loss": 3.7217787442955643, "tokens_seen": 882638848 }, { "epoch": 0.27, "learning_rate": 0.0007398491413898251, "loss": 0.0765, "theoretical_loss": 3.721671952515144, "tokens_seen": 882900992 }, { "epoch": 0.27, "learning_rate": 0.0007397688974482427, "loss": 0.0761, "theoretical_loss": 3.7215652013129628, "tokens_seen": 883163136 }, { "epoch": 0.27, "learning_rate": 0.0007396886535066603, "loss": 0.0756, "theoretical_loss": 3.7214584906615644, "tokens_seen": 883425280 }, { "epoch": 0.27, "learning_rate": 0.0007396084095650778, "loss": 0.0732, "theoretical_loss": 3.7213518205335196, "tokens_seen": 883687424 }, { "epoch": 0.27, "learning_rate": 0.0007395281656234954, "loss": 0.0764, "theoretical_loss": 3.721245190901425, "tokens_seen": 883949568 }, { "epoch": 0.27, "learning_rate": 0.000739447921681913, "loss": 0.0768, "theoretical_loss": 3.721138601737904, "tokens_seen": 884211712 }, { "epoch": 0.27, "learning_rate": 0.0007393676777403307, "loss": 0.0753, "theoretical_loss": 3.721032053015607, "tokens_seen": 884473856 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0008710287511348724, "objective/train/docs_used": 326745, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.574365496635437, "objective/train/original_loss": 1.5743653774261475, "objective/train/theoretical_loss": 3.720925544707211, "objective/train/tokens_used": 905196000, "objective/train/value_avg": -0.007781982421875, "objective/train/value_loss": 0.00020630114886444062, "objective/train/value_max": -0.00011235475540161133, "objective/train/value_min": -0.50830078125, "objective/train/value_reward_corr": 0.6715271500942702, "objective/train/value_std": 0.013824462890625, "objective/train/weight_avg": 1.0009685754776, "objective/train/weighted_lm_loss": 1.576448917388916, "objective/train/weights_max": 1.275231957435608, "objective/train/weights_min": 0.3771900534629822, "theoretical_loss": 3.720925544707211, "tokens_seen": 884736000 }, { "epoch": 0.27, "learning_rate": 0.0007392874337987482, "loss": 0.0758, "theoretical_loss": 3.720925544707211, "tokens_seen": 884736000 }, { "epoch": 0.27, "learning_rate": 0.0007392071898571659, "loss": 0.0778, "theoretical_loss": 3.720819076785419, "tokens_seen": 884998144 }, { "epoch": 0.27, "learning_rate": 0.0007391269459155834, "loss": 0.074, "theoretical_loss": 3.720712649222961, "tokens_seen": 885260288 }, { "epoch": 0.27, "learning_rate": 0.0007390467019740009, "loss": 0.075, "theoretical_loss": 3.720606261992593, "tokens_seen": 885522432 }, { "epoch": 0.27, "learning_rate": 0.0007389664580324186, "loss": 0.0775, "theoretical_loss": 3.7204999150670988, "tokens_seen": 885784576 }, { "epoch": 0.27, "learning_rate": 0.0007388862140908361, "loss": 0.0753, "theoretical_loss": 3.7203936084192866, "tokens_seen": 886046720 }, { "epoch": 0.27, "learning_rate": 0.0007388059701492537, "loss": 0.0772, "theoretical_loss": 3.720287342021992, "tokens_seen": 886308864 }, { "epoch": 0.27, "learning_rate": 0.0007387257262076713, "loss": 0.0745, "theoretical_loss": 3.720181115848078, "tokens_seen": 886571008 }, { "epoch": 0.27, "learning_rate": 0.000738645482266089, "loss": 0.074, "theoretical_loss": 3.7200749298704316, "tokens_seen": 886833152 }, { "epoch": 0.27, "learning_rate": 0.0007385652383245065, "loss": 0.0778, "theoretical_loss": 3.7199687840619675, "tokens_seen": 887095296 }, { "epoch": 0.27, "learning_rate": 0.0007384849943829242, "loss": 0.0766, "theoretical_loss": 3.719862678395627, "tokens_seen": 887357440 }, { "epoch": 0.27, "learning_rate": 0.0007384047504413417, "loss": 0.0755, "theoretical_loss": 3.719756612844377, "tokens_seen": 887619584 }, { "epoch": 0.27, "learning_rate": 0.0007383245064997593, "loss": 0.0737, "theoretical_loss": 3.7196505873812105, "tokens_seen": 887881728 }, { "epoch": 0.27, "learning_rate": 0.0007382442625581769, "loss": 0.0764, "theoretical_loss": 3.7195446019791465, "tokens_seen": 888143872 }, { "epoch": 0.27, "learning_rate": 0.0007381640186165944, "loss": 0.0755, "theoretical_loss": 3.7194386566112314, "tokens_seen": 888406016 }, { "epoch": 0.27, "learning_rate": 0.0007380837746750121, "loss": 0.0737, "theoretical_loss": 3.7193327512505356, "tokens_seen": 888668160 }, { "epoch": 0.27, "learning_rate": 0.0007380035307334296, "loss": 0.0773, "theoretical_loss": 3.7192268858701576, "tokens_seen": 888930304 }, { "epoch": 0.27, "learning_rate": 0.0007379232867918472, "loss": 0.0742, "theoretical_loss": 3.7191210604432205, "tokens_seen": 889192448 }, { "epoch": 0.27, "learning_rate": 0.0007378430428502649, "loss": 0.0774, "theoretical_loss": 3.7190152749428735, "tokens_seen": 889454592 }, { "epoch": 0.27, "learning_rate": 0.0007377627989086824, "loss": 0.0758, "theoretical_loss": 3.7189095293422927, "tokens_seen": 889716736 }, { "epoch": 0.27, "learning_rate": 0.0007376825549671, "loss": 0.0772, "theoretical_loss": 3.71880382361468, "tokens_seen": 889978880 }, { "epoch": 0.27, "learning_rate": 0.0007376023110255176, "loss": 0.0766, "theoretical_loss": 3.7186981577332614, "tokens_seen": 890241024 }, { "epoch": 0.27, "learning_rate": 0.0007375220670839352, "loss": 0.0735, "theoretical_loss": 3.718592531671291, "tokens_seen": 890503168 }, { "epoch": 0.27, "learning_rate": 0.0007374418231423527, "loss": 0.0759, "theoretical_loss": 3.7184869454020477, "tokens_seen": 890765312 }, { "epoch": 0.27, "learning_rate": 0.0007373615792007704, "loss": 0.0747, "theoretical_loss": 3.7183813988988357, "tokens_seen": 891027456 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0005592244560830295, "objective/train/docs_used": 329220, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.468513011932373, "objective/train/original_loss": 1.468513011932373, "objective/train/theoretical_loss": 3.7182758921349865, "objective/train/tokens_used": 911749600, "objective/train/value_avg": -0.00971221923828125, "objective/train/value_loss": 0.0005271669942885637, "objective/train/value_max": -0.00017535686492919922, "objective/train/value_min": -0.74658203125, "objective/train/value_reward_corr": 0.6262416599375313, "objective/train/value_std": 0.0177154541015625, "objective/train/weight_avg": 1.0007938146591187, "objective/train/weighted_lm_loss": 1.4691156148910522, "objective/train/weights_max": 1.5486183166503906, "objective/train/weights_min": 0.36942073702812195, "theoretical_loss": 3.7182758921349865, "tokens_seen": 891289600 }, { "epoch": 0.27, "learning_rate": 0.0007372813352591879, "loss": 0.078, "theoretical_loss": 3.7182758921349865, "tokens_seen": 891289600 }, { "epoch": 0.27, "learning_rate": 0.0007372010913176055, "loss": 0.076, "theoretical_loss": 3.718170425083856, "tokens_seen": 891551744 }, { "epoch": 0.27, "learning_rate": 0.0007371208473760232, "loss": 0.0755, "theoretical_loss": 3.718064997718826, "tokens_seen": 891813888 }, { "epoch": 0.27, "learning_rate": 0.0007370406034344407, "loss": 0.0754, "theoretical_loss": 3.7179596100133034, "tokens_seen": 892076032 }, { "epoch": 0.27, "learning_rate": 0.0007369603594928584, "loss": 0.078, "theoretical_loss": 3.7178542619407233, "tokens_seen": 892338176 }, { "epoch": 0.27, "learning_rate": 0.0007368801155512759, "loss": 0.0785, "theoretical_loss": 3.7177489534745427, "tokens_seen": 892600320 }, { "epoch": 0.27, "learning_rate": 0.0007367998716096935, "loss": 0.0786, "theoretical_loss": 3.717643684588247, "tokens_seen": 892862464 }, { "epoch": 0.27, "learning_rate": 0.0007367196276681111, "loss": 0.0766, "theoretical_loss": 3.7175384552553457, "tokens_seen": 893124608 }, { "epoch": 0.27, "learning_rate": 0.0007366393837265286, "loss": 0.0739, "theoretical_loss": 3.7174332654493742, "tokens_seen": 893386752 }, { "epoch": 0.27, "learning_rate": 0.0007365591397849462, "loss": 0.0766, "theoretical_loss": 3.717328115143894, "tokens_seen": 893648896 }, { "epoch": 0.27, "learning_rate": 0.0007364788958433638, "loss": 0.0766, "theoretical_loss": 3.717223004312491, "tokens_seen": 893911040 }, { "epoch": 0.27, "learning_rate": 0.0007363986519017815, "loss": 0.0767, "theoretical_loss": 3.717117932928777, "tokens_seen": 894173184 }, { "epoch": 0.27, "learning_rate": 0.000736318407960199, "loss": 0.0752, "theoretical_loss": 3.7170129009663886, "tokens_seen": 894435328 }, { "epoch": 0.27, "learning_rate": 0.0007362381640186167, "loss": 0.0777, "theoretical_loss": 3.716907908398989, "tokens_seen": 894697472 }, { "epoch": 0.27, "learning_rate": 0.0007361579200770342, "loss": 0.0756, "theoretical_loss": 3.7168029552002655, "tokens_seen": 894959616 }, { "epoch": 0.27, "learning_rate": 0.0007360776761354517, "loss": 0.0764, "theoretical_loss": 3.716698041343931, "tokens_seen": 895221760 }, { "epoch": 0.27, "learning_rate": 0.0007359974321938694, "loss": 0.0779, "theoretical_loss": 3.716593166803724, "tokens_seen": 895483904 }, { "epoch": 0.27, "learning_rate": 0.0007359171882522869, "loss": 0.0734, "theoretical_loss": 3.7164883315534087, "tokens_seen": 895746048 }, { "epoch": 0.27, "learning_rate": 0.0007358369443107046, "loss": 0.0773, "theoretical_loss": 3.7163835355667723, "tokens_seen": 896008192 }, { "epoch": 0.27, "learning_rate": 0.0007357567003691221, "loss": 0.074, "theoretical_loss": 3.7162787788176295, "tokens_seen": 896270336 }, { "epoch": 0.27, "learning_rate": 0.0007356764564275397, "loss": 0.0762, "theoretical_loss": 3.716174061279819, "tokens_seen": 896532480 }, { "epoch": 0.27, "learning_rate": 0.0007355962124859574, "loss": 0.0739, "theoretical_loss": 3.7160693829272047, "tokens_seen": 896794624 }, { "epoch": 0.27, "learning_rate": 0.000735515968544375, "loss": 0.0746, "theoretical_loss": 3.715964743733676, "tokens_seen": 897056768 }, { "epoch": 0.27, "learning_rate": 0.0007354357246027925, "loss": 0.0778, "theoretical_loss": 3.7158601436731464, "tokens_seen": 897318912 }, { "epoch": 0.27, "learning_rate": 0.0007353554806612101, "loss": 0.0749, "theoretical_loss": 3.715755582719556, "tokens_seen": 897581056 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0012793752830475569, "objective/train/docs_used": 331577, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5417509078979492, "objective/train/original_loss": 1.5417510271072388, "objective/train/theoretical_loss": 3.7156510608468674, "objective/train/tokens_used": 918303200, "objective/train/value_avg": -0.007205963134765625, "objective/train/value_loss": 0.0003468697250355035, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.890625, "objective/train/value_reward_corr": 0.5998975412340294, "objective/train/value_std": 0.014312744140625, "objective/train/weight_avg": 1.0014266967773438, "objective/train/weighted_lm_loss": 1.5438824892044067, "objective/train/weights_max": 1.7019463777542114, "objective/train/weights_min": 0.2276768684387207, "theoretical_loss": 3.7156510608468674, "tokens_seen": 897843200 }, { "epoch": 0.27, "learning_rate": 0.0007352752367196277, "loss": 0.072, "theoretical_loss": 3.7156510608468674, "tokens_seen": 897843200 }, { "epoch": 0.27, "learning_rate": 0.0007351949927780452, "loss": 0.0769, "theoretical_loss": 3.7155465780290706, "tokens_seen": 898105344 }, { "epoch": 0.27, "learning_rate": 0.0007351147488364629, "loss": 0.0755, "theoretical_loss": 3.7154421342401793, "tokens_seen": 898367488 }, { "epoch": 0.27, "learning_rate": 0.0007350345048948804, "loss": 0.0757, "theoretical_loss": 3.7153377294542325, "tokens_seen": 898629632 }, { "epoch": 0.27, "learning_rate": 0.000734954260953298, "loss": 0.0756, "theoretical_loss": 3.715233363645293, "tokens_seen": 898891776 }, { "epoch": 0.27, "learning_rate": 0.0007348740170117157, "loss": 0.0757, "theoretical_loss": 3.7151290367874497, "tokens_seen": 899153920 }, { "epoch": 0.27, "learning_rate": 0.0007347937730701332, "loss": 0.0744, "theoretical_loss": 3.715024748854815, "tokens_seen": 899416064 }, { "epoch": 0.27, "learning_rate": 0.0007347135291285509, "loss": 0.0753, "theoretical_loss": 3.714920499821528, "tokens_seen": 899678208 }, { "epoch": 0.27, "learning_rate": 0.0007346332851869684, "loss": 0.0746, "theoretical_loss": 3.7148162896617505, "tokens_seen": 899940352 }, { "epoch": 0.27, "learning_rate": 0.000734553041245386, "loss": 0.074, "theoretical_loss": 3.714712118349669, "tokens_seen": 900202496 }, { "epoch": 0.27, "learning_rate": 0.0007344727973038036, "loss": 0.0788, "theoretical_loss": 3.7146079858594976, "tokens_seen": 900464640 }, { "epoch": 0.27, "learning_rate": 0.0007343925533622212, "loss": 0.0753, "theoretical_loss": 3.714503892165471, "tokens_seen": 900726784 }, { "epoch": 0.27, "learning_rate": 0.0007343123094206387, "loss": 0.0758, "theoretical_loss": 3.714399837241851, "tokens_seen": 900988928 }, { "epoch": 0.27, "learning_rate": 0.0007342320654790563, "loss": 0.0748, "theoretical_loss": 3.7142958210629233, "tokens_seen": 901251072 }, { "epoch": 0.27, "learning_rate": 0.000734151821537474, "loss": 0.0769, "theoretical_loss": 3.714191843602998, "tokens_seen": 901513216 }, { "epoch": 0.27, "learning_rate": 0.0007340715775958915, "loss": 0.0769, "theoretical_loss": 3.7140879048364104, "tokens_seen": 901775360 }, { "epoch": 0.27, "learning_rate": 0.0007339913336543092, "loss": 0.0756, "theoretical_loss": 3.7139840047375183, "tokens_seen": 902037504 }, { "epoch": 0.27, "learning_rate": 0.0007339110897127267, "loss": 0.0759, "theoretical_loss": 3.713880143280707, "tokens_seen": 902299648 }, { "epoch": 0.27, "learning_rate": 0.0007338308457711443, "loss": 0.0758, "theoretical_loss": 3.713776320440383, "tokens_seen": 902561792 }, { "epoch": 0.27, "learning_rate": 0.0007337506018295619, "loss": 0.0756, "theoretical_loss": 3.7136725361909795, "tokens_seen": 902823936 }, { "epoch": 0.27, "learning_rate": 0.0007336703578879794, "loss": 0.0763, "theoretical_loss": 3.713568790506953, "tokens_seen": 903086080 }, { "epoch": 0.27, "learning_rate": 0.000733590113946397, "loss": 0.0745, "theoretical_loss": 3.7134650833627854, "tokens_seen": 903348224 }, { "epoch": 0.27, "learning_rate": 0.0007335098700048146, "loss": 0.0735, "theoretical_loss": 3.7133614147329808, "tokens_seen": 903610368 }, { "epoch": 0.27, "learning_rate": 0.0007334296260632322, "loss": 0.0775, "theoretical_loss": 3.7132577845920696, "tokens_seen": 903872512 }, { "epoch": 0.27, "learning_rate": 0.0007333493821216499, "loss": 0.076, "theoretical_loss": 3.7131541929146055, "tokens_seen": 904134656 }, { "epoch": 0.27, "objective/train/advantage_avg": -9.983734344132245e-05, "objective/train/docs_used": 333943, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4496132135391235, "objective/train/original_loss": 1.449613094329834, "objective/train/theoretical_loss": 3.713050639675166, "objective/train/tokens_used": 924856800, "objective/train/value_avg": -0.00740814208984375, "objective/train/value_loss": 0.00036323920357972383, "objective/train/value_max": -0.00014650821685791016, "objective/train/value_min": -0.87744140625, "objective/train/value_reward_corr": 0.7248527212737947, "objective/train/value_std": 0.014312744140625, "objective/train/weight_avg": 1.000061273574829, "objective/train/weighted_lm_loss": 1.4491292238235474, "objective/train/weights_max": 1.8285359144210815, "objective/train/weights_min": 0.3715519905090332, "theoretical_loss": 3.713050639675166, "tokens_seen": 904396800 }, { "epoch": 0.27, "learning_rate": 0.0007332691381800675, "loss": 0.0729, "theoretical_loss": 3.713050639675166, "tokens_seen": 904396800 }, { "epoch": 0.27, "learning_rate": 0.000733188894238485, "loss": 0.0731, "theoretical_loss": 3.712947124848354, "tokens_seen": 904658944 }, { "epoch": 0.27, "learning_rate": 0.0007331086502969026, "loss": 0.0771, "theoretical_loss": 3.7128436484087954, "tokens_seen": 904921088 }, { "epoch": 0.27, "learning_rate": 0.0007330284063553202, "loss": 0.0748, "theoretical_loss": 3.712740210331141, "tokens_seen": 905183232 }, { "epoch": 0.27, "learning_rate": 0.0007329481624137377, "loss": 0.0769, "theoretical_loss": 3.712636810590065, "tokens_seen": 905445376 }, { "epoch": 0.27, "learning_rate": 0.0007328679184721554, "loss": 0.0736, "theoretical_loss": 3.7125334491602664, "tokens_seen": 905707520 }, { "epoch": 0.27, "learning_rate": 0.0007327876745305729, "loss": 0.0743, "theoretical_loss": 3.712430126016467, "tokens_seen": 905969664 }, { "epoch": 0.27, "learning_rate": 0.0007327074305889905, "loss": 0.0751, "theoretical_loss": 3.7123268411334136, "tokens_seen": 906231808 }, { "epoch": 0.27, "learning_rate": 0.0007326271866474082, "loss": 0.0736, "theoretical_loss": 3.7122235944858772, "tokens_seen": 906493952 }, { "epoch": 0.27, "learning_rate": 0.0007325469427058258, "loss": 0.0734, "theoretical_loss": 3.712120386048652, "tokens_seen": 906756096 }, { "epoch": 0.27, "learning_rate": 0.0007324666987642433, "loss": 0.0758, "theoretical_loss": 3.712017215796556, "tokens_seen": 907018240 }, { "epoch": 0.27, "learning_rate": 0.0007323864548226609, "loss": 0.0744, "theoretical_loss": 3.7119140837044315, "tokens_seen": 907280384 }, { "epoch": 0.28, "learning_rate": 0.0007323062108810785, "loss": 0.0735, "theoretical_loss": 3.7118109897471445, "tokens_seen": 907542528 }, { "epoch": 0.28, "learning_rate": 0.000732225966939496, "loss": 0.0732, "theoretical_loss": 3.7117079338995858, "tokens_seen": 907804672 }, { "epoch": 0.28, "learning_rate": 0.0007321457229979137, "loss": 0.0767, "theoretical_loss": 3.7116049161366673, "tokens_seen": 908066816 }, { "epoch": 0.28, "learning_rate": 0.0007320654790563312, "loss": 0.0727, "theoretical_loss": 3.7115019364333275, "tokens_seen": 908328960 }, { "epoch": 0.28, "learning_rate": 0.000731985235114749, "loss": 0.0761, "theoretical_loss": 3.7113989947645276, "tokens_seen": 908591104 }, { "epoch": 0.28, "learning_rate": 0.0007319049911731665, "loss": 0.0754, "theoretical_loss": 3.711296091105252, "tokens_seen": 908853248 }, { "epoch": 0.28, "learning_rate": 0.000731824747231584, "loss": 0.0722, "theoretical_loss": 3.7111932254305096, "tokens_seen": 909115392 }, { "epoch": 0.28, "learning_rate": 0.0007317445032900017, "loss": 0.0747, "theoretical_loss": 3.7110903977153313, "tokens_seen": 909377536 }, { "epoch": 0.28, "learning_rate": 0.0007316642593484192, "loss": 0.0757, "theoretical_loss": 3.710987607934774, "tokens_seen": 909639680 }, { "epoch": 0.28, "learning_rate": 0.0007315840154068368, "loss": 0.0759, "theoretical_loss": 3.7108848560639167, "tokens_seen": 909901824 }, { "epoch": 0.28, "learning_rate": 0.0007315037714652544, "loss": 0.0753, "theoretical_loss": 3.7107821420778615, "tokens_seen": 910163968 }, { "epoch": 0.28, "learning_rate": 0.000731423527523672, "loss": 0.0744, "theoretical_loss": 3.7106794659517357, "tokens_seen": 910426112 }, { "epoch": 0.28, "learning_rate": 0.0007313432835820895, "loss": 0.0729, "theoretical_loss": 3.7105768276606885, "tokens_seen": 910688256 }, { "epoch": 0.28, "objective/train/advantage_avg": -0.001721195294521749, "objective/train/docs_used": 336141, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5599461793899536, "objective/train/original_loss": 1.5599462985992432, "objective/train/theoretical_loss": 3.710474227179893, "objective/train/tokens_used": 931410400, "objective/train/value_avg": -0.006168365478515625, "objective/train/value_loss": 0.00024355393543373793, "objective/train/value_max": -0.0001398324966430664, "objective/train/value_min": -0.4716796875, "objective/train/value_reward_corr": 0.6900471509482724, "objective/train/value_std": 0.0086822509765625, "objective/train/weight_avg": 0.9983928203582764, "objective/train/weighted_lm_loss": 1.5585672855377197, "objective/train/weights_max": 1.1376217603683472, "objective/train/weights_min": 0.3742261826992035, "theoretical_loss": 3.710474227179893, "tokens_seen": 910950400 }, { "epoch": 0.28, "learning_rate": 0.0007312630396405071, "loss": 0.0743, "theoretical_loss": 3.710474227179893, "tokens_seen": 910950400 }, { "epoch": 0.28, "learning_rate": 0.0007311827956989248, "loss": 0.0741, "theoretical_loss": 3.710371664484547, "tokens_seen": 911212544 }, { "epoch": 0.28, "learning_rate": 0.0007311025517573423, "loss": 0.0714, "theoretical_loss": 3.710269139549869, "tokens_seen": 911474688 }, { "epoch": 0.28, "learning_rate": 0.00073102230781576, "loss": 0.0724, "theoretical_loss": 3.7101666523511034, "tokens_seen": 911736832 }, { "epoch": 0.28, "learning_rate": 0.0007309420638741775, "loss": 0.075, "theoretical_loss": 3.710064202863517, "tokens_seen": 911998976 }, { "epoch": 0.28, "learning_rate": 0.0007308618199325952, "loss": 0.077, "theoretical_loss": 3.7099617910623994, "tokens_seen": 912261120 }, { "epoch": 0.28, "learning_rate": 0.0007307815759910127, "loss": 0.0751, "theoretical_loss": 3.7098594169230648, "tokens_seen": 912523264 }, { "epoch": 0.28, "learning_rate": 0.0007307013320494302, "loss": 0.0732, "theoretical_loss": 3.7097570804208497, "tokens_seen": 912785408 }, { "epoch": 0.28, "learning_rate": 0.0007306210881078479, "loss": 0.074, "theoretical_loss": 3.709654781531113, "tokens_seen": 913047552 }, { "epoch": 0.28, "learning_rate": 0.0007305408441662654, "loss": 0.0752, "theoretical_loss": 3.709552520229239, "tokens_seen": 913309696 }, { "epoch": 0.28, "learning_rate": 0.000730460600224683, "loss": 0.074, "theoretical_loss": 3.7094502964906337, "tokens_seen": 913571840 }, { "epoch": 0.28, "learning_rate": 0.0007303803562831007, "loss": 0.0741, "theoretical_loss": 3.709348110290726, "tokens_seen": 913833984 }, { "epoch": 0.28, "learning_rate": 0.0007303001123415183, "loss": 0.0742, "theoretical_loss": 3.7092459616049682, "tokens_seen": 914096128 }, { "epoch": 0.28, "learning_rate": 0.0007302198683999358, "loss": 0.0745, "theoretical_loss": 3.709143850408837, "tokens_seen": 914358272 }, { "epoch": 0.28, "learning_rate": 0.0007301396244583534, "loss": 0.074, "theoretical_loss": 3.7090417766778305, "tokens_seen": 914620416 }, { "epoch": 0.28, "learning_rate": 0.000730059380516771, "loss": 0.0736, "theoretical_loss": 3.7089397403874704, "tokens_seen": 914882560 }, { "epoch": 0.28, "learning_rate": 0.0007299791365751885, "loss": 0.0742, "theoretical_loss": 3.708837741513301, "tokens_seen": 915144704 }, { "epoch": 0.28, "learning_rate": 0.0007298988926336062, "loss": 0.0751, "theoretical_loss": 3.7087357800308904, "tokens_seen": 915406848 }, { "epoch": 0.28, "learning_rate": 0.0007298186486920237, "loss": 0.0734, "theoretical_loss": 3.708633855915829, "tokens_seen": 915668992 }, { "epoch": 0.28, "learning_rate": 0.0007297384047504413, "loss": 0.0739, "theoretical_loss": 3.708531969143731, "tokens_seen": 915931136 }, { "epoch": 0.28, "learning_rate": 0.000729658160808859, "loss": 0.0766, "theoretical_loss": 3.708430119690232, "tokens_seen": 916193280 }, { "epoch": 0.28, "learning_rate": 0.0007295779168672765, "loss": 0.0748, "theoretical_loss": 3.708328307530991, "tokens_seen": 916455424 }, { "epoch": 0.28, "learning_rate": 0.0007294976729256942, "loss": 0.0736, "theoretical_loss": 3.7082265326416914, "tokens_seen": 916717568 }, { "epoch": 0.28, "learning_rate": 0.0007294174289841117, "loss": 0.0732, "theoretical_loss": 3.708124794998037, "tokens_seen": 916979712 }, { "epoch": 0.28, "learning_rate": 0.0007293371850425293, "loss": 0.0722, "theoretical_loss": 3.708023094575756, "tokens_seen": 917241856 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0011513980571180582, "objective/train/docs_used": 338425, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.586716890335083, "objective/train/original_loss": 1.5867170095443726, "objective/train/theoretical_loss": 3.7079214313505986, "objective/train/tokens_used": 937964000, "objective/train/value_avg": -0.0078125, "objective/train/value_loss": 0.00020614459936041385, "objective/train/value_max": -0.00014650821685791016, "objective/train/value_min": -0.955078125, "objective/train/value_reward_corr": 0.6817206994700864, "objective/train/value_std": 0.014862060546875, "objective/train/weight_avg": 1.0012516975402832, "objective/train/weighted_lm_loss": 1.588771939277649, "objective/train/weights_max": 2.5229907035827637, "objective/train/weights_min": 0.39287444949150085, "theoretical_loss": 3.7079214313505986, "tokens_seen": 917504000 }, { "epoch": 0.28, "learning_rate": 0.0007292569411009469, "loss": 0.0756, "theoretical_loss": 3.7079214313505986, "tokens_seen": 917504000 }, { "epoch": 0.28, "learning_rate": 0.0007291766971593645, "loss": 0.073, "theoretical_loss": 3.707819805298338, "tokens_seen": 917766144 }, { "epoch": 0.28, "learning_rate": 0.000729096453217782, "loss": 0.0756, "theoretical_loss": 3.7077182163947704, "tokens_seen": 918028288 }, { "epoch": 0.28, "learning_rate": 0.0007290162092761998, "loss": 0.0748, "theoretical_loss": 3.7076166646157134, "tokens_seen": 918290432 }, { "epoch": 0.28, "learning_rate": 0.0007289359653346173, "loss": 0.075, "theoretical_loss": 3.7075151499370094, "tokens_seen": 918552576 }, { "epoch": 0.28, "learning_rate": 0.0007288557213930348, "loss": 0.0768, "theoretical_loss": 3.7074136723345212, "tokens_seen": 918814720 }, { "epoch": 0.28, "learning_rate": 0.0007287754774514525, "loss": 0.0768, "theoretical_loss": 3.707312231784136, "tokens_seen": 919076864 }, { "epoch": 0.28, "learning_rate": 0.00072869523350987, "loss": 0.0745, "theoretical_loss": 3.7072108282617617, "tokens_seen": 919339008 }, { "epoch": 0.28, "learning_rate": 0.0007286149895682876, "loss": 0.0756, "theoretical_loss": 3.7071094617433307, "tokens_seen": 919601152 }, { "epoch": 0.28, "learning_rate": 0.0007285347456267052, "loss": 0.0748, "theoretical_loss": 3.707008132204796, "tokens_seen": 919863296 }, { "epoch": 0.28, "learning_rate": 0.0007284545016851228, "loss": 0.0734, "theoretical_loss": 3.7069068396221345, "tokens_seen": 920125440 }, { "epoch": 0.28, "learning_rate": 0.0007283742577435403, "loss": 0.0732, "theoretical_loss": 3.706805583971345, "tokens_seen": 920387584 }, { "epoch": 0.28, "learning_rate": 0.0007282940138019579, "loss": 0.0756, "theoretical_loss": 3.7067043652284495, "tokens_seen": 920649728 }, { "epoch": 0.28, "learning_rate": 0.0007282137698603756, "loss": 0.0809, "theoretical_loss": 3.7066031833694906, "tokens_seen": 920911872 }, { "epoch": 0.28, "learning_rate": 0.0007281335259187932, "loss": 0.0763, "theoretical_loss": 3.7065020383705347, "tokens_seen": 921174016 }, { "epoch": 0.28, "learning_rate": 0.0007280532819772108, "loss": 0.0719, "theoretical_loss": 3.70640093020767, "tokens_seen": 921436160 }, { "epoch": 0.28, "learning_rate": 0.0007279730380356283, "loss": 0.0762, "theoretical_loss": 3.7062998588570073, "tokens_seen": 921698304 }, { "epoch": 0.28, "learning_rate": 0.000727892794094046, "loss": 0.074, "theoretical_loss": 3.7061988242946793, "tokens_seen": 921960448 }, { "epoch": 0.28, "learning_rate": 0.0007278125501524635, "loss": 0.0744, "theoretical_loss": 3.7060978264968423, "tokens_seen": 922222592 }, { "epoch": 0.28, "learning_rate": 0.000727732306210881, "loss": 0.0749, "theoretical_loss": 3.705996865439672, "tokens_seen": 922484736 }, { "epoch": 0.28, "learning_rate": 0.0007276520622692987, "loss": 0.0727, "theoretical_loss": 3.7058959410993695, "tokens_seen": 922746880 }, { "epoch": 0.28, "learning_rate": 0.0007275718183277162, "loss": 0.0743, "theoretical_loss": 3.7057950534521558, "tokens_seen": 923009024 }, { "epoch": 0.28, "learning_rate": 0.0007274915743861338, "loss": 0.0763, "theoretical_loss": 3.705694202474275, "tokens_seen": 923271168 }, { "epoch": 0.28, "learning_rate": 0.0007274113304445515, "loss": 0.0742, "theoretical_loss": 3.7055933881419936, "tokens_seen": 923533312 }, { "epoch": 0.28, "learning_rate": 0.0007273310865029691, "loss": 0.0705, "theoretical_loss": 3.7054926104315995, "tokens_seen": 923795456 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0009234403842128813, "objective/train/docs_used": 340768, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5103869438171387, "objective/train/original_loss": 1.5103867053985596, "objective/train/theoretical_loss": 3.705391869319403, "objective/train/tokens_used": 944517600, "objective/train/value_avg": -0.00710296630859375, "objective/train/value_loss": 0.00018672032456379384, "objective/train/value_max": -0.00013136863708496094, "objective/train/value_min": -0.277099609375, "objective/train/value_reward_corr": 0.6575917614537756, "objective/train/value_std": 0.01128387451171875, "objective/train/weight_avg": 1.001011610031128, "objective/train/weighted_lm_loss": 1.5110077857971191, "objective/train/weights_max": 1.3192977905273438, "objective/train/weights_min": 0.3727017939090729, "theoretical_loss": 3.705391869319403, "tokens_seen": 924057600 }, { "epoch": 0.28, "learning_rate": 0.0007272508425613866, "loss": 0.0732, "theoretical_loss": 3.705391869319403, "tokens_seen": 924057600 }, { "epoch": 0.28, "learning_rate": 0.0007271705986198042, "loss": 0.0717, "theoretical_loss": 3.7052911647817357, "tokens_seen": 924319744 }, { "epoch": 0.28, "learning_rate": 0.0007270903546782218, "loss": 0.0752, "theoretical_loss": 3.7051904967949527, "tokens_seen": 924581888 }, { "epoch": 0.28, "learning_rate": 0.0007270101107366394, "loss": 0.0711, "theoretical_loss": 3.7050898653354296, "tokens_seen": 924844032 }, { "epoch": 0.28, "learning_rate": 0.000726929866795057, "loss": 0.0745, "theoretical_loss": 3.7049892703795653, "tokens_seen": 925106176 }, { "epoch": 0.28, "learning_rate": 0.0007268496228534745, "loss": 0.0739, "theoretical_loss": 3.70488871190378, "tokens_seen": 925368320 }, { "epoch": 0.28, "learning_rate": 0.0007267693789118923, "loss": 0.0737, "theoretical_loss": 3.704788189884515, "tokens_seen": 925630464 }, { "epoch": 0.28, "learning_rate": 0.0007266891349703098, "loss": 0.0753, "theoretical_loss": 3.7046877042982347, "tokens_seen": 925892608 }, { "epoch": 0.28, "learning_rate": 0.0007266088910287273, "loss": 0.0757, "theoretical_loss": 3.7045872551214254, "tokens_seen": 926154752 }, { "epoch": 0.28, "learning_rate": 0.000726528647087145, "loss": 0.0719, "theoretical_loss": 3.704486842330594, "tokens_seen": 926416896 }, { "epoch": 0.28, "learning_rate": 0.0007264484031455625, "loss": 0.074, "theoretical_loss": 3.7043864659022696, "tokens_seen": 926679040 }, { "epoch": 0.28, "learning_rate": 0.0007263681592039801, "loss": 0.0714, "theoretical_loss": 3.704286125813004, "tokens_seen": 926941184 }, { "epoch": 0.28, "learning_rate": 0.0007262879152623977, "loss": 0.074, "theoretical_loss": 3.7041858220393706, "tokens_seen": 927203328 }, { "epoch": 0.28, "learning_rate": 0.0007262076713208153, "loss": 0.0731, "theoretical_loss": 3.704085554557964, "tokens_seen": 927465472 }, { "epoch": 0.28, "learning_rate": 0.0007261274273792328, "loss": 0.0728, "theoretical_loss": 3.703985323345399, "tokens_seen": 927727616 }, { "epoch": 0.28, "learning_rate": 0.0007260471834376505, "loss": 0.0705, "theoretical_loss": 3.7038851283783156, "tokens_seen": 927989760 }, { "epoch": 0.28, "learning_rate": 0.000725966939496068, "loss": 0.075, "theoretical_loss": 3.7037849696333724, "tokens_seen": 928251904 }, { "epoch": 0.28, "learning_rate": 0.0007258866955544856, "loss": 0.0765, "theoretical_loss": 3.703684847087251, "tokens_seen": 928514048 }, { "epoch": 0.28, "learning_rate": 0.0007258064516129033, "loss": 0.0759, "theoretical_loss": 3.7035847607166534, "tokens_seen": 928776192 }, { "epoch": 0.28, "learning_rate": 0.0007257262076713208, "loss": 0.0734, "theoretical_loss": 3.703484710498306, "tokens_seen": 929038336 }, { "epoch": 0.28, "learning_rate": 0.0007256459637297385, "loss": 0.0801, "theoretical_loss": 3.703384696408953, "tokens_seen": 929300480 }, { "epoch": 0.28, "learning_rate": 0.000725565719788156, "loss": 0.0735, "theoretical_loss": 3.7032847184253628, "tokens_seen": 929562624 }, { "epoch": 0.28, "learning_rate": 0.0007254854758465736, "loss": 0.0732, "theoretical_loss": 3.7031847765243233, "tokens_seen": 929824768 }, { "epoch": 0.28, "learning_rate": 0.0007254052319049912, "loss": 0.0713, "theoretical_loss": 3.7030848706826465, "tokens_seen": 930086912 }, { "epoch": 0.28, "learning_rate": 0.0007253249879634087, "loss": 0.0752, "theoretical_loss": 3.702985000877163, "tokens_seen": 930349056 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0017973899375647306, "objective/train/docs_used": 343003, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5838013887405396, "objective/train/original_loss": 1.5838017463684082, "objective/train/theoretical_loss": 3.7028851670847267, "objective/train/tokens_used": 951071200, "objective/train/value_avg": -0.009552001953125, "objective/train/value_loss": 0.00028080277843400836, "objective/train/value_max": -0.00020182132720947266, "objective/train/value_min": -0.52685546875, "objective/train/value_reward_corr": 0.687662785571624, "objective/train/value_std": 0.0157623291015625, "objective/train/weight_avg": 1.001927137374878, "objective/train/weighted_lm_loss": 1.586591362953186, "objective/train/weights_max": 1.4974627494812012, "objective/train/weights_min": 0.3688567578792572, "theoretical_loss": 3.7028851670847267, "tokens_seen": 930611200 }, { "epoch": 0.28, "learning_rate": 0.0007252447440218264, "loss": 0.0745, "theoretical_loss": 3.7028851670847267, "tokens_seen": 930611200 }, { "epoch": 0.28, "learning_rate": 0.000725164500080244, "loss": 0.0729, "theoretical_loss": 3.7027853692822124, "tokens_seen": 930873344 }, { "epoch": 0.28, "learning_rate": 0.0007250842561386616, "loss": 0.076, "theoretical_loss": 3.702685607446516, "tokens_seen": 931135488 }, { "epoch": 0.28, "learning_rate": 0.0007250040121970791, "loss": 0.076, "theoretical_loss": 3.7025858815545543, "tokens_seen": 931397632 }, { "epoch": 0.28, "learning_rate": 0.0007249237682554968, "loss": 0.0746, "theoretical_loss": 3.7024861915832665, "tokens_seen": 931659776 }, { "epoch": 0.28, "learning_rate": 0.0007248435243139143, "loss": 0.0739, "theoretical_loss": 3.7023865375096126, "tokens_seen": 931921920 }, { "epoch": 0.28, "learning_rate": 0.0007247632803723318, "loss": 0.0757, "theoretical_loss": 3.7022869193105734, "tokens_seen": 932184064 }, { "epoch": 0.28, "learning_rate": 0.0007246830364307495, "loss": 0.0728, "theoretical_loss": 3.702187336963151, "tokens_seen": 932446208 }, { "epoch": 0.28, "learning_rate": 0.000724602792489167, "loss": 0.0741, "theoretical_loss": 3.70208779044437, "tokens_seen": 932708352 }, { "epoch": 0.28, "learning_rate": 0.0007245225485475848, "loss": 0.0719, "theoretical_loss": 3.7019882797312746, "tokens_seen": 932970496 }, { "epoch": 0.28, "learning_rate": 0.0007244423046060023, "loss": 0.0794, "theoretical_loss": 3.701888804800931, "tokens_seen": 933232640 }, { "epoch": 0.28, "learning_rate": 0.0007243620606644199, "loss": 0.071, "theoretical_loss": 3.701789365630426, "tokens_seen": 933494784 }, { "epoch": 0.28, "learning_rate": 0.0007242818167228375, "loss": 0.0728, "theoretical_loss": 3.701689962196868, "tokens_seen": 933756928 }, { "epoch": 0.28, "learning_rate": 0.000724201572781255, "loss": 0.0769, "theoretical_loss": 3.701590594477387, "tokens_seen": 934019072 }, { "epoch": 0.28, "learning_rate": 0.0007241213288396726, "loss": 0.0766, "theoretical_loss": 3.701491262449131, "tokens_seen": 934281216 }, { "epoch": 0.28, "learning_rate": 0.0007240410848980902, "loss": 0.0764, "theoretical_loss": 3.7013919660892736, "tokens_seen": 934543360 }, { "epoch": 0.28, "learning_rate": 0.0007239608409565078, "loss": 0.0743, "theoretical_loss": 3.701292705375006, "tokens_seen": 934805504 }, { "epoch": 0.28, "learning_rate": 0.0007238805970149253, "loss": 0.0755, "theoretical_loss": 3.701193480283542, "tokens_seen": 935067648 }, { "epoch": 0.28, "learning_rate": 0.000723800353073343, "loss": 0.0718, "theoretical_loss": 3.701094290792116, "tokens_seen": 935329792 }, { "epoch": 0.28, "learning_rate": 0.0007237201091317606, "loss": 0.0755, "theoretical_loss": 3.7009951368779825, "tokens_seen": 935591936 }, { "epoch": 0.28, "learning_rate": 0.0007236398651901781, "loss": 0.0729, "theoretical_loss": 3.700896018518418, "tokens_seen": 935854080 }, { "epoch": 0.28, "learning_rate": 0.0007235596212485958, "loss": 0.0756, "theoretical_loss": 3.700796935690719, "tokens_seen": 936116224 }, { "epoch": 0.28, "learning_rate": 0.0007234793773070133, "loss": 0.0736, "theoretical_loss": 3.700697888372204, "tokens_seen": 936378368 }, { "epoch": 0.28, "learning_rate": 0.0007233991333654309, "loss": 0.0746, "theoretical_loss": 3.7005988765402114, "tokens_seen": 936640512 }, { "epoch": 0.28, "learning_rate": 0.0007233188894238485, "loss": 0.0735, "theoretical_loss": 3.700499900172101, "tokens_seen": 936902656 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0006104443455114961, "objective/train/docs_used": 344763, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4431649446487427, "objective/train/original_loss": 1.4431649446487427, "objective/train/theoretical_loss": 3.700400959245252, "objective/train/tokens_used": 957624800, "objective/train/value_avg": -0.01019287109375, "objective/train/value_loss": 0.00036883429856970906, "objective/train/value_max": -0.0001252889633178711, "objective/train/value_min": -0.8720703125, "objective/train/value_reward_corr": 0.8053598602672474, "objective/train/value_std": 0.0275726318359375, "objective/train/weight_avg": 1.000779628753662, "objective/train/weighted_lm_loss": 1.4440263509750366, "objective/train/weights_max": 1.7109980583190918, "objective/train/weights_min": 0.3686527907848358, "theoretical_loss": 3.700400959245252, "tokens_seen": 937164800 }, { "epoch": 0.28, "learning_rate": 0.0007232386454822661, "loss": 0.0714, "theoretical_loss": 3.700400959245252, "tokens_seen": 937164800 }, { "epoch": 0.28, "learning_rate": 0.0007231584015406837, "loss": 0.0727, "theoretical_loss": 3.7003020537370657, "tokens_seen": 937426944 }, { "epoch": 0.28, "learning_rate": 0.0007230781575991013, "loss": 0.0735, "theoretical_loss": 3.7002031836249643, "tokens_seen": 937689088 }, { "epoch": 0.28, "learning_rate": 0.0007229979136575189, "loss": 0.0755, "theoretical_loss": 3.7001043488863896, "tokens_seen": 937951232 }, { "epoch": 0.28, "learning_rate": 0.0007229176697159365, "loss": 0.0751, "theoretical_loss": 3.7000055494988047, "tokens_seen": 938213376 }, { "epoch": 0.28, "learning_rate": 0.0007228374257743541, "loss": 0.0768, "theoretical_loss": 3.6999067854396936, "tokens_seen": 938475520 }, { "epoch": 0.28, "learning_rate": 0.0007227571818327716, "loss": 0.0713, "theoretical_loss": 3.6998080566865608, "tokens_seen": 938737664 }, { "epoch": 0.28, "learning_rate": 0.0007226769378911893, "loss": 0.0738, "theoretical_loss": 3.6997093632169307, "tokens_seen": 938999808 }, { "epoch": 0.28, "learning_rate": 0.0007225966939496068, "loss": 0.0765, "theoretical_loss": 3.699610705008349, "tokens_seen": 939261952 }, { "epoch": 0.28, "learning_rate": 0.0007225164500080244, "loss": 0.0705, "theoretical_loss": 3.6995120820383818, "tokens_seen": 939524096 }, { "epoch": 0.28, "learning_rate": 0.000722436206066442, "loss": 0.0741, "theoretical_loss": 3.6994134942846157, "tokens_seen": 939786240 }, { "epoch": 0.28, "learning_rate": 0.0007223559621248595, "loss": 0.0758, "theoretical_loss": 3.6993149417246576, "tokens_seen": 940048384 }, { "epoch": 0.28, "learning_rate": 0.0007222757181832772, "loss": 0.0735, "theoretical_loss": 3.699216424336135, "tokens_seen": 940310528 }, { "epoch": 0.29, "learning_rate": 0.0007221954742416948, "loss": 0.0745, "theoretical_loss": 3.6991179420966964, "tokens_seen": 940572672 }, { "epoch": 0.29, "learning_rate": 0.0007221152303001124, "loss": 0.0728, "theoretical_loss": 3.69901949498401, "tokens_seen": 940834816 }, { "epoch": 0.29, "learning_rate": 0.0007220349863585299, "loss": 0.0759, "theoretical_loss": 3.6989210829757644, "tokens_seen": 941096960 }, { "epoch": 0.29, "learning_rate": 0.0007219547424169476, "loss": 0.0756, "theoretical_loss": 3.6988227060496692, "tokens_seen": 941359104 }, { "epoch": 0.29, "learning_rate": 0.0007218744984753651, "loss": 0.0761, "theoretical_loss": 3.6987243641834535, "tokens_seen": 941621248 }, { "epoch": 0.29, "learning_rate": 0.0007217942545337827, "loss": 0.0743, "theoretical_loss": 3.6986260573548675, "tokens_seen": 941883392 }, { "epoch": 0.29, "learning_rate": 0.0007217140105922003, "loss": 0.0746, "theoretical_loss": 3.698527785541682, "tokens_seen": 942145536 }, { "epoch": 0.29, "learning_rate": 0.0007216337666506178, "loss": 0.0741, "theoretical_loss": 3.6984295487216867, "tokens_seen": 942407680 }, { "epoch": 0.29, "learning_rate": 0.0007215535227090356, "loss": 0.0745, "theoretical_loss": 3.6983313468726924, "tokens_seen": 942669824 }, { "epoch": 0.29, "learning_rate": 0.0007214732787674531, "loss": 0.0772, "theoretical_loss": 3.6982331799725303, "tokens_seen": 942931968 }, { "epoch": 0.29, "learning_rate": 0.0007213930348258707, "loss": 0.0733, "theoretical_loss": 3.6981350479990525, "tokens_seen": 943194112 }, { "epoch": 0.29, "learning_rate": 0.0007213127908842883, "loss": 0.078, "theoretical_loss": 3.6980369509301285, "tokens_seen": 943456256 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0005385353579185903, "objective/train/docs_used": 346959, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3849838972091675, "objective/train/original_loss": 1.384983777999878, "objective/train/theoretical_loss": 3.6979388887436517, "objective/train/tokens_used": 964178400, "objective/train/value_avg": -0.008209228515625, "objective/train/value_loss": 0.00034320203121751547, "objective/train/value_max": -5.8770179748535156e-05, "objective/train/value_min": -0.97412109375, "objective/train/value_reward_corr": 0.7214583684226242, "objective/train/value_std": 0.0195465087890625, "objective/train/weight_avg": 1.0007014274597168, "objective/train/weighted_lm_loss": 1.3859144449234009, "objective/train/weights_max": 2.094869613647461, "objective/train/weights_min": 0.3750951290130615, "theoretical_loss": 3.6979388887436517, "tokens_seen": 943718400 }, { "epoch": 0.29, "learning_rate": 0.0007212325469427058, "loss": 0.0757, "theoretical_loss": 3.6979388887436517, "tokens_seen": 943718400 }, { "epoch": 0.29, "learning_rate": 0.0007211523030011234, "loss": 0.0757, "theoretical_loss": 3.697840861417533, "tokens_seen": 943980544 }, { "epoch": 0.29, "learning_rate": 0.000721072059059541, "loss": 0.0754, "theoretical_loss": 3.697742868929704, "tokens_seen": 944242688 }, { "epoch": 0.29, "learning_rate": 0.0007209918151179586, "loss": 0.0744, "theoretical_loss": 3.6976449112581173, "tokens_seen": 944504832 }, { "epoch": 0.29, "learning_rate": 0.0007209115711763761, "loss": 0.0744, "theoretical_loss": 3.697546988380744, "tokens_seen": 944766976 }, { "epoch": 0.29, "learning_rate": 0.0007208313272347939, "loss": 0.076, "theoretical_loss": 3.697449100275577, "tokens_seen": 945029120 }, { "epoch": 0.29, "learning_rate": 0.0007207510832932114, "loss": 0.0726, "theoretical_loss": 3.6973512469206278, "tokens_seen": 945291264 }, { "epoch": 0.29, "learning_rate": 0.000720670839351629, "loss": 0.0732, "theoretical_loss": 3.6972534282939282, "tokens_seen": 945553408 }, { "epoch": 0.29, "learning_rate": 0.0007205905954100466, "loss": 0.0726, "theoretical_loss": 3.6971556443735314, "tokens_seen": 945815552 }, { "epoch": 0.29, "learning_rate": 0.0007205103514684641, "loss": 0.0709, "theoretical_loss": 3.697057895137508, "tokens_seen": 946077696 }, { "epoch": 0.29, "learning_rate": 0.0007204301075268818, "loss": 0.077, "theoretical_loss": 3.696960180563951, "tokens_seen": 946339840 }, { "epoch": 0.29, "learning_rate": 0.0007203498635852993, "loss": 0.0751, "theoretical_loss": 3.6968625006309717, "tokens_seen": 946601984 }, { "epoch": 0.29, "learning_rate": 0.0007202696196437169, "loss": 0.0745, "theoretical_loss": 3.6967648553167014, "tokens_seen": 946864128 }, { "epoch": 0.29, "learning_rate": 0.0007201893757021345, "loss": 0.0737, "theoretical_loss": 3.696667244599292, "tokens_seen": 947126272 }, { "epoch": 0.29, "learning_rate": 0.000720109131760552, "loss": 0.0758, "theoretical_loss": 3.6965696684569154, "tokens_seen": 947388416 }, { "epoch": 0.29, "learning_rate": 0.0007200288878189697, "loss": 0.0759, "theoretical_loss": 3.6964721268677616, "tokens_seen": 947650560 }, { "epoch": 0.29, "learning_rate": 0.0007199486438773873, "loss": 0.0743, "theoretical_loss": 3.696374619810043, "tokens_seen": 947912704 }, { "epoch": 0.29, "learning_rate": 0.0007198683999358049, "loss": 0.0713, "theoretical_loss": 3.6962771472619886, "tokens_seen": 948174848 }, { "epoch": 0.29, "learning_rate": 0.0007197881559942224, "loss": 0.0743, "theoretical_loss": 3.69617970920185, "tokens_seen": 948436992 }, { "epoch": 0.29, "learning_rate": 0.0007197079120526401, "loss": 0.0767, "theoretical_loss": 3.6960823056078973, "tokens_seen": 948699136 }, { "epoch": 0.29, "learning_rate": 0.0007196276681110576, "loss": 0.0766, "theoretical_loss": 3.6959849364584203, "tokens_seen": 948961280 }, { "epoch": 0.29, "learning_rate": 0.0007195474241694752, "loss": 0.0752, "theoretical_loss": 3.695887601731728, "tokens_seen": 949223424 }, { "epoch": 0.29, "learning_rate": 0.0007194671802278928, "loss": 0.0764, "theoretical_loss": 3.69579030140615, "tokens_seen": 949485568 }, { "epoch": 0.29, "learning_rate": 0.0007193869362863103, "loss": 0.075, "theoretical_loss": 3.6956930354600352, "tokens_seen": 949747712 }, { "epoch": 0.29, "learning_rate": 0.0007193066923447281, "loss": 0.0748, "theoretical_loss": 3.6955958038717522, "tokens_seen": 950009856 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.000604399829171598, "objective/train/docs_used": 349336, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.432031512260437, "objective/train/original_loss": 1.432031512260437, "objective/train/theoretical_loss": 3.695498606619688, "objective/train/tokens_used": 970732000, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.00045492048957385123, "objective/train/value_max": -0.00015842914581298828, "objective/train/value_min": -0.95703125, "objective/train/value_reward_corr": 0.5685032250536571, "objective/train/value_std": 0.01549530029296875, "objective/train/weight_avg": 1.0007842779159546, "objective/train/weighted_lm_loss": 1.4333014488220215, "objective/train/weights_max": 2.597604990005493, "objective/train/weights_min": 0.05960186570882797, "theoretical_loss": 3.695498606619688, "tokens_seen": 950272000 }, { "epoch": 0.29, "learning_rate": 0.0007192264484031456, "loss": 0.0746, "theoretical_loss": 3.695498606619688, "tokens_seen": 950272000 }, { "epoch": 0.29, "learning_rate": 0.0007191462044615632, "loss": 0.0727, "theoretical_loss": 3.6954014436822513, "tokens_seen": 950534144 }, { "epoch": 0.29, "learning_rate": 0.0007190659605199808, "loss": 0.0749, "theoretical_loss": 3.695304315037868, "tokens_seen": 950796288 }, { "epoch": 0.29, "learning_rate": 0.0007189857165783984, "loss": 0.0744, "theoretical_loss": 3.6952072206649857, "tokens_seen": 951058432 }, { "epoch": 0.29, "learning_rate": 0.0007189054726368159, "loss": 0.075, "theoretical_loss": 3.695110160542069, "tokens_seen": 951320576 }, { "epoch": 0.29, "learning_rate": 0.0007188252286952335, "loss": 0.0741, "theoretical_loss": 3.6950131346476054, "tokens_seen": 951582720 }, { "epoch": 0.29, "learning_rate": 0.0007187449847536511, "loss": 0.0753, "theoretical_loss": 3.694916142960098, "tokens_seen": 951844864 }, { "epoch": 0.29, "learning_rate": 0.0007186647408120686, "loss": 0.0723, "theoretical_loss": 3.6948191854580728, "tokens_seen": 952107008 }, { "epoch": 0.29, "learning_rate": 0.0007185844968704864, "loss": 0.0736, "theoretical_loss": 3.694722262120072, "tokens_seen": 952369152 }, { "epoch": 0.29, "learning_rate": 0.0007185042529289039, "loss": 0.0751, "theoretical_loss": 3.6946253729246594, "tokens_seen": 952631296 }, { "epoch": 0.29, "learning_rate": 0.0007184240089873215, "loss": 0.0732, "theoretical_loss": 3.6945285178504172, "tokens_seen": 952893440 }, { "epoch": 0.29, "learning_rate": 0.0007183437650457391, "loss": 0.0727, "theoretical_loss": 3.694431696875948, "tokens_seen": 953155584 }, { "epoch": 0.29, "learning_rate": 0.0007182635211041566, "loss": 0.0777, "theoretical_loss": 3.6943349099798715, "tokens_seen": 953417728 }, { "epoch": 0.29, "learning_rate": 0.0007181832771625743, "loss": 0.0706, "theoretical_loss": 3.6942381571408287, "tokens_seen": 953679872 }, { "epoch": 0.29, "learning_rate": 0.0007181030332209918, "loss": 0.0759, "theoretical_loss": 3.6941414383374793, "tokens_seen": 953942016 }, { "epoch": 0.29, "learning_rate": 0.0007180227892794094, "loss": 0.0725, "theoretical_loss": 3.6940447535485026, "tokens_seen": 954204160 }, { "epoch": 0.29, "learning_rate": 0.000717942545337827, "loss": 0.0734, "theoretical_loss": 3.6939481027525956, "tokens_seen": 954466304 }, { "epoch": 0.29, "learning_rate": 0.0007178623013962447, "loss": 0.0737, "theoretical_loss": 3.6938514859284766, "tokens_seen": 954728448 }, { "epoch": 0.29, "learning_rate": 0.0007177820574546622, "loss": 0.0737, "theoretical_loss": 3.6937549030548813, "tokens_seen": 954990592 }, { "epoch": 0.29, "learning_rate": 0.0007177018135130798, "loss": 0.0748, "theoretical_loss": 3.693658354110565, "tokens_seen": 955252736 }, { "epoch": 0.29, "learning_rate": 0.0007176215695714974, "loss": 0.0738, "theoretical_loss": 3.6935618390743032, "tokens_seen": 955514880 }, { "epoch": 0.29, "learning_rate": 0.0007175413256299149, "loss": 0.0742, "theoretical_loss": 3.6934653579248886, "tokens_seen": 955777024 }, { "epoch": 0.29, "learning_rate": 0.0007174610816883326, "loss": 0.0727, "theoretical_loss": 3.693368910641135, "tokens_seen": 956039168 }, { "epoch": 0.29, "learning_rate": 0.0007173808377467501, "loss": 0.076, "theoretical_loss": 3.693272497201874, "tokens_seen": 956301312 }, { "epoch": 0.29, "learning_rate": 0.0007173005938051677, "loss": 0.0776, "theoretical_loss": 3.6931761175859554, "tokens_seen": 956563456 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0019286867463961244, "objective/train/docs_used": 351696, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4690124988555908, "objective/train/original_loss": 1.4690124988555908, "objective/train/theoretical_loss": 3.693079771772251, "objective/train/tokens_used": 977285600, "objective/train/value_avg": -0.008026123046875, "objective/train/value_loss": 0.0001780358434189111, "objective/train/value_max": -0.00020503997802734375, "objective/train/value_min": -0.70703125, "objective/train/value_reward_corr": 0.6057208029602728, "objective/train/value_std": 0.01194000244140625, "objective/train/weight_avg": 1.0020111799240112, "objective/train/weighted_lm_loss": 1.4720525741577148, "objective/train/weights_max": 1.3528940677642822, "objective/train/weights_min": 0.36884480714797974, "theoretical_loss": 3.693079771772251, "tokens_seen": 956825600 }, { "epoch": 0.29, "learning_rate": 0.0007172203498635853, "loss": 0.0746, "theoretical_loss": 3.693079771772251, "tokens_seen": 956825600 }, { "epoch": 0.29, "learning_rate": 0.0007171401059220028, "loss": 0.0763, "theoretical_loss": 3.692983459739649, "tokens_seen": 957087744 }, { "epoch": 0.29, "learning_rate": 0.0007170598619804205, "loss": 0.0776, "theoretical_loss": 3.6928871814670563, "tokens_seen": 957349888 }, { "epoch": 0.29, "learning_rate": 0.0007169796180388381, "loss": 0.0762, "theoretical_loss": 3.6927909369334007, "tokens_seen": 957612032 }, { "epoch": 0.29, "learning_rate": 0.0007168993740972557, "loss": 0.0724, "theoretical_loss": 3.6926947261176277, "tokens_seen": 957874176 }, { "epoch": 0.29, "learning_rate": 0.0007168191301556733, "loss": 0.076, "theoretical_loss": 3.692598548998702, "tokens_seen": 958136320 }, { "epoch": 0.29, "learning_rate": 0.0007167388862140909, "loss": 0.077, "theoretical_loss": 3.692502405555606, "tokens_seen": 958398464 }, { "epoch": 0.29, "learning_rate": 0.0007166586422725084, "loss": 0.0769, "theoretical_loss": 3.692406295767344, "tokens_seen": 958660608 }, { "epoch": 0.29, "learning_rate": 0.0007165783983309261, "loss": 0.0752, "theoretical_loss": 3.692310219612936, "tokens_seen": 958922752 }, { "epoch": 0.29, "learning_rate": 0.0007164981543893436, "loss": 0.0741, "theoretical_loss": 3.6922141770714214, "tokens_seen": 959184896 }, { "epoch": 0.29, "learning_rate": 0.0007164179104477611, "loss": 0.0736, "theoretical_loss": 3.6921181681218602, "tokens_seen": 959447040 }, { "epoch": 0.29, "learning_rate": 0.0007163376665061789, "loss": 0.0753, "theoretical_loss": 3.6920221927433294, "tokens_seen": 959709184 }, { "epoch": 0.29, "learning_rate": 0.0007162574225645964, "loss": 0.0771, "theoretical_loss": 3.691926250914925, "tokens_seen": 959971328 }, { "epoch": 0.29, "learning_rate": 0.000716177178623014, "loss": 0.0751, "theoretical_loss": 3.691830342615763, "tokens_seen": 960233472 }, { "epoch": 0.29, "learning_rate": 0.0007160969346814316, "loss": 0.0743, "theoretical_loss": 3.6917344678249755, "tokens_seen": 960495616 }, { "epoch": 0.29, "learning_rate": 0.0007160166907398492, "loss": 0.0733, "theoretical_loss": 3.6916386265217156, "tokens_seen": 960757760 }, { "epoch": 0.29, "learning_rate": 0.0007159364467982667, "loss": 0.0727, "theoretical_loss": 3.6915428186851553, "tokens_seen": 961019904 }, { "epoch": 0.29, "learning_rate": 0.0007158562028566843, "loss": 0.0751, "theoretical_loss": 3.6914470442944824, "tokens_seen": 961282048 }, { "epoch": 0.29, "learning_rate": 0.0007157759589151019, "loss": 0.0722, "theoretical_loss": 3.691351303328907, "tokens_seen": 961544192 }, { "epoch": 0.29, "learning_rate": 0.0007156957149735194, "loss": 0.0753, "theoretical_loss": 3.691255595767654, "tokens_seen": 961806336 }, { "epoch": 0.29, "learning_rate": 0.0007156154710319372, "loss": 0.0765, "theoretical_loss": 3.6911599215899704, "tokens_seen": 962068480 }, { "epoch": 0.29, "learning_rate": 0.0007155352270903547, "loss": 0.0757, "theoretical_loss": 3.6910642807751195, "tokens_seen": 962330624 }, { "epoch": 0.29, "learning_rate": 0.0007154549831487724, "loss": 0.075, "theoretical_loss": 3.6909686733023843, "tokens_seen": 962592768 }, { "epoch": 0.29, "learning_rate": 0.0007153747392071899, "loss": 0.0721, "theoretical_loss": 3.690873099151065, "tokens_seen": 962854912 }, { "epoch": 0.29, "learning_rate": 0.0007152944952656074, "loss": 0.0729, "theoretical_loss": 3.690777558300482, "tokens_seen": 963117056 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0009449372300878167, "objective/train/docs_used": 353969, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4017493724822998, "objective/train/original_loss": 1.4017493724822998, "objective/train/theoretical_loss": 3.690682050729972, "objective/train/tokens_used": 983839200, "objective/train/value_avg": -0.00891876220703125, "objective/train/value_loss": 0.00023926046560518444, "objective/train/value_max": -0.0001926422119140625, "objective/train/value_min": -0.299072265625, "objective/train/value_reward_corr": 0.619741688664302, "objective/train/value_std": 0.0124359130859375, "objective/train/weight_avg": 1.0010510683059692, "objective/train/weighted_lm_loss": 1.403052568435669, "objective/train/weights_max": 1.2246274948120117, "objective/train/weights_min": 0.36947569251060486, "theoretical_loss": 3.690682050729972, "tokens_seen": 963379200 }, { "epoch": 0.29, "learning_rate": 0.0007152142513240251, "loss": 0.0734, "theoretical_loss": 3.690682050729972, "tokens_seen": 963379200 }, { "epoch": 0.29, "learning_rate": 0.0007151340073824426, "loss": 0.0755, "theoretical_loss": 3.6905865764188923, "tokens_seen": 963641344 }, { "epoch": 0.29, "learning_rate": 0.0007150537634408602, "loss": 0.0721, "theoretical_loss": 3.6904911353466177, "tokens_seen": 963903488 }, { "epoch": 0.29, "learning_rate": 0.0007149735194992778, "loss": 0.0754, "theoretical_loss": 3.690395727492541, "tokens_seen": 964165632 }, { "epoch": 0.29, "learning_rate": 0.0007148932755576955, "loss": 0.0732, "theoretical_loss": 3.690300352836074, "tokens_seen": 964427776 }, { "epoch": 0.29, "learning_rate": 0.000714813031616113, "loss": 0.0769, "theoretical_loss": 3.690205011356646, "tokens_seen": 964689920 }, { "epoch": 0.29, "learning_rate": 0.0007147327876745306, "loss": 0.0753, "theoretical_loss": 3.6901097030337056, "tokens_seen": 964952064 }, { "epoch": 0.29, "learning_rate": 0.0007146525437329482, "loss": 0.0727, "theoretical_loss": 3.6900144278467204, "tokens_seen": 965214208 }, { "epoch": 0.29, "learning_rate": 0.0007145722997913657, "loss": 0.0755, "theoretical_loss": 3.6899191857751736, "tokens_seen": 965476352 }, { "epoch": 0.29, "learning_rate": 0.0007144920558497834, "loss": 0.0722, "theoretical_loss": 3.6898239767985688, "tokens_seen": 965738496 }, { "epoch": 0.29, "learning_rate": 0.0007144118119082009, "loss": 0.0753, "theoretical_loss": 3.689728800896428, "tokens_seen": 966000640 }, { "epoch": 0.29, "learning_rate": 0.0007143315679666186, "loss": 0.0735, "theoretical_loss": 3.68963365804829, "tokens_seen": 966262784 }, { "epoch": 0.29, "learning_rate": 0.0007142513240250361, "loss": 0.076, "theoretical_loss": 3.689538548233713, "tokens_seen": 966524928 }, { "epoch": 0.29, "learning_rate": 0.0007141710800834536, "loss": 0.0775, "theoretical_loss": 3.6894434714322726, "tokens_seen": 966787072 }, { "epoch": 0.29, "learning_rate": 0.0007140908361418714, "loss": 0.073, "theoretical_loss": 3.689348427623563, "tokens_seen": 967049216 }, { "epoch": 0.29, "learning_rate": 0.0007140105922002889, "loss": 0.0725, "theoretical_loss": 3.689253416787197, "tokens_seen": 967311360 }, { "epoch": 0.29, "learning_rate": 0.0007139303482587065, "loss": 0.0725, "theoretical_loss": 3.6891584389028047, "tokens_seen": 967573504 }, { "epoch": 0.29, "learning_rate": 0.0007138501043171241, "loss": 0.0746, "theoretical_loss": 3.689063493950034, "tokens_seen": 967835648 }, { "epoch": 0.29, "learning_rate": 0.0007137698603755417, "loss": 0.0736, "theoretical_loss": 3.6889685819085525, "tokens_seen": 968097792 }, { "epoch": 0.29, "learning_rate": 0.0007136896164339592, "loss": 0.0744, "theoretical_loss": 3.688873702758044, "tokens_seen": 968359936 }, { "epoch": 0.29, "learning_rate": 0.0007136093724923768, "loss": 0.073, "theoretical_loss": 3.688778856478211, "tokens_seen": 968622080 }, { "epoch": 0.29, "learning_rate": 0.0007135291285507944, "loss": 0.0737, "theoretical_loss": 3.6886840430487746, "tokens_seen": 968884224 }, { "epoch": 0.29, "learning_rate": 0.0007134488846092119, "loss": 0.0726, "theoretical_loss": 3.688589262449474, "tokens_seen": 969146368 }, { "epoch": 0.29, "learning_rate": 0.0007133686406676297, "loss": 0.0753, "theoretical_loss": 3.6884945146600643, "tokens_seen": 969408512 }, { "epoch": 0.29, "learning_rate": 0.0007132883967260472, "loss": 0.0739, "theoretical_loss": 3.6883997996603215, "tokens_seen": 969670656 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.001449979841709137, "objective/train/docs_used": 356486, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4616808891296387, "objective/train/original_loss": 1.4616808891296387, "objective/train/theoretical_loss": 3.688305117430038, "objective/train/tokens_used": 990392800, "objective/train/value_avg": -0.0089111328125, "objective/train/value_loss": 0.00036317447666078806, "objective/train/value_max": -0.00010150671005249023, "objective/train/value_min": -0.70361328125, "objective/train/value_reward_corr": 0.6299372029237911, "objective/train/value_std": 0.017303466796875, "objective/train/weight_avg": 1.0016149282455444, "objective/train/weighted_lm_loss": 1.4640196561813354, "objective/train/weights_max": 1.4167561531066895, "objective/train/weights_min": 0.3715973496437073, "theoretical_loss": 3.688305117430038, "tokens_seen": 969932800 }, { "epoch": 0.29, "learning_rate": 0.0007132081527844648, "loss": 0.0745, "theoretical_loss": 3.688305117430038, "tokens_seen": 969932800 }, { "epoch": 0.29, "learning_rate": 0.0007131279088428824, "loss": 0.0743, "theoretical_loss": 3.688210467949023, "tokens_seen": 970194944 }, { "epoch": 0.29, "learning_rate": 0.0007130476649013, "loss": 0.0741, "theoretical_loss": 3.6881158511971055, "tokens_seen": 970457088 }, { "epoch": 0.29, "learning_rate": 0.0007129674209597176, "loss": 0.0745, "theoretical_loss": 3.6880212671541326, "tokens_seen": 970719232 }, { "epoch": 0.29, "learning_rate": 0.0007128871770181351, "loss": 0.0751, "theoretical_loss": 3.687926715799967, "tokens_seen": 970981376 }, { "epoch": 0.29, "learning_rate": 0.0007128069330765527, "loss": 0.0746, "theoretical_loss": 3.687832197114491, "tokens_seen": 971243520 }, { "epoch": 0.29, "learning_rate": 0.0007127266891349703, "loss": 0.0759, "theoretical_loss": 3.687737711077605, "tokens_seen": 971505664 }, { "epoch": 0.29, "learning_rate": 0.000712646445193388, "loss": 0.0738, "theoretical_loss": 3.687643257669225, "tokens_seen": 971767808 }, { "epoch": 0.29, "learning_rate": 0.0007125662012518055, "loss": 0.0748, "theoretical_loss": 3.6875488368692877, "tokens_seen": 972029952 }, { "epoch": 0.29, "learning_rate": 0.0007124859573102232, "loss": 0.0737, "theoretical_loss": 3.687454448657745, "tokens_seen": 972292096 }, { "epoch": 0.29, "learning_rate": 0.0007124057133686407, "loss": 0.0739, "theoretical_loss": 3.687360093014568, "tokens_seen": 972554240 }, { "epoch": 0.29, "learning_rate": 0.0007123254694270582, "loss": 0.0744, "theoretical_loss": 3.687265769919745, "tokens_seen": 972816384 }, { "epoch": 0.29, "learning_rate": 0.0007122452254854759, "loss": 0.0751, "theoretical_loss": 3.6871714793532826, "tokens_seen": 973078528 }, { "epoch": 0.29, "learning_rate": 0.0007121649815438934, "loss": 0.077, "theoretical_loss": 3.687077221295203, "tokens_seen": 973340672 }, { "epoch": 0.3, "learning_rate": 0.000712084737602311, "loss": 0.0724, "theoretical_loss": 3.6869829957255496, "tokens_seen": 973602816 }, { "epoch": 0.3, "learning_rate": 0.0007120044936607286, "loss": 0.0734, "theoretical_loss": 3.68688880262438, "tokens_seen": 973864960 }, { "epoch": 0.3, "learning_rate": 0.0007119242497191462, "loss": 0.0728, "theoretical_loss": 3.6867946419717716, "tokens_seen": 974127104 }, { "epoch": 0.3, "learning_rate": 0.0007118440057775638, "loss": 0.0726, "theoretical_loss": 3.6867005137478177, "tokens_seen": 974389248 }, { "epoch": 0.3, "learning_rate": 0.0007117637618359814, "loss": 0.0769, "theoretical_loss": 3.686606417932631, "tokens_seen": 974651392 }, { "epoch": 0.3, "learning_rate": 0.000711683517894399, "loss": 0.0729, "theoretical_loss": 3.6865123545063403, "tokens_seen": 974913536 }, { "epoch": 0.3, "learning_rate": 0.0007116032739528166, "loss": 0.0769, "theoretical_loss": 3.686418323449093, "tokens_seen": 975175680 }, { "epoch": 0.3, "learning_rate": 0.0007115230300112342, "loss": 0.0764, "theoretical_loss": 3.6863243247410526, "tokens_seen": 975437824 }, { "epoch": 0.3, "learning_rate": 0.0007114427860696517, "loss": 0.0734, "theoretical_loss": 3.686230358362401, "tokens_seen": 975699968 }, { "epoch": 0.3, "learning_rate": 0.0007113625421280694, "loss": 0.0738, "theoretical_loss": 3.686136424293338, "tokens_seen": 975962112 }, { "epoch": 0.3, "learning_rate": 0.0007112822981864869, "loss": 0.0742, "theoretical_loss": 3.68604252251408, "tokens_seen": 976224256 }, { "epoch": 0.3, "objective/train/advantage_avg": -0.001066173892468214, "objective/train/docs_used": 358882, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4502272605895996, "objective/train/original_loss": 1.4502272605895996, "objective/train/theoretical_loss": 3.6859486530048615, "objective/train/tokens_used": 996946400, "objective/train/value_avg": -0.011871337890625, "objective/train/value_loss": 0.0007070415304042399, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.36865234375, "objective/train/value_reward_corr": 0.9649324601528265, "objective/train/value_std": 0.037567138671875, "objective/train/weight_avg": 0.9992637634277344, "objective/train/weighted_lm_loss": 1.4516780376434326, "objective/train/weights_max": 1.3549121618270874, "objective/train/weights_min": 0.36916786432266235, "theoretical_loss": 3.6859486530048615, "tokens_seen": 976486400 }, { "epoch": 0.3, "learning_rate": 0.0007112020542449044, "loss": 0.0734, "theoretical_loss": 3.6859486530048615, "tokens_seen": 976486400 }, { "epoch": 0.3, "learning_rate": 0.0007111218103033222, "loss": 0.0734, "theoretical_loss": 3.685854815745933, "tokens_seen": 976748544 }, { "epoch": 0.3, "learning_rate": 0.0007110415663617397, "loss": 0.0743, "theoretical_loss": 3.6857610107175645, "tokens_seen": 977010688 }, { "epoch": 0.3, "learning_rate": 0.0007109613224201573, "loss": 0.077, "theoretical_loss": 3.6856672379000415, "tokens_seen": 977272832 }, { "epoch": 0.3, "learning_rate": 0.0007108810784785749, "loss": 0.0763, "theoretical_loss": 3.6855734972736682, "tokens_seen": 977534976 }, { "epoch": 0.3, "learning_rate": 0.0007108008345369925, "loss": 0.0722, "theoretical_loss": 3.685479788818766, "tokens_seen": 977797120 }, { "epoch": 0.3, "learning_rate": 0.00071072059059541, "loss": 0.0754, "theoretical_loss": 3.6853861125156717, "tokens_seen": 978059264 }, { "epoch": 0.3, "learning_rate": 0.0007106403466538276, "loss": 0.0744, "theoretical_loss": 3.6852924683447412, "tokens_seen": 978321408 }, { "epoch": 0.3, "learning_rate": 0.0007105601027122452, "loss": 0.0723, "theoretical_loss": 3.6851988562863482, "tokens_seen": 978583552 }, { "epoch": 0.3, "learning_rate": 0.0007104798587706628, "loss": 0.0747, "theoretical_loss": 3.6851052763208823, "tokens_seen": 978845696 }, { "epoch": 0.3, "learning_rate": 0.0007103996148290805, "loss": 0.0719, "theoretical_loss": 3.6850117284287505, "tokens_seen": 979107840 }, { "epoch": 0.3, "learning_rate": 0.000710319370887498, "loss": 0.0708, "theoretical_loss": 3.6849182125903774, "tokens_seen": 979369984 }, { "epoch": 0.3, "learning_rate": 0.0007102391269459157, "loss": 0.0764, "theoretical_loss": 3.6848247287862046, "tokens_seen": 979632128 }, { "epoch": 0.3, "learning_rate": 0.0007101588830043332, "loss": 0.0758, "theoretical_loss": 3.684731276996691, "tokens_seen": 979894272 }, { "epoch": 0.3, "learning_rate": 0.0007100786390627508, "loss": 0.0747, "theoretical_loss": 3.684637857202312, "tokens_seen": 980156416 }, { "epoch": 0.3, "learning_rate": 0.0007099983951211684, "loss": 0.0739, "theoretical_loss": 3.684544469383562, "tokens_seen": 980418560 }, { "epoch": 0.3, "learning_rate": 0.0007099181511795859, "loss": 0.0724, "theoretical_loss": 3.6844511135209497, "tokens_seen": 980680704 }, { "epoch": 0.3, "learning_rate": 0.0007098379072380035, "loss": 0.0731, "theoretical_loss": 3.684357789595003, "tokens_seen": 980942848 }, { "epoch": 0.3, "learning_rate": 0.0007097576632964211, "loss": 0.0755, "theoretical_loss": 3.684264497586266, "tokens_seen": 981204992 }, { "epoch": 0.3, "learning_rate": 0.0007096774193548388, "loss": 0.0752, "theoretical_loss": 3.684171237475301, "tokens_seen": 981467136 }, { "epoch": 0.3, "learning_rate": 0.0007095971754132563, "loss": 0.0752, "theoretical_loss": 3.6840780092426852, "tokens_seen": 981729280 }, { "epoch": 0.3, "learning_rate": 0.000709516931471674, "loss": 0.0736, "theoretical_loss": 3.6839848128690145, "tokens_seen": 981991424 }, { "epoch": 0.3, "learning_rate": 0.0007094366875300915, "loss": 0.076, "theoretical_loss": 3.683891648334901, "tokens_seen": 982253568 }, { "epoch": 0.3, "learning_rate": 0.000709356443588509, "loss": 0.0751, "theoretical_loss": 3.6837985156209743, "tokens_seen": 982515712 }, { "epoch": 0.3, "learning_rate": 0.0007092761996469267, "loss": 0.0773, "theoretical_loss": 3.683705414707881, "tokens_seen": 982777856 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0013032013084739447, "objective/train/docs_used": 361383, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.466055154800415, "objective/train/original_loss": 1.466055154800415, "objective/train/theoretical_loss": 3.6836123455762837, "objective/train/tokens_used": 1003500000, "objective/train/value_avg": -0.007678985595703125, "objective/train/value_loss": 0.00017884932458400726, "objective/train/value_max": -0.00016868114471435547, "objective/train/value_min": -0.6962890625, "objective/train/value_reward_corr": 0.6831618657773055, "objective/train/value_std": 0.0128631591796875, "objective/train/weight_avg": 1.0013844966888428, "objective/train/weighted_lm_loss": 1.4686609506607056, "objective/train/weights_max": 1.261329174041748, "objective/train/weights_min": 0.37812936305999756, "theoretical_loss": 3.6836123455762837, "tokens_seen": 983040000 }, { "epoch": 0.3, "learning_rate": 0.0007091959557053442, "loss": 0.0733, "theoretical_loss": 3.6836123455762837, "tokens_seen": 983040000 }, { "epoch": 0.3, "learning_rate": 0.0007091157117637619, "loss": 0.0749, "theoretical_loss": 3.683519308206863, "tokens_seen": 983302144 }, { "epoch": 0.3, "learning_rate": 0.0007090354678221794, "loss": 0.075, "theoretical_loss": 3.683426302580316, "tokens_seen": 983564288 }, { "epoch": 0.3, "learning_rate": 0.000708955223880597, "loss": 0.0761, "theoretical_loss": 3.683333328677356, "tokens_seen": 983826432 }, { "epoch": 0.3, "learning_rate": 0.0007088749799390147, "loss": 0.0706, "theoretical_loss": 3.6832403864787144, "tokens_seen": 984088576 }, { "epoch": 0.3, "learning_rate": 0.0007087947359974322, "loss": 0.0748, "theoretical_loss": 3.683147475965139, "tokens_seen": 984350720 }, { "epoch": 0.3, "learning_rate": 0.0007087144920558498, "loss": 0.0758, "theoretical_loss": 3.683054597117393, "tokens_seen": 984612864 }, { "epoch": 0.3, "learning_rate": 0.0007086342481142674, "loss": 0.0756, "theoretical_loss": 3.6829617499162595, "tokens_seen": 984875008 }, { "epoch": 0.3, "learning_rate": 0.000708554004172685, "loss": 0.075, "theoretical_loss": 3.6828689343425345, "tokens_seen": 985137152 }, { "epoch": 0.3, "learning_rate": 0.0007084737602311025, "loss": 0.074, "theoretical_loss": 3.682776150377034, "tokens_seen": 985399296 }, { "epoch": 0.3, "learning_rate": 0.0007083935162895202, "loss": 0.0762, "theoretical_loss": 3.682683398000589, "tokens_seen": 985661440 }, { "epoch": 0.3, "learning_rate": 0.0007083132723479377, "loss": 0.0771, "theoretical_loss": 3.6825906771940478, "tokens_seen": 985923584 }, { "epoch": 0.3, "learning_rate": 0.0007082330284063552, "loss": 0.0737, "theoretical_loss": 3.682497987938275, "tokens_seen": 986185728 }, { "epoch": 0.3, "learning_rate": 0.000708152784464773, "loss": 0.0731, "theoretical_loss": 3.682405330214153, "tokens_seen": 986447872 }, { "epoch": 0.3, "learning_rate": 0.0007080725405231905, "loss": 0.0716, "theoretical_loss": 3.682312704002579, "tokens_seen": 986710016 }, { "epoch": 0.3, "learning_rate": 0.0007079922965816082, "loss": 0.0761, "theoretical_loss": 3.6822201092844686, "tokens_seen": 986972160 }, { "epoch": 0.3, "learning_rate": 0.0007079120526400257, "loss": 0.0734, "theoretical_loss": 3.682127546040753, "tokens_seen": 987234304 }, { "epoch": 0.3, "learning_rate": 0.0007078318086984433, "loss": 0.075, "theoretical_loss": 3.6820350142523806, "tokens_seen": 987496448 }, { "epoch": 0.3, "learning_rate": 0.0007077515647568609, "loss": 0.0748, "theoretical_loss": 3.6819425139003155, "tokens_seen": 987758592 }, { "epoch": 0.3, "learning_rate": 0.0007076713208152784, "loss": 0.0761, "theoretical_loss": 3.6818500449655396, "tokens_seen": 988020736 }, { "epoch": 0.3, "learning_rate": 0.000707591076873696, "loss": 0.0722, "theoretical_loss": 3.6817576074290503, "tokens_seen": 988282880 }, { "epoch": 0.3, "learning_rate": 0.0007075108329321136, "loss": 0.0724, "theoretical_loss": 3.681665201271862, "tokens_seen": 988545024 }, { "epoch": 0.3, "learning_rate": 0.0007074305889905313, "loss": 0.0772, "theoretical_loss": 3.681572826475006, "tokens_seen": 988807168 }, { "epoch": 0.3, "learning_rate": 0.0007073503450489488, "loss": 0.0751, "theoretical_loss": 3.681480483019529, "tokens_seen": 989069312 }, { "epoch": 0.3, "learning_rate": 0.0007072701011073665, "loss": 0.0755, "theoretical_loss": 3.6813881708864953, "tokens_seen": 989331456 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0020013097673654556, "objective/train/docs_used": 363941, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4069637060165405, "objective/train/original_loss": 1.406963586807251, "objective/train/theoretical_loss": 3.681295890056985, "objective/train/tokens_used": 1010053600, "objective/train/value_avg": -0.00994873046875, "objective/train/value_loss": 0.00019873856217600405, "objective/train/value_max": -0.00013554096221923828, "objective/train/value_min": -0.890625, "objective/train/value_reward_corr": 0.7167314143110259, "objective/train/value_std": 0.01451873779296875, "objective/train/weight_avg": 1.0020908117294312, "objective/train/weighted_lm_loss": 1.4100981950759888, "objective/train/weights_max": 1.2156908512115479, "objective/train/weights_min": 0.36818236112594604, "theoretical_loss": 3.681295890056985, "tokens_seen": 989593600 }, { "epoch": 0.3, "learning_rate": 0.000707189857165784, "loss": 0.074, "theoretical_loss": 3.681295890056985, "tokens_seen": 989593600 }, { "epoch": 0.3, "learning_rate": 0.0007071096132242016, "loss": 0.0717, "theoretical_loss": 3.681203640512095, "tokens_seen": 989855744 }, { "epoch": 0.3, "learning_rate": 0.0007070293692826192, "loss": 0.0757, "theoretical_loss": 3.681111422232937, "tokens_seen": 990117888 }, { "epoch": 0.3, "learning_rate": 0.0007069491253410367, "loss": 0.0763, "theoretical_loss": 3.681019235200643, "tokens_seen": 990380032 }, { "epoch": 0.3, "learning_rate": 0.0007068688813994543, "loss": 0.0747, "theoretical_loss": 3.680927079396357, "tokens_seen": 990642176 }, { "epoch": 0.3, "learning_rate": 0.0007067886374578719, "loss": 0.0734, "theoretical_loss": 3.680834954801242, "tokens_seen": 990904320 }, { "epoch": 0.3, "learning_rate": 0.0007067083935162896, "loss": 0.0734, "theoretical_loss": 3.6807428613964763, "tokens_seen": 991166464 }, { "epoch": 0.3, "learning_rate": 0.0007066281495747072, "loss": 0.0777, "theoretical_loss": 3.6806507991632555, "tokens_seen": 991428608 }, { "epoch": 0.3, "learning_rate": 0.0007065479056331248, "loss": 0.0744, "theoretical_loss": 3.68055876808279, "tokens_seen": 991690752 }, { "epoch": 0.3, "learning_rate": 0.0007064676616915423, "loss": 0.079, "theoretical_loss": 3.680466768136308, "tokens_seen": 991952896 }, { "epoch": 0.3, "learning_rate": 0.0007063874177499599, "loss": 0.0758, "theoretical_loss": 3.680374799305053, "tokens_seen": 992215040 }, { "epoch": 0.3, "learning_rate": 0.0007063071738083775, "loss": 0.0742, "theoretical_loss": 3.6802828615702845, "tokens_seen": 992477184 }, { "epoch": 0.3, "learning_rate": 0.000706226929866795, "loss": 0.0764, "theoretical_loss": 3.6801909549132796, "tokens_seen": 992739328 }, { "epoch": 0.3, "learning_rate": 0.0007061466859252127, "loss": 0.0757, "theoretical_loss": 3.6800990793153305, "tokens_seen": 993001472 }, { "epoch": 0.3, "learning_rate": 0.0007060664419836302, "loss": 0.0749, "theoretical_loss": 3.6800072347577455, "tokens_seen": 993263616 }, { "epoch": 0.3, "learning_rate": 0.0007059861980420478, "loss": 0.0743, "theoretical_loss": 3.6799154212218506, "tokens_seen": 993525760 }, { "epoch": 0.3, "learning_rate": 0.0007059059541004655, "loss": 0.0726, "theoretical_loss": 3.679823638688985, "tokens_seen": 993787904 }, { "epoch": 0.3, "learning_rate": 0.000705825710158883, "loss": 0.0741, "theoretical_loss": 3.679731887140508, "tokens_seen": 994050048 }, { "epoch": 0.3, "learning_rate": 0.0007057454662173006, "loss": 0.071, "theoretical_loss": 3.6796401665577916, "tokens_seen": 994312192 }, { "epoch": 0.3, "learning_rate": 0.0007056652222757182, "loss": 0.0766, "theoretical_loss": 3.679548476922225, "tokens_seen": 994574336 }, { "epoch": 0.3, "learning_rate": 0.0007055849783341358, "loss": 0.0749, "theoretical_loss": 3.6794568182152143, "tokens_seen": 994836480 }, { "epoch": 0.3, "learning_rate": 0.0007055047343925533, "loss": 0.0756, "theoretical_loss": 3.6793651904181806, "tokens_seen": 995098624 }, { "epoch": 0.3, "learning_rate": 0.000705424490450971, "loss": 0.075, "theoretical_loss": 3.679273593512563, "tokens_seen": 995360768 }, { "epoch": 0.3, "learning_rate": 0.0007053442465093885, "loss": 0.0767, "theoretical_loss": 3.679182027479812, "tokens_seen": 995622912 }, { "epoch": 0.3, "learning_rate": 0.0007052640025678061, "loss": 0.0766, "theoretical_loss": 3.6790904923014005, "tokens_seen": 995885056 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.001141717773862183, "objective/train/docs_used": 366329, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4647668600082397, "objective/train/original_loss": 1.4647668600082397, "objective/train/theoretical_loss": 3.6789989879588125, "objective/train/tokens_used": 1016607200, "objective/train/value_avg": -0.00992584228515625, "objective/train/value_loss": 0.00031617286731489, "objective/train/value_max": -0.00011414289474487305, "objective/train/value_min": -0.49951171875, "objective/train/value_reward_corr": 0.7429311902922219, "objective/train/value_std": 0.0218963623046875, "objective/train/weight_avg": 1.0012768507003784, "objective/train/weighted_lm_loss": 1.465766429901123, "objective/train/weights_max": 1.3314474821090698, "objective/train/weights_min": 0.2260430008172989, "theoretical_loss": 3.6789989879588125, "tokens_seen": 996147200 }, { "epoch": 0.3, "learning_rate": 0.0007051837586262238, "loss": 0.0741, "theoretical_loss": 3.6789989879588125, "tokens_seen": 996147200 }, { "epoch": 0.3, "learning_rate": 0.0007051035146846413, "loss": 0.0743, "theoretical_loss": 3.6789075144335497, "tokens_seen": 996409344 }, { "epoch": 0.3, "learning_rate": 0.000705023270743059, "loss": 0.073, "theoretical_loss": 3.6788160717071303, "tokens_seen": 996671488 }, { "epoch": 0.3, "learning_rate": 0.0007049430268014765, "loss": 0.0771, "theoretical_loss": 3.678724659761087, "tokens_seen": 996933632 }, { "epoch": 0.3, "learning_rate": 0.0007048627828598941, "loss": 0.0732, "theoretical_loss": 3.6786332785769695, "tokens_seen": 997195776 }, { "epoch": 0.3, "learning_rate": 0.0007047825389183117, "loss": 0.0734, "theoretical_loss": 3.678541928136344, "tokens_seen": 997457920 }, { "epoch": 0.3, "learning_rate": 0.0007047022949767292, "loss": 0.0757, "theoretical_loss": 3.6784506084207904, "tokens_seen": 997720064 }, { "epoch": 0.3, "learning_rate": 0.0007046220510351468, "loss": 0.0779, "theoretical_loss": 3.6783593194119066, "tokens_seen": 997982208 }, { "epoch": 0.3, "learning_rate": 0.0007045418070935644, "loss": 0.076, "theoretical_loss": 3.6782680610913054, "tokens_seen": 998244352 }, { "epoch": 0.3, "learning_rate": 0.0007044615631519821, "loss": 0.079, "theoretical_loss": 3.6781768334406157, "tokens_seen": 998506496 }, { "epoch": 0.3, "learning_rate": 0.0007043813192103996, "loss": 0.0736, "theoretical_loss": 3.678085636441482, "tokens_seen": 998768640 }, { "epoch": 0.3, "learning_rate": 0.0007043010752688173, "loss": 0.0782, "theoretical_loss": 3.677994470075565, "tokens_seen": 999030784 }, { "epoch": 0.3, "learning_rate": 0.0007042208313272348, "loss": 0.0749, "theoretical_loss": 3.6779033343245406, "tokens_seen": 999292928 }, { "epoch": 0.3, "learning_rate": 0.0007041405873856524, "loss": 0.0755, "theoretical_loss": 3.677812229170101, "tokens_seen": 999555072 }, { "epoch": 0.3, "learning_rate": 0.00070406034344407, "loss": 0.0731, "theoretical_loss": 3.677721154593953, "tokens_seen": 999817216 }, { "epoch": 0.3, "learning_rate": 0.0007039800995024875, "loss": 0.0756, "theoretical_loss": 3.6776301105778213, "tokens_seen": 1000079360 }, { "epoch": 0.3, "learning_rate": 0.0007038998555609052, "loss": 0.0759, "theoretical_loss": 3.6775390971034447, "tokens_seen": 1000341504 }, { "epoch": 0.3, "learning_rate": 0.0007038196116193227, "loss": 0.0742, "theoretical_loss": 3.6774481141525777, "tokens_seen": 1000603648 }, { "epoch": 0.3, "learning_rate": 0.0007037393676777404, "loss": 0.075, "theoretical_loss": 3.6773571617069907, "tokens_seen": 1000865792 }, { "epoch": 0.3, "learning_rate": 0.000703659123736158, "loss": 0.0729, "theoretical_loss": 3.6772662397484703, "tokens_seen": 1001127936 }, { "epoch": 0.3, "learning_rate": 0.0007035788797945756, "loss": 0.0705, "theoretical_loss": 3.6771753482588183, "tokens_seen": 1001390080 }, { "epoch": 0.3, "learning_rate": 0.0007034986358529931, "loss": 0.0741, "theoretical_loss": 3.6770844872198523, "tokens_seen": 1001652224 }, { "epoch": 0.3, "learning_rate": 0.0007034183919114107, "loss": 0.0733, "theoretical_loss": 3.6769936566134045, "tokens_seen": 1001914368 }, { "epoch": 0.3, "learning_rate": 0.0007033381479698283, "loss": 0.0756, "theoretical_loss": 3.676902856421324, "tokens_seen": 1002176512 }, { "epoch": 0.3, "learning_rate": 0.0007032579040282458, "loss": 0.0758, "theoretical_loss": 3.6768120866254757, "tokens_seen": 1002438656 }, { "epoch": 0.3, "objective/train/advantage_avg": -1.4584275049855933e-05, "objective/train/docs_used": 368716, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5315086841583252, "objective/train/original_loss": 1.5315088033676147, "objective/train/theoretical_loss": 3.6767213472077387, "objective/train/tokens_used": 1023160800, "objective/train/value_avg": -0.0098419189453125, "objective/train/value_loss": 0.0005082013085484505, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.75244140625, "objective/train/value_reward_corr": 0.6553984384647921, "objective/train/value_std": 0.019561767578125, "objective/train/weight_avg": 1.0001962184906006, "objective/train/weighted_lm_loss": 1.5310980081558228, "objective/train/weights_max": 1.727001667022705, "objective/train/weights_min": 0.06900446861982346, "theoretical_loss": 3.6767213472077387, "tokens_seen": 1002700800 }, { "epoch": 0.3, "learning_rate": 0.0007031776600866635, "loss": 0.0743, "theoretical_loss": 3.6767213472077387, "tokens_seen": 1002700800 }, { "epoch": 0.3, "learning_rate": 0.000703097416145081, "loss": 0.0729, "theoretical_loss": 3.676630638150008, "tokens_seen": 1002962944 }, { "epoch": 0.3, "learning_rate": 0.0007030171722034986, "loss": 0.072, "theoretical_loss": 3.6765399594341943, "tokens_seen": 1003225088 }, { "epoch": 0.3, "learning_rate": 0.0007029369282619163, "loss": 0.0758, "theoretical_loss": 3.676449311042225, "tokens_seen": 1003487232 }, { "epoch": 0.3, "learning_rate": 0.0007028566843203338, "loss": 0.0788, "theoretical_loss": 3.6763586929560415, "tokens_seen": 1003749376 }, { "epoch": 0.3, "learning_rate": 0.0007027764403787515, "loss": 0.0757, "theoretical_loss": 3.6762681051576003, "tokens_seen": 1004011520 }, { "epoch": 0.3, "learning_rate": 0.000702696196437169, "loss": 0.074, "theoretical_loss": 3.6761775476288747, "tokens_seen": 1004273664 }, { "epoch": 0.3, "learning_rate": 0.0007026159524955866, "loss": 0.0751, "theoretical_loss": 3.6760870203518525, "tokens_seen": 1004535808 }, { "epoch": 0.3, "learning_rate": 0.0007025357085540042, "loss": 0.0715, "theoretical_loss": 3.6759965233085383, "tokens_seen": 1004797952 }, { "epoch": 0.3, "learning_rate": 0.0007024554646124218, "loss": 0.0748, "theoretical_loss": 3.6759060564809496, "tokens_seen": 1005060096 }, { "epoch": 0.3, "learning_rate": 0.0007023752206708393, "loss": 0.0749, "theoretical_loss": 3.6758156198511216, "tokens_seen": 1005322240 }, { "epoch": 0.3, "learning_rate": 0.0007022949767292569, "loss": 0.0759, "theoretical_loss": 3.675725213401104, "tokens_seen": 1005584384 }, { "epoch": 0.3, "learning_rate": 0.0007022147327876746, "loss": 0.0758, "theoretical_loss": 3.6756348371129617, "tokens_seen": 1005846528 }, { "epoch": 0.3, "learning_rate": 0.0007021344888460921, "loss": 0.0775, "theoretical_loss": 3.6755444909687744, "tokens_seen": 1006108672 }, { "epoch": 0.3, "learning_rate": 0.0007020542449045098, "loss": 0.0745, "theoretical_loss": 3.675454174950639, "tokens_seen": 1006370816 }, { "epoch": 0.31, "learning_rate": 0.0007019740009629273, "loss": 0.0733, "theoretical_loss": 3.675363889040666, "tokens_seen": 1006632960 }, { "epoch": 0.31, "learning_rate": 0.0007018937570213449, "loss": 0.075, "theoretical_loss": 3.675273633220981, "tokens_seen": 1006895104 }, { "epoch": 0.31, "learning_rate": 0.0007018135130797625, "loss": 0.0745, "theoretical_loss": 3.6751834074737264, "tokens_seen": 1007157248 }, { "epoch": 0.31, "learning_rate": 0.00070173326913818, "loss": 0.0761, "theoretical_loss": 3.675093211781059, "tokens_seen": 1007419392 }, { "epoch": 0.31, "learning_rate": 0.0007016530251965977, "loss": 0.0773, "theoretical_loss": 3.67500304612515, "tokens_seen": 1007681536 }, { "epoch": 0.31, "learning_rate": 0.0007015727812550152, "loss": 0.0757, "theoretical_loss": 3.674912910488187, "tokens_seen": 1007943680 }, { "epoch": 0.31, "learning_rate": 0.0007014925373134329, "loss": 0.076, "theoretical_loss": 3.6748228048523726, "tokens_seen": 1008205824 }, { "epoch": 0.31, "learning_rate": 0.0007014122933718505, "loss": 0.0784, "theoretical_loss": 3.674732729199924, "tokens_seen": 1008467968 }, { "epoch": 0.31, "learning_rate": 0.0007013320494302681, "loss": 0.0741, "theoretical_loss": 3.674642683513074, "tokens_seen": 1008730112 }, { "epoch": 0.31, "learning_rate": 0.0007012518054886856, "loss": 0.0764, "theoretical_loss": 3.674552667774071, "tokens_seen": 1008992256 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0006658255588263273, "objective/train/docs_used": 370943, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.50751793384552, "objective/train/original_loss": 1.50751793384552, "objective/train/theoretical_loss": 3.6744626819651773, "objective/train/tokens_used": 1029714400, "objective/train/value_avg": -0.007167816162109375, "objective/train/value_loss": 0.00024274643510580063, "objective/train/value_max": -0.000110626220703125, "objective/train/value_min": -0.354736328125, "objective/train/value_reward_corr": 0.6235155171541196, "objective/train/value_std": 0.01125335693359375, "objective/train/weight_avg": 1.0007777214050293, "objective/train/weighted_lm_loss": 1.5075055360794067, "objective/train/weights_max": 1.4258047342300415, "objective/train/weights_min": 0.39044809341430664, "theoretical_loss": 3.6744626819651773, "tokens_seen": 1009254400 }, { "epoch": 0.31, "learning_rate": 0.0007011715615471032, "loss": 0.073, "theoretical_loss": 3.6744626819651773, "tokens_seen": 1009254400 }, { "epoch": 0.31, "learning_rate": 0.0007010913176055208, "loss": 0.0745, "theoretical_loss": 3.674372726068671, "tokens_seen": 1009516544 }, { "epoch": 0.31, "learning_rate": 0.0007010110736639383, "loss": 0.0737, "theoretical_loss": 3.6742828000668464, "tokens_seen": 1009778688 }, { "epoch": 0.31, "learning_rate": 0.000700930829722356, "loss": 0.0745, "theoretical_loss": 3.6741929039420103, "tokens_seen": 1010040832 }, { "epoch": 0.31, "learning_rate": 0.0007008505857807735, "loss": 0.0739, "theoretical_loss": 3.6741030376764865, "tokens_seen": 1010302976 }, { "epoch": 0.31, "learning_rate": 0.0007007703418391912, "loss": 0.0764, "theoretical_loss": 3.674013201252614, "tokens_seen": 1010565120 }, { "epoch": 0.31, "learning_rate": 0.0007006900978976088, "loss": 0.0749, "theoretical_loss": 3.6739233946527454, "tokens_seen": 1010827264 }, { "epoch": 0.31, "learning_rate": 0.0007006098539560264, "loss": 0.0727, "theoretical_loss": 3.6738336178592492, "tokens_seen": 1011089408 }, { "epoch": 0.31, "learning_rate": 0.0007005296100144439, "loss": 0.0792, "theoretical_loss": 3.6737438708545094, "tokens_seen": 1011351552 }, { "epoch": 0.31, "learning_rate": 0.0007004493660728615, "loss": 0.0749, "theoretical_loss": 3.673654153620924, "tokens_seen": 1011613696 }, { "epoch": 0.31, "learning_rate": 0.0007003691221312791, "loss": 0.0782, "theoretical_loss": 3.673564466140906, "tokens_seen": 1011875840 }, { "epoch": 0.31, "learning_rate": 0.0007002888781896967, "loss": 0.0746, "theoretical_loss": 3.6734748083968842, "tokens_seen": 1012137984 }, { "epoch": 0.31, "learning_rate": 0.0007002086342481143, "loss": 0.076, "theoretical_loss": 3.6733851803713016, "tokens_seen": 1012400128 }, { "epoch": 0.31, "learning_rate": 0.0007001283903065318, "loss": 0.0759, "theoretical_loss": 3.673295582046616, "tokens_seen": 1012662272 }, { "epoch": 0.31, "learning_rate": 0.0007000481463649496, "loss": 0.0755, "theoretical_loss": 3.6732060134053013, "tokens_seen": 1012924416 }, { "epoch": 0.31, "learning_rate": 0.0006999679024233671, "loss": 0.076, "theoretical_loss": 3.673116474429844, "tokens_seen": 1013186560 }, { "epoch": 0.31, "learning_rate": 0.0006998876584817846, "loss": 0.0775, "theoretical_loss": 3.673026965102748, "tokens_seen": 1013448704 }, { "epoch": 0.31, "learning_rate": 0.0006998074145402023, "loss": 0.0738, "theoretical_loss": 3.67293748540653, "tokens_seen": 1013710848 }, { "epoch": 0.31, "learning_rate": 0.0006997271705986198, "loss": 0.0737, "theoretical_loss": 3.672848035323723, "tokens_seen": 1013972992 }, { "epoch": 0.31, "learning_rate": 0.0006996469266570374, "loss": 0.0745, "theoretical_loss": 3.6727586148368743, "tokens_seen": 1014235136 }, { "epoch": 0.31, "learning_rate": 0.000699566682715455, "loss": 0.0753, "theoretical_loss": 3.672669223928545, "tokens_seen": 1014497280 }, { "epoch": 0.31, "learning_rate": 0.0006994864387738726, "loss": 0.0744, "theoretical_loss": 3.672579862581313, "tokens_seen": 1014759424 }, { "epoch": 0.31, "learning_rate": 0.0006994061948322901, "loss": 0.0746, "theoretical_loss": 3.672490530777769, "tokens_seen": 1015021568 }, { "epoch": 0.31, "learning_rate": 0.0006993259508907077, "loss": 0.0751, "theoretical_loss": 3.6724012285005196, "tokens_seen": 1015283712 }, { "epoch": 0.31, "learning_rate": 0.0006992457069491254, "loss": 0.0763, "theoretical_loss": 3.6723119557321864, "tokens_seen": 1015545856 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0006244329269975424, "objective/train/docs_used": 372984, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4833120107650757, "objective/train/original_loss": 1.4833121299743652, "objective/train/theoretical_loss": 3.6722227124554045, "objective/train/tokens_used": 1036268000, "objective/train/value_avg": -0.007251739501953125, "objective/train/value_loss": 0.00037429071380756795, "objective/train/value_max": -8.094310760498047e-05, "objective/train/value_min": -0.8642578125, "objective/train/value_reward_corr": 0.6612852682359046, "objective/train/value_std": 0.01514434814453125, "objective/train/weight_avg": 1.0007858276367188, "objective/train/weighted_lm_loss": 1.484993577003479, "objective/train/weights_max": 1.7426632642745972, "objective/train/weights_min": 0.3694700598716736, "theoretical_loss": 3.6722227124554045, "tokens_seen": 1015808000 }, { "epoch": 0.31, "learning_rate": 0.0006991654630075429, "loss": 0.0756, "theoretical_loss": 3.6722227124554045, "tokens_seen": 1015808000 }, { "epoch": 0.31, "learning_rate": 0.0006990852190659606, "loss": 0.0727, "theoretical_loss": 3.6721334986528236, "tokens_seen": 1016070144 }, { "epoch": 0.31, "learning_rate": 0.0006990049751243781, "loss": 0.0766, "theoretical_loss": 3.6720443143071106, "tokens_seen": 1016332288 }, { "epoch": 0.31, "learning_rate": 0.0006989247311827958, "loss": 0.0743, "theoretical_loss": 3.671955159400943, "tokens_seen": 1016594432 }, { "epoch": 0.31, "learning_rate": 0.0006988444872412133, "loss": 0.0763, "theoretical_loss": 3.6718660339170173, "tokens_seen": 1016856576 }, { "epoch": 0.31, "learning_rate": 0.0006987642432996308, "loss": 0.0758, "theoretical_loss": 3.6717769378380414, "tokens_seen": 1017118720 }, { "epoch": 0.31, "learning_rate": 0.0006986839993580485, "loss": 0.0748, "theoretical_loss": 3.671687871146739, "tokens_seen": 1017380864 }, { "epoch": 0.31, "learning_rate": 0.000698603755416466, "loss": 0.0731, "theoretical_loss": 3.6715988338258487, "tokens_seen": 1017643008 }, { "epoch": 0.31, "learning_rate": 0.0006985235114748837, "loss": 0.0745, "theoretical_loss": 3.6715098258581236, "tokens_seen": 1017905152 }, { "epoch": 0.31, "learning_rate": 0.0006984432675333013, "loss": 0.0761, "theoretical_loss": 3.6714208472263303, "tokens_seen": 1018167296 }, { "epoch": 0.31, "learning_rate": 0.0006983630235917189, "loss": 0.0738, "theoretical_loss": 3.6713318979132517, "tokens_seen": 1018429440 }, { "epoch": 0.31, "learning_rate": 0.0006982827796501364, "loss": 0.0752, "theoretical_loss": 3.671242977901683, "tokens_seen": 1018691584 }, { "epoch": 0.31, "learning_rate": 0.000698202535708554, "loss": 0.0762, "theoretical_loss": 3.671154087174436, "tokens_seen": 1018953728 }, { "epoch": 0.31, "learning_rate": 0.0006981222917669716, "loss": 0.0762, "theoretical_loss": 3.6710652257143366, "tokens_seen": 1019215872 }, { "epoch": 0.31, "learning_rate": 0.0006980420478253891, "loss": 0.0755, "theoretical_loss": 3.6709763935042243, "tokens_seen": 1019478016 }, { "epoch": 0.31, "learning_rate": 0.0006979618038838068, "loss": 0.0759, "theoretical_loss": 3.670887590526953, "tokens_seen": 1019740160 }, { "epoch": 0.31, "learning_rate": 0.0006978815599422243, "loss": 0.0753, "theoretical_loss": 3.6707988167653927, "tokens_seen": 1020002304 }, { "epoch": 0.31, "learning_rate": 0.0006978013160006421, "loss": 0.0749, "theoretical_loss": 3.670710072202426, "tokens_seen": 1020264448 }, { "epoch": 0.31, "learning_rate": 0.0006977210720590596, "loss": 0.0762, "theoretical_loss": 3.670621356820951, "tokens_seen": 1020526592 }, { "epoch": 0.31, "learning_rate": 0.0006976408281174772, "loss": 0.0736, "theoretical_loss": 3.67053267060388, "tokens_seen": 1020788736 }, { "epoch": 0.31, "learning_rate": 0.0006975605841758948, "loss": 0.0761, "theoretical_loss": 3.6704440135341394, "tokens_seen": 1021050880 }, { "epoch": 0.31, "learning_rate": 0.0006974803402343123, "loss": 0.0755, "theoretical_loss": 3.6703553855946702, "tokens_seen": 1021313024 }, { "epoch": 0.31, "learning_rate": 0.0006974000962927299, "loss": 0.0732, "theoretical_loss": 3.6702667867684275, "tokens_seen": 1021575168 }, { "epoch": 0.31, "learning_rate": 0.0006973198523511475, "loss": 0.0725, "theoretical_loss": 3.670178217038381, "tokens_seen": 1021837312 }, { "epoch": 0.31, "learning_rate": 0.0006972396084095651, "loss": 0.0737, "theoretical_loss": 3.670089676387515, "tokens_seen": 1022099456 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.000926980166696012, "objective/train/docs_used": 375283, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5110857486724854, "objective/train/original_loss": 1.5110857486724854, "objective/train/theoretical_loss": 3.6700011647988275, "objective/train/tokens_used": 1042821600, "objective/train/value_avg": -0.01114654541015625, "objective/train/value_loss": 0.00015908897330518812, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.3095703125, "objective/train/value_reward_corr": 0.9276620899819128, "objective/train/value_std": 0.02862548828125, "objective/train/weight_avg": 1.0010052919387817, "objective/train/weighted_lm_loss": 1.5121495723724365, "objective/train/weights_max": 1.142175316810608, "objective/train/weights_min": 0.7194867134094238, "theoretical_loss": 3.6700011647988275, "tokens_seen": 1022361600 }, { "epoch": 0.31, "learning_rate": 0.0006971593644679826, "loss": 0.0772, "theoretical_loss": 3.6700011647988275, "tokens_seen": 1022361600 }, { "epoch": 0.31, "learning_rate": 0.0006970791205264004, "loss": 0.0725, "theoretical_loss": 3.6699126822553314, "tokens_seen": 1022623744 }, { "epoch": 0.31, "learning_rate": 0.0006969988765848179, "loss": 0.0735, "theoretical_loss": 3.669824228740053, "tokens_seen": 1022885888 }, { "epoch": 0.31, "learning_rate": 0.0006969186326432354, "loss": 0.0773, "theoretical_loss": 3.6697358042360344, "tokens_seen": 1023148032 }, { "epoch": 0.31, "learning_rate": 0.0006968383887016531, "loss": 0.0755, "theoretical_loss": 3.6696474087263296, "tokens_seen": 1023410176 }, { "epoch": 0.31, "learning_rate": 0.0006967581447600706, "loss": 0.0773, "theoretical_loss": 3.6695590421940096, "tokens_seen": 1023672320 }, { "epoch": 0.31, "learning_rate": 0.0006966779008184882, "loss": 0.076, "theoretical_loss": 3.6694707046221575, "tokens_seen": 1023934464 }, { "epoch": 0.31, "learning_rate": 0.0006965976568769058, "loss": 0.074, "theoretical_loss": 3.669382395993871, "tokens_seen": 1024196608 }, { "epoch": 0.31, "learning_rate": 0.0006965174129353234, "loss": 0.0753, "theoretical_loss": 3.669294116292263, "tokens_seen": 1024458752 }, { "epoch": 0.31, "learning_rate": 0.000696437168993741, "loss": 0.0748, "theoretical_loss": 3.6692058655004605, "tokens_seen": 1024720896 }, { "epoch": 0.31, "learning_rate": 0.0006963569250521585, "loss": 0.0766, "theoretical_loss": 3.669117643601602, "tokens_seen": 1024983040 }, { "epoch": 0.31, "learning_rate": 0.0006962766811105762, "loss": 0.0718, "theoretical_loss": 3.6690294505788446, "tokens_seen": 1025245184 }, { "epoch": 0.31, "learning_rate": 0.0006961964371689938, "loss": 0.0737, "theoretical_loss": 3.668941286415355, "tokens_seen": 1025507328 }, { "epoch": 0.31, "learning_rate": 0.0006961161932274114, "loss": 0.0729, "theoretical_loss": 3.668853151094318, "tokens_seen": 1025769472 }, { "epoch": 0.31, "learning_rate": 0.0006960359492858289, "loss": 0.0727, "theoretical_loss": 3.6687650445989295, "tokens_seen": 1026031616 }, { "epoch": 0.31, "learning_rate": 0.0006959557053442466, "loss": 0.0731, "theoretical_loss": 3.6686769669124004, "tokens_seen": 1026293760 }, { "epoch": 0.31, "learning_rate": 0.0006958754614026641, "loss": 0.0744, "theoretical_loss": 3.6685889180179565, "tokens_seen": 1026555904 }, { "epoch": 0.31, "learning_rate": 0.0006957952174610816, "loss": 0.0741, "theoretical_loss": 3.668500897898837, "tokens_seen": 1026818048 }, { "epoch": 0.31, "learning_rate": 0.0006957149735194993, "loss": 0.0753, "theoretical_loss": 3.668412906538295, "tokens_seen": 1027080192 }, { "epoch": 0.31, "learning_rate": 0.0006956347295779168, "loss": 0.076, "theoretical_loss": 3.6683249439195977, "tokens_seen": 1027342336 }, { "epoch": 0.31, "learning_rate": 0.0006955544856363345, "loss": 0.0751, "theoretical_loss": 3.668237010026026, "tokens_seen": 1027604480 }, { "epoch": 0.31, "learning_rate": 0.0006954742416947521, "loss": 0.0752, "theoretical_loss": 3.668149104840876, "tokens_seen": 1027866624 }, { "epoch": 0.31, "learning_rate": 0.0006953939977531697, "loss": 0.0747, "theoretical_loss": 3.6680612283474567, "tokens_seen": 1028128768 }, { "epoch": 0.31, "learning_rate": 0.0006953137538115872, "loss": 0.0742, "theoretical_loss": 3.667973380529091, "tokens_seen": 1028390912 }, { "epoch": 0.31, "learning_rate": 0.0006952335098700048, "loss": 0.0743, "theoretical_loss": 3.6678855613691157, "tokens_seen": 1028653056 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0015299071092158556, "objective/train/docs_used": 377682, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4341442584991455, "objective/train/original_loss": 1.4341442584991455, "objective/train/theoretical_loss": 3.667797770850883, "objective/train/tokens_used": 1049375200, "objective/train/value_avg": -0.00940704345703125, "objective/train/value_loss": 0.00021609137183986604, "objective/train/value_max": -0.0001214742660522461, "objective/train/value_min": -0.354248046875, "objective/train/value_reward_corr": 0.8563500147459093, "objective/train/value_std": 0.0274658203125, "objective/train/weight_avg": 1.0016289949417114, "objective/train/weighted_lm_loss": 1.4355690479278564, "objective/train/weights_max": 1.3001129627227783, "objective/train/weights_min": 0.36912843585014343, "theoretical_loss": 3.667797770850883, "tokens_seen": 1028915200 }, { "epoch": 0.31, "learning_rate": 0.0006951532659284224, "loss": 0.0732, "theoretical_loss": 3.667797770850883, "tokens_seen": 1028915200 }, { "epoch": 0.31, "learning_rate": 0.00069507302198684, "loss": 0.0749, "theoretical_loss": 3.667710008957756, "tokens_seen": 1029177344 }, { "epoch": 0.31, "learning_rate": 0.0006949927780452576, "loss": 0.0775, "theoretical_loss": 3.667622275673115, "tokens_seen": 1029439488 }, { "epoch": 0.31, "learning_rate": 0.0006949125341036751, "loss": 0.0747, "theoretical_loss": 3.667534570980353, "tokens_seen": 1029701632 }, { "epoch": 0.31, "learning_rate": 0.0006948322901620929, "loss": 0.0761, "theoretical_loss": 3.667446894862876, "tokens_seen": 1029963776 }, { "epoch": 0.31, "learning_rate": 0.0006947520462205104, "loss": 0.073, "theoretical_loss": 3.667359247304104, "tokens_seen": 1030225920 }, { "epoch": 0.31, "learning_rate": 0.0006946718022789279, "loss": 0.0739, "theoretical_loss": 3.667271628287472, "tokens_seen": 1030488064 }, { "epoch": 0.31, "learning_rate": 0.0006945915583373456, "loss": 0.0751, "theoretical_loss": 3.6671840377964275, "tokens_seen": 1030750208 }, { "epoch": 0.31, "learning_rate": 0.0006945113143957631, "loss": 0.0754, "theoretical_loss": 3.667096475814433, "tokens_seen": 1031012352 }, { "epoch": 0.31, "learning_rate": 0.0006944310704541807, "loss": 0.0746, "theoretical_loss": 3.6670089423249643, "tokens_seen": 1031274496 }, { "epoch": 0.31, "learning_rate": 0.0006943508265125983, "loss": 0.0784, "theoretical_loss": 3.6669214373115104, "tokens_seen": 1031536640 }, { "epoch": 0.31, "learning_rate": 0.0006942705825710159, "loss": 0.0745, "theoretical_loss": 3.6668339607575744, "tokens_seen": 1031798784 }, { "epoch": 0.31, "learning_rate": 0.0006941903386294334, "loss": 0.0726, "theoretical_loss": 3.6667465126466743, "tokens_seen": 1032060928 }, { "epoch": 0.31, "learning_rate": 0.0006941100946878512, "loss": 0.0708, "theoretical_loss": 3.6666590929623393, "tokens_seen": 1032323072 }, { "epoch": 0.31, "learning_rate": 0.0006940298507462687, "loss": 0.0717, "theoretical_loss": 3.666571701688115, "tokens_seen": 1032585216 }, { "epoch": 0.31, "learning_rate": 0.0006939496068046863, "loss": 0.0767, "theoretical_loss": 3.6664843388075594, "tokens_seen": 1032847360 }, { "epoch": 0.31, "learning_rate": 0.0006938693628631039, "loss": 0.0705, "theoretical_loss": 3.6663970043042435, "tokens_seen": 1033109504 }, { "epoch": 0.31, "learning_rate": 0.0006937891189215214, "loss": 0.0717, "theoretical_loss": 3.6663096981617533, "tokens_seen": 1033371648 }, { "epoch": 0.31, "learning_rate": 0.0006937088749799391, "loss": 0.0716, "theoretical_loss": 3.6662224203636886, "tokens_seen": 1033633792 }, { "epoch": 0.31, "learning_rate": 0.0006936286310383566, "loss": 0.0744, "theoretical_loss": 3.6661351708936616, "tokens_seen": 1033895936 }, { "epoch": 0.31, "learning_rate": 0.0006935483870967742, "loss": 0.0693, "theoretical_loss": 3.6660479497352982, "tokens_seen": 1034158080 }, { "epoch": 0.31, "learning_rate": 0.0006934681431551918, "loss": 0.0753, "theoretical_loss": 3.665960756872239, "tokens_seen": 1034420224 }, { "epoch": 0.31, "learning_rate": 0.0006933878992136093, "loss": 0.0745, "theoretical_loss": 3.6658735922881376, "tokens_seen": 1034682368 }, { "epoch": 0.31, "learning_rate": 0.000693307655272027, "loss": 0.0744, "theoretical_loss": 3.665786455966661, "tokens_seen": 1034944512 }, { "epoch": 0.31, "learning_rate": 0.0006932274113304446, "loss": 0.0747, "theoretical_loss": 3.6656993478914903, "tokens_seen": 1035206656 }, { "epoch": 0.31, "objective/train/advantage_avg": -7.297027332242578e-05, "objective/train/docs_used": 379908, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4424421787261963, "objective/train/original_loss": 1.4424422979354858, "objective/train/theoretical_loss": 3.6656122680463197, "objective/train/tokens_used": 1055928800, "objective/train/value_avg": -0.00970458984375, "objective/train/value_loss": 0.0001646455639274791, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.310302734375, "objective/train/value_reward_corr": 0.9264396814702418, "objective/train/value_std": 0.0283050537109375, "objective/train/weight_avg": 1.0000029802322388, "objective/train/weighted_lm_loss": 1.4421523809432983, "objective/train/weights_max": 1.2352874279022217, "objective/train/weights_min": 0.3707435429096222, "theoretical_loss": 3.6656122680463197, "tokens_seen": 1035468800 }, { "epoch": 0.31, "learning_rate": 0.0006931471673888622, "loss": 0.0754, "theoretical_loss": 3.6656122680463197, "tokens_seen": 1035468800 }, { "epoch": 0.31, "learning_rate": 0.0006930669234472797, "loss": 0.0729, "theoretical_loss": 3.6655252164148564, "tokens_seen": 1035730944 }, { "epoch": 0.31, "learning_rate": 0.0006929866795056974, "loss": 0.0758, "theoretical_loss": 3.6654381929808233, "tokens_seen": 1035993088 }, { "epoch": 0.31, "learning_rate": 0.0006929064355641149, "loss": 0.0735, "theoretical_loss": 3.6653511977279534, "tokens_seen": 1036255232 }, { "epoch": 0.31, "learning_rate": 0.0006928261916225324, "loss": 0.0746, "theoretical_loss": 3.6652642306399965, "tokens_seen": 1036517376 }, { "epoch": 0.31, "learning_rate": 0.0006927459476809501, "loss": 0.0749, "theoretical_loss": 3.6651772917007137, "tokens_seen": 1036779520 }, { "epoch": 0.31, "learning_rate": 0.0006926657037393676, "loss": 0.0754, "theoretical_loss": 3.6650903808938806, "tokens_seen": 1037041664 }, { "epoch": 0.31, "learning_rate": 0.0006925854597977854, "loss": 0.0752, "theoretical_loss": 3.6650034982032857, "tokens_seen": 1037303808 }, { "epoch": 0.31, "learning_rate": 0.0006925052158562029, "loss": 0.0734, "theoretical_loss": 3.664916643612732, "tokens_seen": 1037565952 }, { "epoch": 0.31, "learning_rate": 0.0006924249719146205, "loss": 0.0729, "theoretical_loss": 3.6648298171060345, "tokens_seen": 1037828096 }, { "epoch": 0.31, "learning_rate": 0.0006923447279730381, "loss": 0.0726, "theoretical_loss": 3.6647430186670222, "tokens_seen": 1038090240 }, { "epoch": 0.31, "learning_rate": 0.0006922644840314556, "loss": 0.0734, "theoretical_loss": 3.6646562482795373, "tokens_seen": 1038352384 }, { "epoch": 0.31, "learning_rate": 0.0006921842400898732, "loss": 0.0721, "theoretical_loss": 3.664569505927436, "tokens_seen": 1038614528 }, { "epoch": 0.31, "learning_rate": 0.0006921039961482908, "loss": 0.0736, "theoretical_loss": 3.664482791594588, "tokens_seen": 1038876672 }, { "epoch": 0.31, "learning_rate": 0.0006920237522067084, "loss": 0.0738, "theoretical_loss": 3.664396105264875, "tokens_seen": 1039138816 }, { "epoch": 0.31, "learning_rate": 0.0006919435082651259, "loss": 0.0735, "theoretical_loss": 3.6643094469221933, "tokens_seen": 1039400960 }, { "epoch": 0.32, "learning_rate": 0.0006918632643235437, "loss": 0.0734, "theoretical_loss": 3.664222816550452, "tokens_seen": 1039663104 }, { "epoch": 0.32, "learning_rate": 0.0006917830203819612, "loss": 0.0723, "theoretical_loss": 3.6641362141335727, "tokens_seen": 1039925248 }, { "epoch": 0.32, "learning_rate": 0.0006917027764403787, "loss": 0.0731, "theoretical_loss": 3.6640496396554925, "tokens_seen": 1040187392 }, { "epoch": 0.32, "learning_rate": 0.0006916225324987964, "loss": 0.0753, "theoretical_loss": 3.6639630931001594, "tokens_seen": 1040449536 }, { "epoch": 0.32, "learning_rate": 0.0006915422885572139, "loss": 0.0737, "theoretical_loss": 3.6638765744515367, "tokens_seen": 1040711680 }, { "epoch": 0.32, "learning_rate": 0.0006914620446156316, "loss": 0.076, "theoretical_loss": 3.6637900836935993, "tokens_seen": 1040973824 }, { "epoch": 0.32, "learning_rate": 0.0006913818006740491, "loss": 0.0736, "theoretical_loss": 3.6637036208103364, "tokens_seen": 1041235968 }, { "epoch": 0.32, "learning_rate": 0.0006913015567324667, "loss": 0.0711, "theoretical_loss": 3.663617185785749, "tokens_seen": 1041498112 }, { "epoch": 0.32, "learning_rate": 0.0006912213127908843, "loss": 0.0724, "theoretical_loss": 3.6635307786038536, "tokens_seen": 1041760256 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0013375055277720094, "objective/train/docs_used": 382133, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4263135194778442, "objective/train/original_loss": 1.4263136386871338, "objective/train/theoretical_loss": 3.663444399248678, "objective/train/tokens_used": 1062482400, "objective/train/value_avg": -0.006931304931640625, "objective/train/value_loss": 0.00021545964409597218, "objective/train/value_max": -0.00010150671005249023, "objective/train/value_min": -0.296875, "objective/train/value_reward_corr": 0.5908098049024169, "objective/train/value_std": 0.01110076904296875, "objective/train/weight_avg": 1.0014337301254272, "objective/train/weighted_lm_loss": 1.4280571937561035, "objective/train/weights_max": 1.1778565645217896, "objective/train/weights_min": 0.3711100220680237, "theoretical_loss": 3.663444399248678, "tokens_seen": 1042022400 }, { "epoch": 0.32, "learning_rate": 0.000691141068849302, "loss": 0.0719, "theoretical_loss": 3.663444399248678, "tokens_seen": 1042022400 }, { "epoch": 0.32, "learning_rate": 0.0006910608249077195, "loss": 0.0754, "theoretical_loss": 3.6633580477042633, "tokens_seen": 1042284544 }, { "epoch": 0.32, "learning_rate": 0.0006909805809661371, "loss": 0.0733, "theoretical_loss": 3.663271723954665, "tokens_seen": 1042546688 }, { "epoch": 0.32, "learning_rate": 0.0006909003370245547, "loss": 0.0739, "theoretical_loss": 3.6631854279839513, "tokens_seen": 1042808832 }, { "epoch": 0.32, "learning_rate": 0.0006908200930829722, "loss": 0.0703, "theoretical_loss": 3.6630991597762024, "tokens_seen": 1043070976 }, { "epoch": 0.32, "learning_rate": 0.0006907398491413899, "loss": 0.0742, "theoretical_loss": 3.6630129193155128, "tokens_seen": 1043333120 }, { "epoch": 0.32, "learning_rate": 0.0006906596051998074, "loss": 0.0733, "theoretical_loss": 3.6629267065859894, "tokens_seen": 1043595264 }, { "epoch": 0.32, "learning_rate": 0.000690579361258225, "loss": 0.0755, "theoretical_loss": 3.662840521571753, "tokens_seen": 1043857408 }, { "epoch": 0.32, "learning_rate": 0.0006904991173166426, "loss": 0.074, "theoretical_loss": 3.662754364256937, "tokens_seen": 1044119552 }, { "epoch": 0.32, "learning_rate": 0.0006904188733750601, "loss": 0.0773, "theoretical_loss": 3.662668234625688, "tokens_seen": 1044381696 }, { "epoch": 0.32, "learning_rate": 0.0006903386294334778, "loss": 0.0719, "theoretical_loss": 3.6625821326621653, "tokens_seen": 1044643840 }, { "epoch": 0.32, "learning_rate": 0.0006902583854918954, "loss": 0.0761, "theoretical_loss": 3.6624960583505404, "tokens_seen": 1044905984 }, { "epoch": 0.32, "learning_rate": 0.000690178141550313, "loss": 0.0717, "theoretical_loss": 3.662410011675001, "tokens_seen": 1045168128 }, { "epoch": 0.32, "learning_rate": 0.0006900978976087306, "loss": 0.0748, "theoretical_loss": 3.6623239926197444, "tokens_seen": 1045430272 }, { "epoch": 0.32, "learning_rate": 0.0006900176536671482, "loss": 0.0741, "theoretical_loss": 3.6622380011689826, "tokens_seen": 1045692416 }, { "epoch": 0.32, "learning_rate": 0.0006899374097255657, "loss": 0.0735, "theoretical_loss": 3.66215203730694, "tokens_seen": 1045954560 }, { "epoch": 0.32, "learning_rate": 0.0006898571657839833, "loss": 0.0728, "theoretical_loss": 3.6620661010178543, "tokens_seen": 1046216704 }, { "epoch": 0.32, "learning_rate": 0.0006897769218424009, "loss": 0.0737, "theoretical_loss": 3.6619801922859763, "tokens_seen": 1046478848 }, { "epoch": 0.32, "learning_rate": 0.0006896966779008184, "loss": 0.0726, "theoretical_loss": 3.661894311095568, "tokens_seen": 1046740992 }, { "epoch": 0.32, "learning_rate": 0.0006896164339592362, "loss": 0.0751, "theoretical_loss": 3.6618084574309075, "tokens_seen": 1047003136 }, { "epoch": 0.32, "learning_rate": 0.0006895361900176537, "loss": 0.0726, "theoretical_loss": 3.6617226312762834, "tokens_seen": 1047265280 }, { "epoch": 0.32, "learning_rate": 0.0006894559460760713, "loss": 0.0724, "theoretical_loss": 3.6616368326159976, "tokens_seen": 1047527424 }, { "epoch": 0.32, "learning_rate": 0.0006893757021344889, "loss": 0.0747, "theoretical_loss": 3.6615510614343654, "tokens_seen": 1047789568 }, { "epoch": 0.32, "learning_rate": 0.0006892954581929064, "loss": 0.0713, "theoretical_loss": 3.661465317715715, "tokens_seen": 1048051712 }, { "epoch": 0.32, "learning_rate": 0.000689215214251324, "loss": 0.0729, "theoretical_loss": 3.6613796014443865, "tokens_seen": 1048313856 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.000845851085614413, "objective/train/docs_used": 384505, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4578531980514526, "objective/train/original_loss": 1.457853078842163, "objective/train/theoretical_loss": 3.661293912604734, "objective/train/tokens_used": 1069036000, "objective/train/value_avg": -0.00762176513671875, "objective/train/value_loss": 0.0002575255639385432, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.26220703125, "objective/train/value_reward_corr": 0.5918434184109678, "objective/train/value_std": 0.01096343994140625, "objective/train/weight_avg": 1.0009592771530151, "objective/train/weighted_lm_loss": 1.459302544593811, "objective/train/weights_max": 1.1559641361236572, "objective/train/weights_min": 0.368507981300354, "theoretical_loss": 3.661293912604734, "tokens_seen": 1048576000 }, { "epoch": 0.32, "learning_rate": 0.0006891349703097416, "loss": 0.0725, "theoretical_loss": 3.661293912604734, "tokens_seen": 1048576000 }, { "epoch": 0.32, "learning_rate": 0.0006890547263681592, "loss": 0.0737, "theoretical_loss": 3.661208251181124, "tokens_seen": 1048838144 }, { "epoch": 0.32, "learning_rate": 0.0006889744824265767, "loss": 0.072, "theoretical_loss": 3.6611226171579356, "tokens_seen": 1049100288 }, { "epoch": 0.32, "learning_rate": 0.0006888942384849945, "loss": 0.0739, "theoretical_loss": 3.6610370105195607, "tokens_seen": 1049362432 }, { "epoch": 0.32, "learning_rate": 0.000688813994543412, "loss": 0.0753, "theoretical_loss": 3.660951431250405, "tokens_seen": 1049624576 }, { "epoch": 0.32, "learning_rate": 0.0006887337506018296, "loss": 0.071, "theoretical_loss": 3.6608658793348847, "tokens_seen": 1049886720 }, { "epoch": 0.32, "learning_rate": 0.0006886535066602472, "loss": 0.074, "theoretical_loss": 3.6607803547574314, "tokens_seen": 1050148864 }, { "epoch": 0.32, "learning_rate": 0.0006885732627186647, "loss": 0.0754, "theoretical_loss": 3.660694857502487, "tokens_seen": 1050411008 }, { "epoch": 0.32, "learning_rate": 0.0006884930187770824, "loss": 0.0729, "theoretical_loss": 3.660609387554509, "tokens_seen": 1050673152 }, { "epoch": 0.32, "learning_rate": 0.0006884127748354999, "loss": 0.0761, "theoretical_loss": 3.6605239448979647, "tokens_seen": 1050935296 }, { "epoch": 0.32, "learning_rate": 0.0006883325308939175, "loss": 0.0751, "theoretical_loss": 3.660438529517336, "tokens_seen": 1051197440 }, { "epoch": 0.32, "learning_rate": 0.0006882522869523351, "loss": 0.076, "theoretical_loss": 3.660353141397116, "tokens_seen": 1051459584 }, { "epoch": 0.32, "learning_rate": 0.0006881720430107528, "loss": 0.0765, "theoretical_loss": 3.6602677805218127, "tokens_seen": 1051721728 }, { "epoch": 0.32, "learning_rate": 0.0006880917990691703, "loss": 0.0754, "theoretical_loss": 3.660182446875944, "tokens_seen": 1051983872 }, { "epoch": 0.32, "learning_rate": 0.0006880115551275879, "loss": 0.0699, "theoretical_loss": 3.6600971404440434, "tokens_seen": 1052246016 }, { "epoch": 0.32, "learning_rate": 0.0006879313111860055, "loss": 0.0713, "theoretical_loss": 3.660011861210654, "tokens_seen": 1052508160 }, { "epoch": 0.32, "learning_rate": 0.000687851067244423, "loss": 0.0712, "theoretical_loss": 3.659926609160334, "tokens_seen": 1052770304 }, { "epoch": 0.32, "learning_rate": 0.0006877708233028407, "loss": 0.0701, "theoretical_loss": 3.6598413842776534, "tokens_seen": 1053032448 }, { "epoch": 0.32, "learning_rate": 0.0006876905793612582, "loss": 0.0729, "theoretical_loss": 3.6597561865471935, "tokens_seen": 1053294592 }, { "epoch": 0.32, "learning_rate": 0.0006876103354196759, "loss": 0.0774, "theoretical_loss": 3.6596710159535504, "tokens_seen": 1053556736 }, { "epoch": 0.32, "learning_rate": 0.0006875300914780934, "loss": 0.0753, "theoretical_loss": 3.659585872481331, "tokens_seen": 1053818880 }, { "epoch": 0.32, "learning_rate": 0.0006874498475365109, "loss": 0.0752, "theoretical_loss": 3.659500756115156, "tokens_seen": 1054081024 }, { "epoch": 0.32, "learning_rate": 0.0006873696035949287, "loss": 0.0736, "theoretical_loss": 3.659415666839658, "tokens_seen": 1054343168 }, { "epoch": 0.32, "learning_rate": 0.0006872893596533462, "loss": 0.0724, "theoretical_loss": 3.6593306046394813, "tokens_seen": 1054605312 }, { "epoch": 0.32, "learning_rate": 0.0006872091157117638, "loss": 0.075, "theoretical_loss": 3.6592455694992854, "tokens_seen": 1054867456 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.001419214648194611, "objective/train/docs_used": 386814, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3335970640182495, "objective/train/original_loss": 1.33359694480896, "objective/train/theoretical_loss": 3.659160561403739, "objective/train/tokens_used": 1075589600, "objective/train/value_avg": -0.007778167724609375, "objective/train/value_loss": 0.00025554379681125283, "objective/train/value_max": -0.00013875961303710938, "objective/train/value_min": -0.865234375, "objective/train/value_reward_corr": 0.7009334463254417, "objective/train/value_std": 0.0123443603515625, "objective/train/weight_avg": 1.0015344619750977, "objective/train/weighted_lm_loss": 1.3353464603424072, "objective/train/weights_max": 1.3698445558547974, "objective/train/weights_min": 0.3807870149612427, "theoretical_loss": 3.659160561403739, "tokens_seen": 1055129600 }, { "epoch": 0.32, "learning_rate": 0.0006871288717701814, "loss": 0.07, "theoretical_loss": 3.659160561403739, "tokens_seen": 1055129600 }, { "epoch": 0.32, "learning_rate": 0.000687048627828599, "loss": 0.0718, "theoretical_loss": 3.6590755803375252, "tokens_seen": 1055391744 }, { "epoch": 0.32, "learning_rate": 0.0006869683838870165, "loss": 0.0735, "theoretical_loss": 3.65899062628534, "tokens_seen": 1055653888 }, { "epoch": 0.32, "learning_rate": 0.0006868881399454341, "loss": 0.073, "theoretical_loss": 3.65890569923189, "tokens_seen": 1055916032 }, { "epoch": 0.32, "learning_rate": 0.0006868078960038517, "loss": 0.0743, "theoretical_loss": 3.658820799161896, "tokens_seen": 1056178176 }, { "epoch": 0.32, "learning_rate": 0.0006867276520622692, "loss": 0.0749, "theoretical_loss": 3.65873592606009, "tokens_seen": 1056440320 }, { "epoch": 0.32, "learning_rate": 0.000686647408120687, "loss": 0.0707, "theoretical_loss": 3.658651079911218, "tokens_seen": 1056702464 }, { "epoch": 0.32, "learning_rate": 0.0006865671641791045, "loss": 0.0735, "theoretical_loss": 3.658566260700036, "tokens_seen": 1056964608 }, { "epoch": 0.32, "learning_rate": 0.0006864869202375221, "loss": 0.0735, "theoretical_loss": 3.658481468411315, "tokens_seen": 1057226752 }, { "epoch": 0.32, "learning_rate": 0.0006864066762959397, "loss": 0.0715, "theoretical_loss": 3.6583967030298368, "tokens_seen": 1057488896 }, { "epoch": 0.32, "learning_rate": 0.0006863264323543572, "loss": 0.0746, "theoretical_loss": 3.6583119645403954, "tokens_seen": 1057751040 }, { "epoch": 0.32, "learning_rate": 0.0006862461884127749, "loss": 0.0709, "theoretical_loss": 3.658227252927799, "tokens_seen": 1058013184 }, { "epoch": 0.32, "learning_rate": 0.0006861659444711924, "loss": 0.0704, "theoretical_loss": 3.6581425681768653, "tokens_seen": 1058275328 }, { "epoch": 0.32, "learning_rate": 0.00068608570052961, "loss": 0.072, "theoretical_loss": 3.6580579102724267, "tokens_seen": 1058537472 }, { "epoch": 0.32, "learning_rate": 0.0006860054565880276, "loss": 0.0734, "theoretical_loss": 3.657973279199327, "tokens_seen": 1058799616 }, { "epoch": 0.32, "learning_rate": 0.0006859252126464453, "loss": 0.0734, "theoretical_loss": 3.6578886749424226, "tokens_seen": 1059061760 }, { "epoch": 0.32, "learning_rate": 0.0006858449687048628, "loss": 0.077, "theoretical_loss": 3.657804097486581, "tokens_seen": 1059323904 }, { "epoch": 0.32, "learning_rate": 0.0006857647247632804, "loss": 0.0705, "theoretical_loss": 3.657719546816685, "tokens_seen": 1059586048 }, { "epoch": 0.32, "learning_rate": 0.000685684480821698, "loss": 0.0709, "theoretical_loss": 3.657635022917626, "tokens_seen": 1059848192 }, { "epoch": 0.32, "learning_rate": 0.0006856042368801155, "loss": 0.0715, "theoretical_loss": 3.65755052577431, "tokens_seen": 1060110336 }, { "epoch": 0.32, "learning_rate": 0.0006855239929385332, "loss": 0.071, "theoretical_loss": 3.657466055371654, "tokens_seen": 1060372480 }, { "epoch": 0.32, "learning_rate": 0.0006854437489969507, "loss": 0.0721, "theoretical_loss": 3.657381611694588, "tokens_seen": 1060634624 }, { "epoch": 0.32, "learning_rate": 0.0006853635050553683, "loss": 0.0731, "theoretical_loss": 3.6572971947280544, "tokens_seen": 1060896768 }, { "epoch": 0.32, "learning_rate": 0.0006852832611137859, "loss": 0.0764, "theoretical_loss": 3.6572128044570067, "tokens_seen": 1061158912 }, { "epoch": 0.32, "learning_rate": 0.0006852030171722034, "loss": 0.0724, "theoretical_loss": 3.657128440866412, "tokens_seen": 1061421056 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0005861930549144745, "objective/train/docs_used": 389244, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5149002075195312, "objective/train/original_loss": 1.5149000883102417, "objective/train/theoretical_loss": 3.6570441039412485, "objective/train/tokens_used": 1082143200, "objective/train/value_avg": -0.00844573974609375, "objective/train/value_loss": 0.00028737890534102917, "objective/train/value_max": -9.918212890625e-05, "objective/train/value_min": -0.37060546875, "objective/train/value_reward_corr": 0.6168363103967418, "objective/train/value_std": 0.01305389404296875, "objective/train/weight_avg": 1.0007121562957764, "objective/train/weighted_lm_loss": 1.5156128406524658, "objective/train/weights_max": 1.3453842401504517, "objective/train/weights_min": 0.36841732263565063, "theoretical_loss": 3.6570441039412485, "tokens_seen": 1061683200 }, { "epoch": 0.32, "learning_rate": 0.0006851227732306211, "loss": 0.073, "theoretical_loss": 3.6570441039412485, "tokens_seen": 1061683200 }, { "epoch": 0.32, "learning_rate": 0.0006850425292890387, "loss": 0.0734, "theoretical_loss": 3.6569597936665064, "tokens_seen": 1061945344 }, { "epoch": 0.32, "learning_rate": 0.0006849622853474563, "loss": 0.0753, "theoretical_loss": 3.6568755100271897, "tokens_seen": 1062207488 }, { "epoch": 0.32, "learning_rate": 0.0006848820414058739, "loss": 0.0727, "theoretical_loss": 3.656791253008313, "tokens_seen": 1062469632 }, { "epoch": 0.32, "learning_rate": 0.0006848017974642915, "loss": 0.0738, "theoretical_loss": 3.6567070225949028, "tokens_seen": 1062731776 }, { "epoch": 0.32, "learning_rate": 0.000684721553522709, "loss": 0.0727, "theoretical_loss": 3.656622818771999, "tokens_seen": 1062993920 }, { "epoch": 0.32, "learning_rate": 0.0006846413095811267, "loss": 0.0711, "theoretical_loss": 3.6565386415246524, "tokens_seen": 1063256064 }, { "epoch": 0.32, "learning_rate": 0.0006845610656395442, "loss": 0.0719, "theoretical_loss": 3.6564544908379273, "tokens_seen": 1063518208 }, { "epoch": 0.32, "learning_rate": 0.0006844808216979617, "loss": 0.0723, "theoretical_loss": 3.6563703666968985, "tokens_seen": 1063780352 }, { "epoch": 0.32, "learning_rate": 0.0006844005777563795, "loss": 0.0708, "theoretical_loss": 3.656286269086653, "tokens_seen": 1064042496 }, { "epoch": 0.32, "learning_rate": 0.000684320333814797, "loss": 0.0734, "theoretical_loss": 3.6562021979922923, "tokens_seen": 1064304640 }, { "epoch": 0.32, "learning_rate": 0.0006842400898732146, "loss": 0.0752, "theoretical_loss": 3.6561181533989267, "tokens_seen": 1064566784 }, { "epoch": 0.32, "learning_rate": 0.0006841598459316322, "loss": 0.0731, "theoretical_loss": 3.6560341352916796, "tokens_seen": 1064828928 }, { "epoch": 0.32, "learning_rate": 0.0006840796019900498, "loss": 0.0729, "theoretical_loss": 3.655950143655688, "tokens_seen": 1065091072 }, { "epoch": 0.32, "learning_rate": 0.0006839993580484673, "loss": 0.0733, "theoretical_loss": 3.655866178476098, "tokens_seen": 1065353216 }, { "epoch": 0.32, "learning_rate": 0.0006839191141068849, "loss": 0.0729, "theoretical_loss": 3.65578223973807, "tokens_seen": 1065615360 }, { "epoch": 0.32, "learning_rate": 0.0006838388701653025, "loss": 0.077, "theoretical_loss": 3.6556983274267765, "tokens_seen": 1065877504 }, { "epoch": 0.32, "learning_rate": 0.0006837586262237201, "loss": 0.0728, "theoretical_loss": 3.6556144415273994, "tokens_seen": 1066139648 }, { "epoch": 0.32, "learning_rate": 0.0006836783822821378, "loss": 0.0735, "theoretical_loss": 3.655530582025136, "tokens_seen": 1066401792 }, { "epoch": 0.32, "learning_rate": 0.0006835981383405553, "loss": 0.072, "theoretical_loss": 3.6554467489051925, "tokens_seen": 1066663936 }, { "epoch": 0.32, "learning_rate": 0.000683517894398973, "loss": 0.0746, "theoretical_loss": 3.6553629421527885, "tokens_seen": 1066926080 }, { "epoch": 0.32, "learning_rate": 0.0006834376504573905, "loss": 0.0744, "theoretical_loss": 3.655279161753156, "tokens_seen": 1067188224 }, { "epoch": 0.32, "learning_rate": 0.000683357406515808, "loss": 0.0713, "theoretical_loss": 3.6551954076915374, "tokens_seen": 1067450368 }, { "epoch": 0.32, "learning_rate": 0.0006832771625742257, "loss": 0.0727, "theoretical_loss": 3.655111679953188, "tokens_seen": 1067712512 }, { "epoch": 0.32, "learning_rate": 0.0006831969186326432, "loss": 0.0737, "theoretical_loss": 3.6550279785233757, "tokens_seen": 1067974656 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.001482620951719582, "objective/train/docs_used": 391774, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.458082914352417, "objective/train/original_loss": 1.458082914352417, "objective/train/theoretical_loss": 3.654944303387378, "objective/train/tokens_used": 1088696800, "objective/train/value_avg": -0.00849151611328125, "objective/train/value_loss": 0.00018974917475134134, "objective/train/value_max": -0.00010472536087036133, "objective/train/value_min": -0.64208984375, "objective/train/value_reward_corr": 0.7253735317584146, "objective/train/value_std": 0.0149993896484375, "objective/train/weight_avg": 1.0015714168548584, "objective/train/weighted_lm_loss": 1.4602124691009521, "objective/train/weights_max": 1.292288899421692, "objective/train/weights_min": 0.36969563364982605, "theoretical_loss": 3.654944303387378, "tokens_seen": 1068236800 }, { "epoch": 0.32, "learning_rate": 0.0006831166746910608, "loss": 0.0736, "theoretical_loss": 3.654944303387378, "tokens_seen": 1068236800 }, { "epoch": 0.32, "learning_rate": 0.0006830364307494784, "loss": 0.073, "theoretical_loss": 3.654860654530486, "tokens_seen": 1068498944 }, { "epoch": 0.32, "learning_rate": 0.0006829561868078961, "loss": 0.0741, "theoretical_loss": 3.6547770319380026, "tokens_seen": 1068761088 }, { "epoch": 0.32, "learning_rate": 0.0006828759428663136, "loss": 0.0712, "theoretical_loss": 3.6546934355952425, "tokens_seen": 1069023232 }, { "epoch": 0.32, "learning_rate": 0.0006827956989247312, "loss": 0.0743, "theoretical_loss": 3.6546098654875303, "tokens_seen": 1069285376 }, { "epoch": 0.32, "learning_rate": 0.0006827154549831488, "loss": 0.0743, "theoretical_loss": 3.654526321600205, "tokens_seen": 1069547520 }, { "epoch": 0.32, "learning_rate": 0.0006826352110415663, "loss": 0.0728, "theoretical_loss": 3.6544428039186165, "tokens_seen": 1069809664 }, { "epoch": 0.32, "learning_rate": 0.000682554967099984, "loss": 0.0715, "theoretical_loss": 3.6543593124281264, "tokens_seen": 1070071808 }, { "epoch": 0.32, "learning_rate": 0.0006824747231584015, "loss": 0.0738, "theoretical_loss": 3.654275847114107, "tokens_seen": 1070333952 }, { "epoch": 0.32, "learning_rate": 0.0006823944792168192, "loss": 0.0761, "theoretical_loss": 3.6541924079619443, "tokens_seen": 1070596096 }, { "epoch": 0.32, "learning_rate": 0.0006823142352752367, "loss": 0.0741, "theoretical_loss": 3.654108994957034, "tokens_seen": 1070858240 }, { "epoch": 0.32, "learning_rate": 0.0006822339913336542, "loss": 0.075, "theoretical_loss": 3.654025608084786, "tokens_seen": 1071120384 }, { "epoch": 0.32, "learning_rate": 0.000682153747392072, "loss": 0.0743, "theoretical_loss": 3.653942247330619, "tokens_seen": 1071382528 }, { "epoch": 0.32, "learning_rate": 0.0006820735034504895, "loss": 0.0725, "theoretical_loss": 3.653858912679966, "tokens_seen": 1071644672 }, { "epoch": 0.32, "learning_rate": 0.0006819932595089071, "loss": 0.0735, "theoretical_loss": 3.6537756041182696, "tokens_seen": 1071906816 }, { "epoch": 0.32, "learning_rate": 0.0006819130155673247, "loss": 0.0738, "theoretical_loss": 3.6536923216309862, "tokens_seen": 1072168960 }, { "epoch": 0.32, "learning_rate": 0.0006818327716257423, "loss": 0.0743, "theoretical_loss": 3.653609065203582, "tokens_seen": 1072431104 }, { "epoch": 0.33, "learning_rate": 0.0006817525276841598, "loss": 0.0724, "theoretical_loss": 3.6535258348215356, "tokens_seen": 1072693248 }, { "epoch": 0.33, "learning_rate": 0.0006816722837425775, "loss": 0.0747, "theoretical_loss": 3.653442630470337, "tokens_seen": 1072955392 }, { "epoch": 0.33, "learning_rate": 0.000681592039800995, "loss": 0.074, "theoretical_loss": 3.653359452135488, "tokens_seen": 1073217536 }, { "epoch": 0.33, "learning_rate": 0.0006815117958594125, "loss": 0.0735, "theoretical_loss": 3.653276299802503, "tokens_seen": 1073479680 }, { "epoch": 0.33, "learning_rate": 0.0006814315519178303, "loss": 0.0736, "theoretical_loss": 3.6531931734569056, "tokens_seen": 1073741824 }, { "epoch": 0.33, "learning_rate": 0.0006813513079762478, "loss": 0.0724, "theoretical_loss": 3.6531100730842336, "tokens_seen": 1074003968 }, { "epoch": 0.33, "learning_rate": 0.0006812710640346655, "loss": 0.0746, "theoretical_loss": 3.653026998670035, "tokens_seen": 1074266112 }, { "epoch": 0.33, "learning_rate": 0.000681190820093083, "loss": 0.0733, "theoretical_loss": 3.652943950199869, "tokens_seen": 1074528256 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0008113806834444404, "objective/train/docs_used": 394219, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4396977424621582, "objective/train/original_loss": 1.4396977424621582, "objective/train/theoretical_loss": 3.652860927659307, "objective/train/tokens_used": 1095250400, "objective/train/value_avg": -0.007663726806640625, "objective/train/value_loss": 0.00024871714413166046, "objective/train/value_max": -0.00011146068572998047, "objective/train/value_min": -0.373046875, "objective/train/value_reward_corr": 0.6646306086042744, "objective/train/value_std": 0.01288604736328125, "objective/train/weight_avg": 1.0009232759475708, "objective/train/weighted_lm_loss": 1.4412873983383179, "objective/train/weights_max": 1.1857911348342896, "objective/train/weights_min": 0.3686436712741852, "theoretical_loss": 3.652860927659307, "tokens_seen": 1074790400 }, { "epoch": 0.33, "learning_rate": 0.0006811105761515006, "loss": 0.0734, "theoretical_loss": 3.652860927659307, "tokens_seen": 1074790400 }, { "epoch": 0.33, "learning_rate": 0.0006810303322099182, "loss": 0.0727, "theoretical_loss": 3.6527779310339326, "tokens_seen": 1075052544 }, { "epoch": 0.33, "learning_rate": 0.0006809500882683357, "loss": 0.0742, "theoretical_loss": 3.652694960309339, "tokens_seen": 1075314688 }, { "epoch": 0.33, "learning_rate": 0.0006808698443267533, "loss": 0.0751, "theoretical_loss": 3.6526120154711332, "tokens_seen": 1075576832 }, { "epoch": 0.33, "learning_rate": 0.0006807896003851709, "loss": 0.072, "theoretical_loss": 3.6525290965049324, "tokens_seen": 1075838976 }, { "epoch": 0.33, "learning_rate": 0.0006807093564435886, "loss": 0.0738, "theoretical_loss": 3.652446203396365, "tokens_seen": 1076101120 }, { "epoch": 0.33, "learning_rate": 0.0006806291125020061, "loss": 0.0729, "theoretical_loss": 3.6523633361310717, "tokens_seen": 1076363264 }, { "epoch": 0.33, "learning_rate": 0.0006805488685604238, "loss": 0.0737, "theoretical_loss": 3.6522804946947045, "tokens_seen": 1076625408 }, { "epoch": 0.33, "learning_rate": 0.0006804686246188413, "loss": 0.0728, "theoretical_loss": 3.6521976790729265, "tokens_seen": 1076887552 }, { "epoch": 0.33, "learning_rate": 0.0006803883806772588, "loss": 0.0727, "theoretical_loss": 3.652114889251412, "tokens_seen": 1077149696 }, { "epoch": 0.33, "learning_rate": 0.0006803081367356765, "loss": 0.0763, "theoretical_loss": 3.6520321252158485, "tokens_seen": 1077411840 }, { "epoch": 0.33, "learning_rate": 0.000680227892794094, "loss": 0.0724, "theoretical_loss": 3.651949386951933, "tokens_seen": 1077673984 }, { "epoch": 0.33, "learning_rate": 0.0006801476488525116, "loss": 0.0729, "theoretical_loss": 3.6518666744453734, "tokens_seen": 1077936128 }, { "epoch": 0.33, "learning_rate": 0.0006800674049109292, "loss": 0.073, "theoretical_loss": 3.651783987681892, "tokens_seen": 1078198272 }, { "epoch": 0.33, "learning_rate": 0.0006799871609693469, "loss": 0.0734, "theoretical_loss": 3.6517013266472187, "tokens_seen": 1078460416 }, { "epoch": 0.33, "learning_rate": 0.0006799069170277645, "loss": 0.0736, "theoretical_loss": 3.651618691327098, "tokens_seen": 1078722560 }, { "epoch": 0.33, "learning_rate": 0.000679826673086182, "loss": 0.0749, "theoretical_loss": 3.651536081707284, "tokens_seen": 1078984704 }, { "epoch": 0.33, "learning_rate": 0.0006797464291445996, "loss": 0.0758, "theoretical_loss": 3.651453497773543, "tokens_seen": 1079246848 }, { "epoch": 0.33, "learning_rate": 0.0006796661852030172, "loss": 0.0746, "theoretical_loss": 3.6513709395116516, "tokens_seen": 1079508992 }, { "epoch": 0.33, "learning_rate": 0.0006795859412614348, "loss": 0.0714, "theoretical_loss": 3.651288406907399, "tokens_seen": 1079771136 }, { "epoch": 0.33, "learning_rate": 0.0006795056973198523, "loss": 0.0737, "theoretical_loss": 3.6512058999465844, "tokens_seen": 1080033280 }, { "epoch": 0.33, "learning_rate": 0.00067942545337827, "loss": 0.0732, "theoretical_loss": 3.6511234186150197, "tokens_seen": 1080295424 }, { "epoch": 0.33, "learning_rate": 0.0006793452094366875, "loss": 0.0742, "theoretical_loss": 3.6510409628985263, "tokens_seen": 1080557568 }, { "epoch": 0.33, "learning_rate": 0.000679264965495105, "loss": 0.0733, "theoretical_loss": 3.6509585327829392, "tokens_seen": 1080819712 }, { "epoch": 0.33, "learning_rate": 0.0006791847215535228, "loss": 0.073, "theoretical_loss": 3.6508761282541027, "tokens_seen": 1081081856 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0017559973057359457, "objective/train/docs_used": 396489, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5023612976074219, "objective/train/original_loss": 1.5023612976074219, "objective/train/theoretical_loss": 3.6507937492978733, "objective/train/tokens_used": 1101804000, "objective/train/value_avg": -0.0075531005859375, "objective/train/value_loss": 0.0002941191487479955, "objective/train/value_max": -0.00011962652206420898, "objective/train/value_min": -0.9130859375, "objective/train/value_reward_corr": 0.6127776569660509, "objective/train/value_std": 0.0134735107421875, "objective/train/weight_avg": 1.0018857717514038, "objective/train/weighted_lm_loss": 1.5048304796218872, "objective/train/weights_max": 1.7268811464309692, "objective/train/weights_min": 0.3711496591567993, "theoretical_loss": 3.6507937492978733, "tokens_seen": 1081344000 }, { "epoch": 0.33, "learning_rate": 0.0006791044776119403, "loss": 0.0738, "theoretical_loss": 3.6507937492978733, "tokens_seen": 1081344000 }, { "epoch": 0.33, "learning_rate": 0.0006790242336703579, "loss": 0.0712, "theoretical_loss": 3.6507113959001183, "tokens_seen": 1081606144 }, { "epoch": 0.33, "learning_rate": 0.0006789439897287755, "loss": 0.077, "theoretical_loss": 3.6506290680467166, "tokens_seen": 1081868288 }, { "epoch": 0.33, "learning_rate": 0.0006788637457871931, "loss": 0.0739, "theoretical_loss": 3.650546765723558, "tokens_seen": 1082130432 }, { "epoch": 0.33, "learning_rate": 0.0006787835018456106, "loss": 0.0731, "theoretical_loss": 3.650464488916544, "tokens_seen": 1082392576 }, { "epoch": 0.33, "learning_rate": 0.0006787032579040283, "loss": 0.0746, "theoretical_loss": 3.650382237611587, "tokens_seen": 1082654720 }, { "epoch": 0.33, "learning_rate": 0.0006786230139624458, "loss": 0.0763, "theoretical_loss": 3.65030001179461, "tokens_seen": 1082916864 }, { "epoch": 0.33, "learning_rate": 0.0006785427700208634, "loss": 0.0735, "theoretical_loss": 3.650217811451548, "tokens_seen": 1083179008 }, { "epoch": 0.33, "learning_rate": 0.0006784625260792811, "loss": 0.0728, "theoretical_loss": 3.650135636568347, "tokens_seen": 1083441152 }, { "epoch": 0.33, "learning_rate": 0.0006783822821376986, "loss": 0.0734, "theoretical_loss": 3.6500534871309642, "tokens_seen": 1083703296 }, { "epoch": 0.33, "learning_rate": 0.0006783020381961163, "loss": 0.0736, "theoretical_loss": 3.649971363125368, "tokens_seen": 1083965440 }, { "epoch": 0.33, "learning_rate": 0.0006782217942545338, "loss": 0.0738, "theoretical_loss": 3.6498892645375367, "tokens_seen": 1084227584 }, { "epoch": 0.33, "learning_rate": 0.0006781415503129514, "loss": 0.0727, "theoretical_loss": 3.649807191353462, "tokens_seen": 1084489728 }, { "epoch": 0.33, "learning_rate": 0.000678061306371369, "loss": 0.0742, "theoretical_loss": 3.6497251435591442, "tokens_seen": 1084751872 }, { "epoch": 0.33, "learning_rate": 0.0006779810624297865, "loss": 0.0777, "theoretical_loss": 3.6496431211405973, "tokens_seen": 1085014016 }, { "epoch": 0.33, "learning_rate": 0.0006779008184882041, "loss": 0.0714, "theoretical_loss": 3.649561124083844, "tokens_seen": 1085276160 }, { "epoch": 0.33, "learning_rate": 0.0006778205745466217, "loss": 0.0752, "theoretical_loss": 3.6494791523749193, "tokens_seen": 1085538304 }, { "epoch": 0.33, "learning_rate": 0.0006777403306050394, "loss": 0.0739, "theoretical_loss": 3.6493972059998696, "tokens_seen": 1085800448 }, { "epoch": 0.33, "learning_rate": 0.0006776600866634569, "loss": 0.0734, "theoretical_loss": 3.649315284944751, "tokens_seen": 1086062592 }, { "epoch": 0.33, "learning_rate": 0.0006775798427218746, "loss": 0.072, "theoretical_loss": 3.649233389195632, "tokens_seen": 1086324736 }, { "epoch": 0.33, "learning_rate": 0.0006774995987802921, "loss": 0.0735, "theoretical_loss": 3.6491515187385914, "tokens_seen": 1086586880 }, { "epoch": 0.33, "learning_rate": 0.0006774193548387097, "loss": 0.071, "theoretical_loss": 3.649069673559719, "tokens_seen": 1086849024 }, { "epoch": 0.33, "learning_rate": 0.0006773391108971273, "loss": 0.0731, "theoretical_loss": 3.648987853645116, "tokens_seen": 1087111168 }, { "epoch": 0.33, "learning_rate": 0.0006772588669555448, "loss": 0.0715, "theoretical_loss": 3.648906058980894, "tokens_seen": 1087373312 }, { "epoch": 0.33, "learning_rate": 0.0006771786230139625, "loss": 0.0735, "theoretical_loss": 3.6488242895531764, "tokens_seen": 1087635456 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.00033141745370812714, "objective/train/docs_used": 398862, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3994327783584595, "objective/train/original_loss": 1.39943265914917, "objective/train/theoretical_loss": 3.6487425453480973, "objective/train/tokens_used": 1108357600, "objective/train/value_avg": -0.01074981689453125, "objective/train/value_loss": 0.0003049577062483877, "objective/train/value_max": -0.0001499652862548828, "objective/train/value_min": -0.64697265625, "objective/train/value_reward_corr": 0.7564707177647516, "objective/train/value_std": 0.0179290771484375, "objective/train/weight_avg": 1.0004724264144897, "objective/train/weighted_lm_loss": 1.4000054597854614, "objective/train/weights_max": 1.461032509803772, "objective/train/weights_min": 0.3686436712741852, "theoretical_loss": 3.6487425453480973, "tokens_seen": 1087897600 }, { "epoch": 0.33, "learning_rate": 0.00067709837907238, "loss": 0.0722, "theoretical_loss": 3.6487425453480973, "tokens_seen": 1087897600 }, { "epoch": 0.33, "learning_rate": 0.0006770181351307977, "loss": 0.0712, "theoretical_loss": 3.648660826351801, "tokens_seen": 1088159744 }, { "epoch": 0.33, "learning_rate": 0.0006769378911892153, "loss": 0.0736, "theoretical_loss": 3.6485791325504437, "tokens_seen": 1088421888 }, { "epoch": 0.33, "learning_rate": 0.0006768576472476328, "loss": 0.0729, "theoretical_loss": 3.648497463930192, "tokens_seen": 1088684032 }, { "epoch": 0.33, "learning_rate": 0.0006767774033060504, "loss": 0.0729, "theoretical_loss": 3.6484158204772235, "tokens_seen": 1088946176 }, { "epoch": 0.33, "learning_rate": 0.000676697159364468, "loss": 0.0749, "theoretical_loss": 3.648334202177727, "tokens_seen": 1089208320 }, { "epoch": 0.33, "learning_rate": 0.0006766169154228856, "loss": 0.0762, "theoretical_loss": 3.648252609017902, "tokens_seen": 1089470464 }, { "epoch": 0.33, "learning_rate": 0.0006765366714813031, "loss": 0.0741, "theoretical_loss": 3.648171040983959, "tokens_seen": 1089732608 }, { "epoch": 0.33, "learning_rate": 0.0006764564275397208, "loss": 0.0762, "theoretical_loss": 3.648089498062119, "tokens_seen": 1089994752 }, { "epoch": 0.33, "learning_rate": 0.0006763761835981383, "loss": 0.0752, "theoretical_loss": 3.648007980238614, "tokens_seen": 1090256896 }, { "epoch": 0.33, "learning_rate": 0.0006762959396565558, "loss": 0.0723, "theoretical_loss": 3.6479264874996877, "tokens_seen": 1090519040 }, { "epoch": 0.33, "learning_rate": 0.0006762156957149736, "loss": 0.0728, "theoretical_loss": 3.6478450198315926, "tokens_seen": 1090781184 }, { "epoch": 0.33, "learning_rate": 0.0006761354517733911, "loss": 0.0753, "theoretical_loss": 3.6477635772205947, "tokens_seen": 1091043328 }, { "epoch": 0.33, "learning_rate": 0.0006760552078318088, "loss": 0.075, "theoretical_loss": 3.647682159652969, "tokens_seen": 1091305472 }, { "epoch": 0.33, "learning_rate": 0.0006759749638902263, "loss": 0.074, "theoretical_loss": 3.647600767115002, "tokens_seen": 1091567616 }, { "epoch": 0.33, "learning_rate": 0.0006758947199486439, "loss": 0.0758, "theoretical_loss": 3.6475193995929907, "tokens_seen": 1091829760 }, { "epoch": 0.33, "learning_rate": 0.0006758144760070615, "loss": 0.0736, "theoretical_loss": 3.6474380570732423, "tokens_seen": 1092091904 }, { "epoch": 0.33, "learning_rate": 0.000675734232065479, "loss": 0.0749, "theoretical_loss": 3.6473567395420767, "tokens_seen": 1092354048 }, { "epoch": 0.33, "learning_rate": 0.0006756539881238966, "loss": 0.0728, "theoretical_loss": 3.647275446985822, "tokens_seen": 1092616192 }, { "epoch": 0.33, "learning_rate": 0.0006755737441823142, "loss": 0.074, "theoretical_loss": 3.64719417939082, "tokens_seen": 1092878336 }, { "epoch": 0.33, "learning_rate": 0.0006754935002407319, "loss": 0.0759, "theoretical_loss": 3.6471129367434205, "tokens_seen": 1093140480 }, { "epoch": 0.33, "learning_rate": 0.0006754132562991494, "loss": 0.0715, "theoretical_loss": 3.647031719029985, "tokens_seen": 1093402624 }, { "epoch": 0.33, "learning_rate": 0.0006753330123575671, "loss": 0.073, "theoretical_loss": 3.646950526236887, "tokens_seen": 1093664768 }, { "epoch": 0.33, "learning_rate": 0.0006752527684159846, "loss": 0.0741, "theoretical_loss": 3.6468693583505085, "tokens_seen": 1093926912 }, { "epoch": 0.33, "learning_rate": 0.0006751725244744022, "loss": 0.0759, "theoretical_loss": 3.646788215357244, "tokens_seen": 1094189056 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.000770938815549016, "objective/train/docs_used": 401268, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4965838193893433, "objective/train/original_loss": 1.4965837001800537, "objective/train/theoretical_loss": 3.646707097243498, "objective/train/tokens_used": 1114911200, "objective/train/value_avg": -0.00963592529296875, "objective/train/value_loss": 0.0003326190635561943, "objective/train/value_max": -0.0001634359359741211, "objective/train/value_min": -0.9462890625, "objective/train/value_reward_corr": 0.7103235067802546, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 1.0009206533432007, "objective/train/weighted_lm_loss": 1.4972987174987793, "objective/train/weights_max": 1.833900809288025, "objective/train/weights_min": 0.36999329924583435, "theoretical_loss": 3.646707097243498, "tokens_seen": 1094451200 }, { "epoch": 0.33, "learning_rate": 0.0006750922805328198, "loss": 0.0734, "theoretical_loss": 3.646707097243498, "tokens_seen": 1094451200 }, { "epoch": 0.33, "learning_rate": 0.0006750120365912373, "loss": 0.0727, "theoretical_loss": 3.646626003995685, "tokens_seen": 1094713344 }, { "epoch": 0.33, "learning_rate": 0.000674931792649655, "loss": 0.0757, "theoretical_loss": 3.6465449356002315, "tokens_seen": 1094975488 }, { "epoch": 0.33, "learning_rate": 0.0006748515487080725, "loss": 0.0712, "theoretical_loss": 3.646463892043574, "tokens_seen": 1095237632 }, { "epoch": 0.33, "learning_rate": 0.0006747713047664902, "loss": 0.0736, "theoretical_loss": 3.6463828733121586, "tokens_seen": 1095499776 }, { "epoch": 0.33, "learning_rate": 0.0006746910608249078, "loss": 0.0746, "theoretical_loss": 3.6463018793924453, "tokens_seen": 1095761920 }, { "epoch": 0.33, "learning_rate": 0.0006746108168833254, "loss": 0.0712, "theoretical_loss": 3.6462209102709, "tokens_seen": 1096024064 }, { "epoch": 0.33, "learning_rate": 0.0006745305729417429, "loss": 0.0721, "theoretical_loss": 3.6461399659340037, "tokens_seen": 1096286208 }, { "epoch": 0.33, "learning_rate": 0.0006744503290001605, "loss": 0.0738, "theoretical_loss": 3.6460590463682454, "tokens_seen": 1096548352 }, { "epoch": 0.33, "learning_rate": 0.0006743700850585781, "loss": 0.0727, "theoretical_loss": 3.6459781515601244, "tokens_seen": 1096810496 }, { "epoch": 0.33, "learning_rate": 0.0006742898411169956, "loss": 0.0749, "theoretical_loss": 3.6458972814961528, "tokens_seen": 1097072640 }, { "epoch": 0.33, "learning_rate": 0.0006742095971754133, "loss": 0.0723, "theoretical_loss": 3.6458164361628516, "tokens_seen": 1097334784 }, { "epoch": 0.33, "learning_rate": 0.0006741293532338308, "loss": 0.0721, "theoretical_loss": 3.645735615546752, "tokens_seen": 1097596928 }, { "epoch": 0.33, "learning_rate": 0.0006740491092922485, "loss": 0.0751, "theoretical_loss": 3.645654819634397, "tokens_seen": 1097859072 }, { "epoch": 0.33, "learning_rate": 0.0006739688653506661, "loss": 0.0735, "theoretical_loss": 3.6455740484123407, "tokens_seen": 1098121216 }, { "epoch": 0.33, "learning_rate": 0.0006738886214090836, "loss": 0.0713, "theoretical_loss": 3.645493301867145, "tokens_seen": 1098383360 }, { "epoch": 0.33, "learning_rate": 0.0006738083774675012, "loss": 0.0749, "theoretical_loss": 3.6454125799853854, "tokens_seen": 1098645504 }, { "epoch": 0.33, "learning_rate": 0.0006737281335259188, "loss": 0.0735, "theoretical_loss": 3.645331882753645, "tokens_seen": 1098907648 }, { "epoch": 0.33, "learning_rate": 0.0006736478895843364, "loss": 0.0732, "theoretical_loss": 3.6452512101585195, "tokens_seen": 1099169792 }, { "epoch": 0.33, "learning_rate": 0.000673567645642754, "loss": 0.0701, "theoretical_loss": 3.645170562186615, "tokens_seen": 1099431936 }, { "epoch": 0.33, "learning_rate": 0.0006734874017011716, "loss": 0.0742, "theoretical_loss": 3.6450899388245466, "tokens_seen": 1099694080 }, { "epoch": 0.33, "learning_rate": 0.0006734071577595891, "loss": 0.0747, "theoretical_loss": 3.645009340058941, "tokens_seen": 1099956224 }, { "epoch": 0.33, "learning_rate": 0.0006733269138180067, "loss": 0.0734, "theoretical_loss": 3.644928765876436, "tokens_seen": 1100218368 }, { "epoch": 0.33, "learning_rate": 0.0006732466698764244, "loss": 0.0722, "theoretical_loss": 3.644848216263678, "tokens_seen": 1100480512 }, { "epoch": 0.33, "learning_rate": 0.0006731664259348419, "loss": 0.075, "theoretical_loss": 3.6447676912073255, "tokens_seen": 1100742656 }, { "epoch": 0.33, "objective/train/advantage_avg": -0.001446638605557382, "objective/train/docs_used": 403530, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5791977643966675, "objective/train/original_loss": 1.5791977643966675, "objective/train/theoretical_loss": 3.6446871906940466, "objective/train/tokens_used": 1121464800, "objective/train/value_avg": -0.00872802734375, "objective/train/value_loss": 0.00039117602864280343, "objective/train/value_max": -0.00010389089584350586, "objective/train/value_min": -0.5576171875, "objective/train/value_reward_corr": 0.8311069137177848, "objective/train/value_std": 0.01605224609375, "objective/train/weight_avg": 0.998737633228302, "objective/train/weighted_lm_loss": 1.5767664909362793, "objective/train/weights_max": 1.3273746967315674, "objective/train/weights_min": 0.368156373500824, "theoretical_loss": 3.6446871906940466, "tokens_seen": 1101004800 }, { "epoch": 0.33, "learning_rate": 0.0006730861819932596, "loss": 0.0757, "theoretical_loss": 3.6446871906940466, "tokens_seen": 1101004800 }, { "epoch": 0.33, "learning_rate": 0.0006730059380516771, "loss": 0.072, "theoretical_loss": 3.6446067147105197, "tokens_seen": 1101266944 }, { "epoch": 0.33, "learning_rate": 0.0006729256941100947, "loss": 0.0713, "theoretical_loss": 3.644526263243433, "tokens_seen": 1101529088 }, { "epoch": 0.33, "learning_rate": 0.0006728454501685123, "loss": 0.0749, "theoretical_loss": 3.644445836279488, "tokens_seen": 1101791232 }, { "epoch": 0.33, "learning_rate": 0.0006727652062269298, "loss": 0.0697, "theoretical_loss": 3.644365433805393, "tokens_seen": 1102053376 }, { "epoch": 0.33, "learning_rate": 0.0006726849622853474, "loss": 0.0721, "theoretical_loss": 3.6442850558078685, "tokens_seen": 1102315520 }, { "epoch": 0.33, "learning_rate": 0.000672604718343765, "loss": 0.0766, "theoretical_loss": 3.6442047022736452, "tokens_seen": 1102577664 }, { "epoch": 0.33, "learning_rate": 0.0006725244744021827, "loss": 0.0759, "theoretical_loss": 3.644124373189464, "tokens_seen": 1102839808 }, { "epoch": 0.33, "learning_rate": 0.0006724442304606002, "loss": 0.0744, "theoretical_loss": 3.644044068542076, "tokens_seen": 1103101952 }, { "epoch": 0.33, "learning_rate": 0.0006723639865190179, "loss": 0.0728, "theoretical_loss": 3.643963788318242, "tokens_seen": 1103364096 }, { "epoch": 0.33, "learning_rate": 0.0006722837425774354, "loss": 0.0723, "theoretical_loss": 3.6438835325047356, "tokens_seen": 1103626240 }, { "epoch": 0.33, "learning_rate": 0.0006722034986358531, "loss": 0.0729, "theoretical_loss": 3.6438033010883375, "tokens_seen": 1103888384 }, { "epoch": 0.33, "learning_rate": 0.0006721232546942706, "loss": 0.0745, "theoretical_loss": 3.643723094055841, "tokens_seen": 1104150528 }, { "epoch": 0.33, "learning_rate": 0.0006720430107526881, "loss": 0.0767, "theoretical_loss": 3.643642911394048, "tokens_seen": 1104412672 }, { "epoch": 0.33, "learning_rate": 0.0006719627668111058, "loss": 0.0734, "theoretical_loss": 3.643562753089772, "tokens_seen": 1104674816 }, { "epoch": 0.33, "learning_rate": 0.0006718825228695233, "loss": 0.0733, "theoretical_loss": 3.6434826191298364, "tokens_seen": 1104936960 }, { "epoch": 0.33, "learning_rate": 0.000671802278927941, "loss": 0.0726, "theoretical_loss": 3.6434025095010747, "tokens_seen": 1105199104 }, { "epoch": 0.34, "learning_rate": 0.0006717220349863586, "loss": 0.0763, "theoretical_loss": 3.6433224241903304, "tokens_seen": 1105461248 }, { "epoch": 0.34, "learning_rate": 0.0006716417910447762, "loss": 0.0749, "theoretical_loss": 3.643242363184458, "tokens_seen": 1105723392 }, { "epoch": 0.34, "learning_rate": 0.0006715615471031937, "loss": 0.0741, "theoretical_loss": 3.6431623264703212, "tokens_seen": 1105985536 }, { "epoch": 0.34, "learning_rate": 0.0006714813031616113, "loss": 0.0711, "theoretical_loss": 3.6430823140347943, "tokens_seen": 1106247680 }, { "epoch": 0.34, "learning_rate": 0.0006714010592200289, "loss": 0.0726, "theoretical_loss": 3.643002325864763, "tokens_seen": 1106509824 }, { "epoch": 0.34, "learning_rate": 0.0006713208152784464, "loss": 0.0726, "theoretical_loss": 3.6429223619471207, "tokens_seen": 1106771968 }, { "epoch": 0.34, "learning_rate": 0.0006712405713368641, "loss": 0.0749, "theoretical_loss": 3.6428424222687736, "tokens_seen": 1107034112 }, { "epoch": 0.34, "learning_rate": 0.0006711603273952816, "loss": 0.074, "theoretical_loss": 3.642762506816636, "tokens_seen": 1107296256 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0007008896209299564, "objective/train/docs_used": 405946, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.543431043624878, "objective/train/original_loss": 1.5434308052062988, "objective/train/theoretical_loss": 3.642682615577634, "objective/train/tokens_used": 1128018400, "objective/train/value_avg": -0.0092010498046875, "objective/train/value_loss": 0.00045838035293854773, "objective/train/value_max": -7.969141006469727e-05, "objective/train/value_min": -0.953125, "objective/train/value_reward_corr": 0.5853789003053751, "objective/train/value_std": 0.0174102783203125, "objective/train/weight_avg": 1.000909447669983, "objective/train/weighted_lm_loss": 1.5446616411209106, "objective/train/weights_max": 2.1671571731567383, "objective/train/weights_min": 0.3689032196998596, "theoretical_loss": 3.642682615577634, "tokens_seen": 1107558400 }, { "epoch": 0.34, "learning_rate": 0.0006710800834536994, "loss": 0.0717, "theoretical_loss": 3.642682615577634, "tokens_seen": 1107558400 }, { "epoch": 0.34, "learning_rate": 0.0006709998395121169, "loss": 0.0757, "theoretical_loss": 3.6426027485387023, "tokens_seen": 1107820544 }, { "epoch": 0.34, "learning_rate": 0.0006709195955705344, "loss": 0.0733, "theoretical_loss": 3.6425229056867865, "tokens_seen": 1108082688 }, { "epoch": 0.34, "learning_rate": 0.0006708393516289521, "loss": 0.0723, "theoretical_loss": 3.642443087008844, "tokens_seen": 1108344832 }, { "epoch": 0.34, "learning_rate": 0.0006707591076873696, "loss": 0.0716, "theoretical_loss": 3.6423632924918383, "tokens_seen": 1108606976 }, { "epoch": 0.34, "learning_rate": 0.0006706788637457872, "loss": 0.0728, "theoretical_loss": 3.6422835221227468, "tokens_seen": 1108869120 }, { "epoch": 0.34, "learning_rate": 0.0006705986198042048, "loss": 0.0728, "theoretical_loss": 3.6422037758885555, "tokens_seen": 1109131264 }, { "epoch": 0.34, "learning_rate": 0.0006705183758626224, "loss": 0.0743, "theoretical_loss": 3.6421240537762607, "tokens_seen": 1109393408 }, { "epoch": 0.34, "learning_rate": 0.0006704381319210399, "loss": 0.0772, "theoretical_loss": 3.6420443557728674, "tokens_seen": 1109655552 }, { "epoch": 0.34, "learning_rate": 0.0006703578879794575, "loss": 0.0741, "theoretical_loss": 3.6419646818653932, "tokens_seen": 1109917696 }, { "epoch": 0.34, "learning_rate": 0.0006702776440378752, "loss": 0.076, "theoretical_loss": 3.641885032040864, "tokens_seen": 1110179840 }, { "epoch": 0.34, "learning_rate": 0.0006701974000962927, "loss": 0.0751, "theoretical_loss": 3.6418054062863163, "tokens_seen": 1110441984 }, { "epoch": 0.34, "learning_rate": 0.0006701171561547104, "loss": 0.073, "theoretical_loss": 3.6417258045887966, "tokens_seen": 1110704128 }, { "epoch": 0.34, "learning_rate": 0.0006700369122131279, "loss": 0.0744, "theoretical_loss": 3.641646226935361, "tokens_seen": 1110966272 }, { "epoch": 0.34, "learning_rate": 0.0006699566682715455, "loss": 0.0759, "theoretical_loss": 3.641566673313076, "tokens_seen": 1111228416 }, { "epoch": 0.34, "learning_rate": 0.0006698764243299631, "loss": 0.0776, "theoretical_loss": 3.6414871437090186, "tokens_seen": 1111490560 }, { "epoch": 0.34, "learning_rate": 0.0006697961803883806, "loss": 0.0752, "theoretical_loss": 3.641407638110275, "tokens_seen": 1111752704 }, { "epoch": 0.34, "learning_rate": 0.0006697159364467983, "loss": 0.0753, "theoretical_loss": 3.641328156503942, "tokens_seen": 1112014848 }, { "epoch": 0.34, "learning_rate": 0.0006696356925052158, "loss": 0.0757, "theoretical_loss": 3.6412486988771255, "tokens_seen": 1112276992 }, { "epoch": 0.34, "learning_rate": 0.0006695554485636335, "loss": 0.0727, "theoretical_loss": 3.6411692652169423, "tokens_seen": 1112539136 }, { "epoch": 0.34, "learning_rate": 0.0006694752046220511, "loss": 0.0749, "theoretical_loss": 3.641089855510518, "tokens_seen": 1112801280 }, { "epoch": 0.34, "learning_rate": 0.0006693949606804687, "loss": 0.0726, "theoretical_loss": 3.64101046974499, "tokens_seen": 1113063424 }, { "epoch": 0.34, "learning_rate": 0.0006693147167388862, "loss": 0.074, "theoretical_loss": 3.640931107907504, "tokens_seen": 1113325568 }, { "epoch": 0.34, "learning_rate": 0.0006692344727973039, "loss": 0.0767, "theoretical_loss": 3.6408517699852165, "tokens_seen": 1113587712 }, { "epoch": 0.34, "learning_rate": 0.0006691542288557214, "loss": 0.0779, "theoretical_loss": 3.640772455965293, "tokens_seen": 1113849856 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0008538411930203438, "objective/train/docs_used": 408181, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3940370082855225, "objective/train/original_loss": 1.3940370082855225, "objective/train/theoretical_loss": 3.64069316583491, "objective/train/tokens_used": 1134572000, "objective/train/value_avg": -0.0080718994140625, "objective/train/value_loss": 0.0002601642336230725, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.57763671875, "objective/train/value_reward_corr": 0.6369542776258966, "objective/train/value_std": 0.01348876953125, "objective/train/weight_avg": 1.0009734630584717, "objective/train/weighted_lm_loss": 1.3951406478881836, "objective/train/weights_max": 1.348272681236267, "objective/train/weights_min": 0.36883002519607544, "theoretical_loss": 3.64069316583491, "tokens_seen": 1114112000 }, { "epoch": 0.34, "learning_rate": 0.0006690739849141389, "loss": 0.0725, "theoretical_loss": 3.64069316583491, "tokens_seen": 1114112000 }, { "epoch": 0.34, "learning_rate": 0.0006689937409725566, "loss": 0.075, "theoretical_loss": 3.640613899581253, "tokens_seen": 1114374144 }, { "epoch": 0.34, "learning_rate": 0.0006689134970309741, "loss": 0.0775, "theoretical_loss": 3.6405346571915187, "tokens_seen": 1114636288 }, { "epoch": 0.34, "learning_rate": 0.0006688332530893918, "loss": 0.0755, "theoretical_loss": 3.6404554386529115, "tokens_seen": 1114898432 }, { "epoch": 0.34, "learning_rate": 0.0006687530091478094, "loss": 0.0721, "theoretical_loss": 3.640376243952648, "tokens_seen": 1115160576 }, { "epoch": 0.34, "learning_rate": 0.000668672765206227, "loss": 0.0721, "theoretical_loss": 3.640297073077953, "tokens_seen": 1115422720 }, { "epoch": 0.34, "learning_rate": 0.0006685925212646445, "loss": 0.0777, "theoretical_loss": 3.640217926016061, "tokens_seen": 1115684864 }, { "epoch": 0.34, "learning_rate": 0.0006685122773230621, "loss": 0.0741, "theoretical_loss": 3.6401388027542185, "tokens_seen": 1115947008 }, { "epoch": 0.34, "learning_rate": 0.0006684320333814797, "loss": 0.0707, "theoretical_loss": 3.6400597032796798, "tokens_seen": 1116209152 }, { "epoch": 0.34, "learning_rate": 0.0006683517894398973, "loss": 0.0752, "theoretical_loss": 3.6399806275797095, "tokens_seen": 1116471296 }, { "epoch": 0.34, "learning_rate": 0.0006682715454983149, "loss": 0.0761, "theoretical_loss": 3.639901575641582, "tokens_seen": 1116733440 }, { "epoch": 0.34, "learning_rate": 0.0006681913015567324, "loss": 0.0739, "theoretical_loss": 3.6398225474525816, "tokens_seen": 1116995584 }, { "epoch": 0.34, "learning_rate": 0.0006681110576151502, "loss": 0.0725, "theoretical_loss": 3.639743543000003, "tokens_seen": 1117257728 }, { "epoch": 0.34, "learning_rate": 0.0006680308136735677, "loss": 0.0732, "theoretical_loss": 3.6396645622711494, "tokens_seen": 1117519872 }, { "epoch": 0.34, "learning_rate": 0.0006679505697319852, "loss": 0.0764, "theoretical_loss": 3.639585605253335, "tokens_seen": 1117782016 }, { "epoch": 0.34, "learning_rate": 0.0006678703257904029, "loss": 0.0736, "theoretical_loss": 3.639506671933882, "tokens_seen": 1118044160 }, { "epoch": 0.34, "learning_rate": 0.0006677900818488204, "loss": 0.0739, "theoretical_loss": 3.639427762300125, "tokens_seen": 1118306304 }, { "epoch": 0.34, "learning_rate": 0.000667709837907238, "loss": 0.0747, "theoretical_loss": 3.6393488763394064, "tokens_seen": 1118568448 }, { "epoch": 0.34, "learning_rate": 0.0006676295939656556, "loss": 0.0737, "theoretical_loss": 3.639270014039078, "tokens_seen": 1118830592 }, { "epoch": 0.34, "learning_rate": 0.0006675493500240732, "loss": 0.0739, "theoretical_loss": 3.6391911753865034, "tokens_seen": 1119092736 }, { "epoch": 0.34, "learning_rate": 0.0006674691060824907, "loss": 0.0753, "theoretical_loss": 3.639112360369054, "tokens_seen": 1119354880 }, { "epoch": 0.34, "learning_rate": 0.0006673888621409083, "loss": 0.0764, "theoretical_loss": 3.6390335689741113, "tokens_seen": 1119617024 }, { "epoch": 0.34, "learning_rate": 0.000667308618199326, "loss": 0.0751, "theoretical_loss": 3.638954801189067, "tokens_seen": 1119879168 }, { "epoch": 0.34, "learning_rate": 0.0006672283742577436, "loss": 0.0758, "theoretical_loss": 3.6388760570013226, "tokens_seen": 1120141312 }, { "epoch": 0.34, "learning_rate": 0.0006671481303161612, "loss": 0.0729, "theoretical_loss": 3.6387973363982877, "tokens_seen": 1120403456 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.00035512945032678545, "objective/train/docs_used": 410712, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4525707960128784, "objective/train/original_loss": 1.4525706768035889, "objective/train/theoretical_loss": 3.638718639367384, "objective/train/tokens_used": 1141125600, "objective/train/value_avg": -0.00772857666015625, "objective/train/value_loss": 0.00021194637520238757, "objective/train/value_max": -5.3882598876953125e-05, "objective/train/value_min": -0.6298828125, "objective/train/value_reward_corr": 0.7386635939472923, "objective/train/value_std": 0.014801025390625, "objective/train/weight_avg": 1.0004568099975586, "objective/train/weighted_lm_loss": 1.453499674797058, "objective/train/weights_max": 1.5431358814239502, "objective/train/weights_min": 0.541466236114502, "theoretical_loss": 3.638718639367384, "tokens_seen": 1120665600 }, { "epoch": 0.34, "learning_rate": 0.0006670678863745787, "loss": 0.0727, "theoretical_loss": 3.638718639367384, "tokens_seen": 1120665600 }, { "epoch": 0.34, "learning_rate": 0.0006669876424329964, "loss": 0.0738, "theoretical_loss": 3.638639965896041, "tokens_seen": 1120927744 }, { "epoch": 0.34, "learning_rate": 0.0006669073984914139, "loss": 0.0765, "theoretical_loss": 3.638561315971698, "tokens_seen": 1121189888 }, { "epoch": 0.34, "learning_rate": 0.0006668271545498314, "loss": 0.0732, "theoretical_loss": 3.638482689581805, "tokens_seen": 1121452032 }, { "epoch": 0.34, "learning_rate": 0.0006667469106082491, "loss": 0.0724, "theoretical_loss": 3.6384040867138214, "tokens_seen": 1121714176 }, { "epoch": 0.34, "learning_rate": 0.0006666666666666666, "loss": 0.0765, "theoretical_loss": 3.6383255073552148, "tokens_seen": 1121976320 }, { "epoch": 0.34, "learning_rate": 0.0006665864227250843, "loss": 0.0752, "theoretical_loss": 3.638246951493463, "tokens_seen": 1122238464 }, { "epoch": 0.34, "learning_rate": 0.0006665061787835019, "loss": 0.0739, "theoretical_loss": 3.6381684191160555, "tokens_seen": 1122500608 }, { "epoch": 0.34, "learning_rate": 0.0006664259348419195, "loss": 0.0725, "theoretical_loss": 3.638089910210488, "tokens_seen": 1122762752 }, { "epoch": 0.34, "learning_rate": 0.000666345690900337, "loss": 0.0762, "theoretical_loss": 3.638011424764269, "tokens_seen": 1123024896 }, { "epoch": 0.34, "learning_rate": 0.0006662654469587546, "loss": 0.0711, "theoretical_loss": 3.6379329627649137, "tokens_seen": 1123287040 }, { "epoch": 0.34, "learning_rate": 0.0006661852030171722, "loss": 0.0732, "theoretical_loss": 3.6378545241999487, "tokens_seen": 1123549184 }, { "epoch": 0.34, "learning_rate": 0.0006661049590755897, "loss": 0.0746, "theoretical_loss": 3.637776109056909, "tokens_seen": 1123811328 }, { "epoch": 0.34, "learning_rate": 0.0006660247151340074, "loss": 0.0737, "theoretical_loss": 3.6376977173233405, "tokens_seen": 1124073472 }, { "epoch": 0.34, "learning_rate": 0.0006659444711924249, "loss": 0.0729, "theoretical_loss": 3.6376193489867976, "tokens_seen": 1124335616 }, { "epoch": 0.34, "learning_rate": 0.0006658642272508427, "loss": 0.0754, "theoretical_loss": 3.6375410040348446, "tokens_seen": 1124597760 }, { "epoch": 0.34, "learning_rate": 0.0006657839833092602, "loss": 0.071, "theoretical_loss": 3.637462682455055, "tokens_seen": 1124859904 }, { "epoch": 0.34, "learning_rate": 0.0006657037393676778, "loss": 0.0737, "theoretical_loss": 3.6373843842350118, "tokens_seen": 1125122048 }, { "epoch": 0.34, "learning_rate": 0.0006656234954260954, "loss": 0.0752, "theoretical_loss": 3.637306109362308, "tokens_seen": 1125384192 }, { "epoch": 0.34, "learning_rate": 0.0006655432514845129, "loss": 0.0735, "theoretical_loss": 3.6372278578245454, "tokens_seen": 1125646336 }, { "epoch": 0.34, "learning_rate": 0.0006654630075429305, "loss": 0.0732, "theoretical_loss": 3.6371496296093357, "tokens_seen": 1125908480 }, { "epoch": 0.34, "learning_rate": 0.0006653827636013481, "loss": 0.0746, "theoretical_loss": 3.6370714247043003, "tokens_seen": 1126170624 }, { "epoch": 0.34, "learning_rate": 0.0006653025196597657, "loss": 0.0736, "theoretical_loss": 3.6369932430970695, "tokens_seen": 1126432768 }, { "epoch": 0.34, "learning_rate": 0.0006652222757181832, "loss": 0.077, "theoretical_loss": 3.6369150847752834, "tokens_seen": 1126694912 }, { "epoch": 0.34, "learning_rate": 0.000665142031776601, "loss": 0.0746, "theoretical_loss": 3.6368369497265913, "tokens_seen": 1126957056 }, { "epoch": 0.34, "objective/train/advantage_avg": -0.0004652831412386149, "objective/train/docs_used": 413086, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3972619771957397, "objective/train/original_loss": 1.3972619771957397, "objective/train/theoretical_loss": 3.6367588379386513, "objective/train/tokens_used": 1147679200, "objective/train/value_avg": -0.0119476318359375, "objective/train/value_loss": 0.0004500811919569969, "objective/train/value_max": -0.00012242794036865234, "objective/train/value_min": -0.638671875, "objective/train/value_reward_corr": 0.8368038617349711, "objective/train/value_std": 0.02716064453125, "objective/train/weight_avg": 0.9997386932373047, "objective/train/weighted_lm_loss": 1.3970850706100464, "objective/train/weights_max": 1.5594542026519775, "objective/train/weights_min": 0.36921927332878113, "theoretical_loss": 3.6367588379386513, "tokens_seen": 1127219200 }, { "epoch": 0.34, "learning_rate": 0.0006650617878350185, "loss": 0.0732, "theoretical_loss": 3.6367588379386513, "tokens_seen": 1127219200 }, { "epoch": 0.34, "learning_rate": 0.000664981543893436, "loss": 0.0754, "theoretical_loss": 3.636680749399133, "tokens_seen": 1127481344 }, { "epoch": 0.34, "learning_rate": 0.0006649012999518537, "loss": 0.0744, "theoretical_loss": 3.6366026840957133, "tokens_seen": 1127743488 }, { "epoch": 0.34, "learning_rate": 0.0006648210560102712, "loss": 0.076, "theoretical_loss": 3.636524642016079, "tokens_seen": 1128005632 }, { "epoch": 0.34, "learning_rate": 0.0006647408120686889, "loss": 0.0743, "theoretical_loss": 3.636446623147927, "tokens_seen": 1128267776 }, { "epoch": 0.34, "learning_rate": 0.0006646605681271064, "loss": 0.0761, "theoretical_loss": 3.6363686274789626, "tokens_seen": 1128529920 }, { "epoch": 0.34, "learning_rate": 0.000664580324185524, "loss": 0.0731, "theoretical_loss": 3.6362906549969014, "tokens_seen": 1128792064 }, { "epoch": 0.34, "learning_rate": 0.0006645000802439416, "loss": 0.0741, "theoretical_loss": 3.6362127056894673, "tokens_seen": 1129054208 }, { "epoch": 0.34, "learning_rate": 0.0006644198363023591, "loss": 0.0751, "theoretical_loss": 3.6361347795443955, "tokens_seen": 1129316352 }, { "epoch": 0.34, "learning_rate": 0.0006643395923607768, "loss": 0.0739, "theoretical_loss": 3.636056876549427, "tokens_seen": 1129578496 }, { "epoch": 0.34, "learning_rate": 0.0006642593484191944, "loss": 0.0738, "theoretical_loss": 3.6359789966923164, "tokens_seen": 1129840640 }, { "epoch": 0.34, "learning_rate": 0.000664179104477612, "loss": 0.0773, "theoretical_loss": 3.6359011399608243, "tokens_seen": 1130102784 }, { "epoch": 0.34, "learning_rate": 0.0006640988605360295, "loss": 0.0748, "theoretical_loss": 3.6358233063427225, "tokens_seen": 1130364928 }, { "epoch": 0.34, "learning_rate": 0.0006640186165944472, "loss": 0.0717, "theoretical_loss": 3.635745495825791, "tokens_seen": 1130627072 }, { "epoch": 0.34, "learning_rate": 0.0006639383726528647, "loss": 0.0721, "theoretical_loss": 3.63566770839782, "tokens_seen": 1130889216 }, { "epoch": 0.34, "learning_rate": 0.0006638581287112822, "loss": 0.0742, "theoretical_loss": 3.6355899440466075, "tokens_seen": 1131151360 }, { "epoch": 0.34, "learning_rate": 0.0006637778847696999, "loss": 0.0733, "theoretical_loss": 3.635512202759964, "tokens_seen": 1131413504 }, { "epoch": 0.34, "learning_rate": 0.0006636976408281174, "loss": 0.0738, "theoretical_loss": 3.635434484525704, "tokens_seen": 1131675648 }, { "epoch": 0.34, "learning_rate": 0.0006636173968865351, "loss": 0.0759, "theoretical_loss": 3.6353567893316567, "tokens_seen": 1131937792 }, { "epoch": 0.34, "learning_rate": 0.0006635371529449527, "loss": 0.0737, "theoretical_loss": 3.6352791171656573, "tokens_seen": 1132199936 }, { "epoch": 0.34, "learning_rate": 0.0006634569090033703, "loss": 0.0742, "theoretical_loss": 3.635201468015551, "tokens_seen": 1132462080 }, { "epoch": 0.34, "learning_rate": 0.0006633766650617879, "loss": 0.074, "theoretical_loss": 3.635123841869193, "tokens_seen": 1132724224 }, { "epoch": 0.34, "learning_rate": 0.0006632964211202054, "loss": 0.0751, "theoretical_loss": 3.6350462387144464, "tokens_seen": 1132986368 }, { "epoch": 0.34, "learning_rate": 0.000663216177178623, "loss": 0.0747, "theoretical_loss": 3.634968658539184, "tokens_seen": 1133248512 }, { "epoch": 0.34, "learning_rate": 0.0006631359332370406, "loss": 0.0768, "theoretical_loss": 3.6348911013312883, "tokens_seen": 1133510656 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0009396148379892111, "objective/train/docs_used": 415442, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4944310188293457, "objective/train/original_loss": 1.4944311380386353, "objective/train/theoretical_loss": 3.6348135670786506, "objective/train/tokens_used": 1154232800, "objective/train/value_avg": -0.0083770751953125, "objective/train/value_loss": 0.00016622063412796706, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.54296875, "objective/train/value_reward_corr": 0.7126787421781615, "objective/train/value_std": 0.01410675048828125, "objective/train/weight_avg": 1.0010205507278442, "objective/train/weighted_lm_loss": 1.4958319664001465, "objective/train/weights_max": 1.4884049892425537, "objective/train/weights_min": 0.5564592480659485, "theoretical_loss": 3.6348135670786506, "tokens_seen": 1133772800 }, { "epoch": 0.34, "learning_rate": 0.0006630556892954582, "loss": 0.0735, "theoretical_loss": 3.6348135670786506, "tokens_seen": 1133772800 }, { "epoch": 0.34, "learning_rate": 0.0006629754453538757, "loss": 0.0746, "theoretical_loss": 3.6347360557691712, "tokens_seen": 1134034944 }, { "epoch": 0.34, "learning_rate": 0.0006628952014122935, "loss": 0.0737, "theoretical_loss": 3.63465856739076, "tokens_seen": 1134297088 }, { "epoch": 0.34, "learning_rate": 0.000662814957470711, "loss": 0.0745, "theoretical_loss": 3.634581101931336, "tokens_seen": 1134559232 }, { "epoch": 0.34, "learning_rate": 0.0006627347135291286, "loss": 0.0753, "theoretical_loss": 3.6345036593788276, "tokens_seen": 1134821376 }, { "epoch": 0.34, "learning_rate": 0.0006626544695875462, "loss": 0.0758, "theoretical_loss": 3.6344262397211704, "tokens_seen": 1135083520 }, { "epoch": 0.34, "learning_rate": 0.0006625742256459637, "loss": 0.0753, "theoretical_loss": 3.6343488429463124, "tokens_seen": 1135345664 }, { "epoch": 0.34, "learning_rate": 0.0006624939817043813, "loss": 0.076, "theoretical_loss": 3.634271469042208, "tokens_seen": 1135607808 }, { "epoch": 0.34, "learning_rate": 0.0006624137377627989, "loss": 0.0727, "theoretical_loss": 3.634194117996822, "tokens_seen": 1135869952 }, { "epoch": 0.34, "learning_rate": 0.0006623334938212165, "loss": 0.0736, "theoretical_loss": 3.634116789798129, "tokens_seen": 1136132096 }, { "epoch": 0.34, "learning_rate": 0.000662253249879634, "loss": 0.0768, "theoretical_loss": 3.6340394844341097, "tokens_seen": 1136394240 }, { "epoch": 0.34, "learning_rate": 0.0006621730059380518, "loss": 0.0768, "theoretical_loss": 3.6339622018927575, "tokens_seen": 1136656384 }, { "epoch": 0.34, "learning_rate": 0.0006620927619964693, "loss": 0.0738, "theoretical_loss": 3.633884942162073, "tokens_seen": 1136918528 }, { "epoch": 0.34, "learning_rate": 0.0006620125180548869, "loss": 0.0733, "theoretical_loss": 3.6338077052300664, "tokens_seen": 1137180672 }, { "epoch": 0.34, "learning_rate": 0.0006619322741133045, "loss": 0.0725, "theoretical_loss": 3.633730491084756, "tokens_seen": 1137442816 }, { "epoch": 0.34, "learning_rate": 0.000661852030171722, "loss": 0.0746, "theoretical_loss": 3.6336532997141706, "tokens_seen": 1137704960 }, { "epoch": 0.34, "learning_rate": 0.0006617717862301397, "loss": 0.0779, "theoretical_loss": 3.6335761311063473, "tokens_seen": 1137967104 }, { "epoch": 0.34, "learning_rate": 0.0006616915422885572, "loss": 0.0763, "theoretical_loss": 3.633498985249332, "tokens_seen": 1138229248 }, { "epoch": 0.35, "learning_rate": 0.0006616112983469748, "loss": 0.0738, "theoretical_loss": 3.63342186213118, "tokens_seen": 1138491392 }, { "epoch": 0.35, "learning_rate": 0.0006615310544053924, "loss": 0.0735, "theoretical_loss": 3.6333447617399557, "tokens_seen": 1138753536 }, { "epoch": 0.35, "learning_rate": 0.00066145081046381, "loss": 0.0751, "theoretical_loss": 3.6332676840637324, "tokens_seen": 1139015680 }, { "epoch": 0.35, "learning_rate": 0.0006613705665222276, "loss": 0.0758, "theoretical_loss": 3.633190629090592, "tokens_seen": 1139277824 }, { "epoch": 0.35, "learning_rate": 0.0006612903225806452, "loss": 0.075, "theoretical_loss": 3.6331135968086263, "tokens_seen": 1139539968 }, { "epoch": 0.35, "learning_rate": 0.0006612100786390628, "loss": 0.0738, "theoretical_loss": 3.633036587205935, "tokens_seen": 1139802112 }, { "epoch": 0.35, "learning_rate": 0.0006611298346974803, "loss": 0.0745, "theoretical_loss": 3.6329596002706275, "tokens_seen": 1140064256 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0012790506007149816, "objective/train/docs_used": 417664, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.491121530532837, "objective/train/original_loss": 1.491121530532837, "objective/train/theoretical_loss": 3.632882635990822, "objective/train/tokens_used": 1160786400, "objective/train/value_avg": -0.006160736083984375, "objective/train/value_loss": 0.00011857429490191862, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.5185546875, "objective/train/value_reward_corr": 0.7576070544182378, "objective/train/value_std": 0.01470184326171875, "objective/train/weight_avg": 1.0013362169265747, "objective/train/weighted_lm_loss": 1.4934520721435547, "objective/train/weights_max": 1.1257035732269287, "objective/train/weights_min": 0.4068576395511627, "theoretical_loss": 3.632882635990822, "tokens_seen": 1140326400 }, { "epoch": 0.35, "learning_rate": 0.000661049590755898, "loss": 0.0743, "theoretical_loss": 3.632882635990822, "tokens_seen": 1140326400 }, { "epoch": 0.35, "learning_rate": 0.0006609693468143155, "loss": 0.0763, "theoretical_loss": 3.632805694354646, "tokens_seen": 1140588544 }, { "epoch": 0.35, "learning_rate": 0.0006608891028727331, "loss": 0.0751, "theoretical_loss": 3.6327287753502358, "tokens_seen": 1140850688 }, { "epoch": 0.35, "learning_rate": 0.0006608088589311507, "loss": 0.0752, "theoretical_loss": 3.632651878965735, "tokens_seen": 1141112832 }, { "epoch": 0.35, "learning_rate": 0.0006607286149895682, "loss": 0.0735, "theoretical_loss": 3.632575005189299, "tokens_seen": 1141374976 }, { "epoch": 0.35, "learning_rate": 0.000660648371047986, "loss": 0.0763, "theoretical_loss": 3.6324981540090895, "tokens_seen": 1141637120 }, { "epoch": 0.35, "learning_rate": 0.0006605681271064035, "loss": 0.0741, "theoretical_loss": 3.6324213254132793, "tokens_seen": 1141899264 }, { "epoch": 0.35, "learning_rate": 0.0006604878831648211, "loss": 0.0758, "theoretical_loss": 3.632344519390049, "tokens_seen": 1142161408 }, { "epoch": 0.35, "learning_rate": 0.0006604076392232387, "loss": 0.0725, "theoretical_loss": 3.632267735927588, "tokens_seen": 1142423552 }, { "epoch": 0.35, "learning_rate": 0.0006603273952816562, "loss": 0.0745, "theoretical_loss": 3.632190975014094, "tokens_seen": 1142685696 }, { "epoch": 0.35, "learning_rate": 0.0006602471513400738, "loss": 0.0721, "theoretical_loss": 3.6321142366377757, "tokens_seen": 1142947840 }, { "epoch": 0.35, "learning_rate": 0.0006601669073984914, "loss": 0.0719, "theoretical_loss": 3.6320375207868483, "tokens_seen": 1143209984 }, { "epoch": 0.35, "learning_rate": 0.000660086663456909, "loss": 0.073, "theoretical_loss": 3.6319608274495376, "tokens_seen": 1143472128 }, { "epoch": 0.35, "learning_rate": 0.0006600064195153265, "loss": 0.0744, "theoretical_loss": 3.6318841566140767, "tokens_seen": 1143734272 }, { "epoch": 0.35, "learning_rate": 0.0006599261755737443, "loss": 0.0722, "theoretical_loss": 3.63180750826871, "tokens_seen": 1143996416 }, { "epoch": 0.35, "learning_rate": 0.0006598459316321618, "loss": 0.0701, "theoretical_loss": 3.6317308824016874, "tokens_seen": 1144258560 }, { "epoch": 0.35, "learning_rate": 0.0006597656876905794, "loss": 0.0707, "theoretical_loss": 3.6316542790012702, "tokens_seen": 1144520704 }, { "epoch": 0.35, "learning_rate": 0.000659685443748997, "loss": 0.0717, "theoretical_loss": 3.631577698055727, "tokens_seen": 1144782848 }, { "epoch": 0.35, "learning_rate": 0.0006596051998074145, "loss": 0.0705, "theoretical_loss": 3.631501139553337, "tokens_seen": 1145044992 }, { "epoch": 0.35, "learning_rate": 0.0006595249558658322, "loss": 0.0738, "theoretical_loss": 3.6314246034823867, "tokens_seen": 1145307136 }, { "epoch": 0.35, "learning_rate": 0.0006594447119242497, "loss": 0.075, "theoretical_loss": 3.631348089831171, "tokens_seen": 1145569280 }, { "epoch": 0.35, "learning_rate": 0.0006593644679826673, "loss": 0.0737, "theoretical_loss": 3.631271598587995, "tokens_seen": 1145831424 }, { "epoch": 0.35, "learning_rate": 0.0006592842240410849, "loss": 0.0739, "theoretical_loss": 3.631195129741172, "tokens_seen": 1146093568 }, { "epoch": 0.35, "learning_rate": 0.0006592039800995026, "loss": 0.0747, "theoretical_loss": 3.631118683279024, "tokens_seen": 1146355712 }, { "epoch": 0.35, "learning_rate": 0.0006591237361579201, "loss": 0.0734, "theoretical_loss": 3.6310422591898814, "tokens_seen": 1146617856 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0006740565877407789, "objective/train/docs_used": 419954, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3567689657211304, "objective/train/original_loss": 1.3567688465118408, "objective/train/theoretical_loss": 3.630965857462084, "objective/train/tokens_used": 1167340000, "objective/train/value_avg": -0.00856781005859375, "objective/train/value_loss": 0.0002693459973670542, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.62451171875, "objective/train/value_reward_corr": 0.6644929738275986, "objective/train/value_std": 0.01461029052734375, "objective/train/weight_avg": 1.0007959604263306, "objective/train/weighted_lm_loss": 1.357459545135498, "objective/train/weights_max": 1.5120787620544434, "objective/train/weights_min": 0.3692566156387329, "theoretical_loss": 3.630965857462084, "tokens_seen": 1146880000 }, { "epoch": 0.35, "learning_rate": 0.0006590434922163377, "loss": 0.0715, "theoretical_loss": 3.630965857462084, "tokens_seen": 1146880000 }, { "epoch": 0.35, "learning_rate": 0.0006589632482747553, "loss": 0.0742, "theoretical_loss": 3.6308894780839798, "tokens_seen": 1147142144 }, { "epoch": 0.35, "learning_rate": 0.0006588830043331728, "loss": 0.0715, "theoretical_loss": 3.630813121043926, "tokens_seen": 1147404288 }, { "epoch": 0.35, "learning_rate": 0.0006588027603915905, "loss": 0.0738, "theoretical_loss": 3.630736786330288, "tokens_seen": 1147666432 }, { "epoch": 0.35, "learning_rate": 0.000658722516450008, "loss": 0.0747, "theoretical_loss": 3.630660473931441, "tokens_seen": 1147928576 }, { "epoch": 0.35, "learning_rate": 0.0006586422725084256, "loss": 0.0754, "theoretical_loss": 3.6305841838357673, "tokens_seen": 1148190720 }, { "epoch": 0.35, "learning_rate": 0.0006585620285668432, "loss": 0.0704, "theoretical_loss": 3.630507916031659, "tokens_seen": 1148452864 }, { "epoch": 0.35, "learning_rate": 0.0006584817846252607, "loss": 0.0744, "theoretical_loss": 3.630431670507517, "tokens_seen": 1148715008 }, { "epoch": 0.35, "learning_rate": 0.0006584015406836785, "loss": 0.0741, "theoretical_loss": 3.6303554472517496, "tokens_seen": 1148977152 }, { "epoch": 0.35, "learning_rate": 0.000658321296742096, "loss": 0.0715, "theoretical_loss": 3.6302792462527758, "tokens_seen": 1149239296 }, { "epoch": 0.35, "learning_rate": 0.0006582410528005136, "loss": 0.071, "theoretical_loss": 3.6302030674990213, "tokens_seen": 1149501440 }, { "epoch": 0.35, "learning_rate": 0.0006581608088589312, "loss": 0.075, "theoretical_loss": 3.6301269109789214, "tokens_seen": 1149763584 }, { "epoch": 0.35, "learning_rate": 0.0006580805649173488, "loss": 0.0717, "theoretical_loss": 3.63005077668092, "tokens_seen": 1150025728 }, { "epoch": 0.35, "learning_rate": 0.0006580003209757663, "loss": 0.0723, "theoretical_loss": 3.62997466459347, "tokens_seen": 1150287872 }, { "epoch": 0.35, "learning_rate": 0.0006579200770341839, "loss": 0.0704, "theoretical_loss": 3.629898574705031, "tokens_seen": 1150550016 }, { "epoch": 0.35, "learning_rate": 0.0006578398330926015, "loss": 0.0734, "theoretical_loss": 3.629822507004075, "tokens_seen": 1150812160 }, { "epoch": 0.35, "learning_rate": 0.000657759589151019, "loss": 0.0721, "theoretical_loss": 3.629746461479079, "tokens_seen": 1151074304 }, { "epoch": 0.35, "learning_rate": 0.0006576793452094368, "loss": 0.0714, "theoretical_loss": 3.62967043811853, "tokens_seen": 1151336448 }, { "epoch": 0.35, "learning_rate": 0.0006575991012678543, "loss": 0.0736, "theoretical_loss": 3.629594436910924, "tokens_seen": 1151598592 }, { "epoch": 0.35, "learning_rate": 0.0006575188573262719, "loss": 0.0727, "theoretical_loss": 3.6295184578447643, "tokens_seen": 1151860736 }, { "epoch": 0.35, "learning_rate": 0.0006574386133846895, "loss": 0.0698, "theoretical_loss": 3.6294425009085645, "tokens_seen": 1152122880 }, { "epoch": 0.35, "learning_rate": 0.000657358369443107, "loss": 0.073, "theoretical_loss": 3.6293665660908454, "tokens_seen": 1152385024 }, { "epoch": 0.35, "learning_rate": 0.0006572781255015246, "loss": 0.0718, "theoretical_loss": 3.6292906533801372, "tokens_seen": 1152647168 }, { "epoch": 0.35, "learning_rate": 0.0006571978815599422, "loss": 0.074, "theoretical_loss": 3.6292147627649776, "tokens_seen": 1152909312 }, { "epoch": 0.35, "learning_rate": 0.0006571176376183598, "loss": 0.0751, "theoretical_loss": 3.6291388942339147, "tokens_seen": 1153171456 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0011869670124724507, "objective/train/docs_used": 422447, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4943604469299316, "objective/train/original_loss": 1.4943604469299316, "objective/train/theoretical_loss": 3.6290630477755026, "objective/train/tokens_used": 1173893600, "objective/train/value_avg": -0.0102081298828125, "objective/train/value_loss": 0.00040764882578514516, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.70703125, "objective/train/value_reward_corr": 0.6485598952753047, "objective/train/value_std": 0.0184478759765625, "objective/train/weight_avg": 1.0013765096664429, "objective/train/weighted_lm_loss": 1.496140718460083, "objective/train/weights_max": 1.7507752180099487, "objective/train/weights_min": 0.3769771456718445, "theoretical_loss": 3.6290630477755026, "tokens_seen": 1153433600 }, { "epoch": 0.35, "learning_rate": 0.0006570373936767774, "loss": 0.0714, "theoretical_loss": 3.6290630477755026, "tokens_seen": 1153433600 }, { "epoch": 0.35, "learning_rate": 0.0006569571497351951, "loss": 0.0716, "theoretical_loss": 3.6289872233783065, "tokens_seen": 1153695744 }, { "epoch": 0.35, "learning_rate": 0.0006568769057936126, "loss": 0.0704, "theoretical_loss": 3.6289114210308977, "tokens_seen": 1153957888 }, { "epoch": 0.35, "learning_rate": 0.0006567966618520302, "loss": 0.0727, "theoretical_loss": 3.628835640721859, "tokens_seen": 1154220032 }, { "epoch": 0.35, "learning_rate": 0.0006567164179104478, "loss": 0.0725, "theoretical_loss": 3.6287598824397787, "tokens_seen": 1154482176 }, { "epoch": 0.35, "learning_rate": 0.0006566361739688653, "loss": 0.0745, "theoretical_loss": 3.6286841461732546, "tokens_seen": 1154744320 }, { "epoch": 0.35, "learning_rate": 0.000656555930027283, "loss": 0.0711, "theoretical_loss": 3.6286084319108944, "tokens_seen": 1155006464 }, { "epoch": 0.35, "learning_rate": 0.0006564756860857005, "loss": 0.07, "theoretical_loss": 3.628532739641312, "tokens_seen": 1155268608 }, { "epoch": 0.35, "learning_rate": 0.0006563954421441181, "loss": 0.0699, "theoretical_loss": 3.6284570693531317, "tokens_seen": 1155530752 }, { "epoch": 0.35, "learning_rate": 0.0006563151982025357, "loss": 0.0715, "theoretical_loss": 3.6283814210349847, "tokens_seen": 1155792896 }, { "epoch": 0.35, "learning_rate": 0.0006562349542609534, "loss": 0.0707, "theoretical_loss": 3.628305794675512, "tokens_seen": 1156055040 }, { "epoch": 0.35, "learning_rate": 0.0006561547103193709, "loss": 0.0711, "theoretical_loss": 3.6282301902633627, "tokens_seen": 1156317184 }, { "epoch": 0.35, "learning_rate": 0.0006560744663777885, "loss": 0.069, "theoretical_loss": 3.628154607787194, "tokens_seen": 1156579328 }, { "epoch": 0.35, "learning_rate": 0.0006559942224362061, "loss": 0.0703, "theoretical_loss": 3.6280790472356705, "tokens_seen": 1156841472 }, { "epoch": 0.35, "learning_rate": 0.0006559139784946236, "loss": 0.0698, "theoretical_loss": 3.628003508597468, "tokens_seen": 1157103616 }, { "epoch": 0.35, "learning_rate": 0.0006558337345530413, "loss": 0.0734, "theoretical_loss": 3.6279279918612675, "tokens_seen": 1157365760 }, { "epoch": 0.35, "learning_rate": 0.0006557534906114588, "loss": 0.0704, "theoretical_loss": 3.6278524970157613, "tokens_seen": 1157627904 }, { "epoch": 0.35, "learning_rate": 0.0006556732466698765, "loss": 0.0729, "theoretical_loss": 3.6277770240496476, "tokens_seen": 1157890048 }, { "epoch": 0.35, "learning_rate": 0.000655593002728294, "loss": 0.0693, "theoretical_loss": 3.6277015729516355, "tokens_seen": 1158152192 }, { "epoch": 0.35, "learning_rate": 0.0006555127587867115, "loss": 0.0708, "theoretical_loss": 3.6276261437104402, "tokens_seen": 1158414336 }, { "epoch": 0.35, "learning_rate": 0.0006554325148451293, "loss": 0.0739, "theoretical_loss": 3.6275507363147868, "tokens_seen": 1158676480 }, { "epoch": 0.35, "learning_rate": 0.0006553522709035468, "loss": 0.0713, "theoretical_loss": 3.6274753507534077, "tokens_seen": 1158938624 }, { "epoch": 0.35, "learning_rate": 0.0006552720269619644, "loss": 0.0745, "theoretical_loss": 3.6273999870150444, "tokens_seen": 1159200768 }, { "epoch": 0.35, "learning_rate": 0.000655191783020382, "loss": 0.0682, "theoretical_loss": 3.627324645088446, "tokens_seen": 1159462912 }, { "epoch": 0.35, "learning_rate": 0.0006551115390787996, "loss": 0.0721, "theoretical_loss": 3.627249324962371, "tokens_seen": 1159725056 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0011229667579755187, "objective/train/docs_used": 424758, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5378754138946533, "objective/train/original_loss": 1.5378755331039429, "objective/train/theoretical_loss": 3.6271740266255854, "objective/train/tokens_used": 1180447200, "objective/train/value_avg": -0.0070648193359375, "objective/train/value_loss": 0.00018219789490103722, "objective/train/value_max": -0.00013446807861328125, "objective/train/value_min": -0.4599609375, "objective/train/value_reward_corr": 0.6781258515515318, "objective/train/value_std": 0.01233673095703125, "objective/train/weight_avg": 1.0012083053588867, "objective/train/weighted_lm_loss": 1.5396517515182495, "objective/train/weights_max": 1.1805914640426636, "objective/train/weights_min": 0.37442609667778015, "theoretical_loss": 3.6271740266255854, "tokens_seen": 1159987200 }, { "epoch": 0.35, "learning_rate": 0.0006550312951372171, "loss": 0.0719, "theoretical_loss": 3.6271740266255854, "tokens_seen": 1159987200 }, { "epoch": 0.35, "learning_rate": 0.0006549510511956347, "loss": 0.0728, "theoretical_loss": 3.6270987500668648, "tokens_seen": 1160249344 }, { "epoch": 0.35, "learning_rate": 0.0006548708072540523, "loss": 0.0726, "theoretical_loss": 3.6270234952749902, "tokens_seen": 1160511488 }, { "epoch": 0.35, "learning_rate": 0.0006547905633124698, "loss": 0.0711, "theoretical_loss": 3.6269482622387548, "tokens_seen": 1160773632 }, { "epoch": 0.35, "learning_rate": 0.0006547103193708876, "loss": 0.0723, "theoretical_loss": 3.6268730509469567, "tokens_seen": 1161035776 }, { "epoch": 0.35, "learning_rate": 0.0006546300754293051, "loss": 0.0698, "theoretical_loss": 3.626797861388404, "tokens_seen": 1161297920 }, { "epoch": 0.35, "learning_rate": 0.0006545498314877228, "loss": 0.073, "theoretical_loss": 3.6267226935519132, "tokens_seen": 1161560064 }, { "epoch": 0.35, "learning_rate": 0.0006544695875461403, "loss": 0.0723, "theoretical_loss": 3.626647547426309, "tokens_seen": 1161822208 }, { "epoch": 0.35, "learning_rate": 0.0006543893436045578, "loss": 0.0729, "theoretical_loss": 3.6265724230004226, "tokens_seen": 1162084352 }, { "epoch": 0.35, "learning_rate": 0.0006543090996629755, "loss": 0.0711, "theoretical_loss": 3.6264973202630966, "tokens_seen": 1162346496 }, { "epoch": 0.35, "learning_rate": 0.000654228855721393, "loss": 0.0696, "theoretical_loss": 3.6264222392031797, "tokens_seen": 1162608640 }, { "epoch": 0.35, "learning_rate": 0.0006541486117798106, "loss": 0.0696, "theoretical_loss": 3.6263471798095286, "tokens_seen": 1162870784 }, { "epoch": 0.35, "learning_rate": 0.0006540683678382282, "loss": 0.0749, "theoretical_loss": 3.6262721420710093, "tokens_seen": 1163132928 }, { "epoch": 0.35, "learning_rate": 0.0006539881238966459, "loss": 0.0707, "theoretical_loss": 3.6261971259764962, "tokens_seen": 1163395072 }, { "epoch": 0.35, "learning_rate": 0.0006539078799550634, "loss": 0.0694, "theoretical_loss": 3.626122131514871, "tokens_seen": 1163657216 }, { "epoch": 0.35, "learning_rate": 0.000653827636013481, "loss": 0.0724, "theoretical_loss": 3.626047158675024, "tokens_seen": 1163919360 }, { "epoch": 0.35, "learning_rate": 0.0006537473920718986, "loss": 0.0701, "theoretical_loss": 3.625972207445854, "tokens_seen": 1164181504 }, { "epoch": 0.35, "learning_rate": 0.0006536671481303161, "loss": 0.0686, "theoretical_loss": 3.625897277816267, "tokens_seen": 1164443648 }, { "epoch": 0.35, "learning_rate": 0.0006535869041887338, "loss": 0.0709, "theoretical_loss": 3.625822369775179, "tokens_seen": 1164705792 }, { "epoch": 0.35, "learning_rate": 0.0006535066602471513, "loss": 0.0731, "theoretical_loss": 3.6257474833115113, "tokens_seen": 1164967936 }, { "epoch": 0.35, "learning_rate": 0.0006534264163055689, "loss": 0.0726, "theoretical_loss": 3.625672618414198, "tokens_seen": 1165230080 }, { "epoch": 0.35, "learning_rate": 0.0006533461723639865, "loss": 0.0714, "theoretical_loss": 3.6255977750721753, "tokens_seen": 1165492224 }, { "epoch": 0.35, "learning_rate": 0.0006532659284224042, "loss": 0.0747, "theoretical_loss": 3.6255229532743933, "tokens_seen": 1165754368 }, { "epoch": 0.35, "learning_rate": 0.0006531856844808218, "loss": 0.0712, "theoretical_loss": 3.625448153009807, "tokens_seen": 1166016512 }, { "epoch": 0.35, "learning_rate": 0.0006531054405392393, "loss": 0.0693, "theoretical_loss": 3.6253733742673795, "tokens_seen": 1166278656 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.002040127059444785, "objective/train/docs_used": 427016, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.472293734550476, "objective/train/original_loss": 1.4722938537597656, "objective/train/theoretical_loss": 3.625298617036084, "objective/train/tokens_used": 1187000800, "objective/train/value_avg": -0.01036834716796875, "objective/train/value_loss": 0.0002555120154283941, "objective/train/value_max": -0.00013339519500732422, "objective/train/value_min": -0.6376953125, "objective/train/value_reward_corr": 0.7320040769195669, "objective/train/value_std": 0.0182952880859375, "objective/train/weight_avg": 1.0021584033966064, "objective/train/weighted_lm_loss": 1.475948452949524, "objective/train/weights_max": 1.3658320903778076, "objective/train/weights_min": 0.3710109293460846, "theoretical_loss": 3.625298617036084, "tokens_seen": 1166540800 }, { "epoch": 0.35, "learning_rate": 0.0006530251965976569, "loss": 0.0744, "theoretical_loss": 3.625298617036084, "tokens_seen": 1166540800 }, { "epoch": 0.35, "learning_rate": 0.0006529449526560745, "loss": 0.0727, "theoretical_loss": 3.6252238813049, "tokens_seen": 1166802944 }, { "epoch": 0.35, "learning_rate": 0.0006528647087144921, "loss": 0.073, "theoretical_loss": 3.6251491670628155, "tokens_seen": 1167065088 }, { "epoch": 0.35, "learning_rate": 0.0006527844647729096, "loss": 0.071, "theoretical_loss": 3.6250744742988275, "tokens_seen": 1167327232 }, { "epoch": 0.35, "learning_rate": 0.0006527042208313273, "loss": 0.0727, "theoretical_loss": 3.6249998030019404, "tokens_seen": 1167589376 }, { "epoch": 0.35, "learning_rate": 0.0006526239768897448, "loss": 0.0707, "theoretical_loss": 3.6249251531611666, "tokens_seen": 1167851520 }, { "epoch": 0.35, "learning_rate": 0.0006525437329481623, "loss": 0.0715, "theoretical_loss": 3.6248505247655265, "tokens_seen": 1168113664 }, { "epoch": 0.35, "learning_rate": 0.0006524634890065801, "loss": 0.0731, "theoretical_loss": 3.6247759178040493, "tokens_seen": 1168375808 }, { "epoch": 0.35, "learning_rate": 0.0006523832450649976, "loss": 0.0703, "theoretical_loss": 3.624701332265772, "tokens_seen": 1168637952 }, { "epoch": 0.35, "learning_rate": 0.0006523030011234152, "loss": 0.0703, "theoretical_loss": 3.6246267681397386, "tokens_seen": 1168900096 }, { "epoch": 0.35, "learning_rate": 0.0006522227571818328, "loss": 0.0715, "theoretical_loss": 3.624552225415003, "tokens_seen": 1169162240 }, { "epoch": 0.35, "learning_rate": 0.0006521425132402504, "loss": 0.0691, "theoretical_loss": 3.6244777040806255, "tokens_seen": 1169424384 }, { "epoch": 0.35, "learning_rate": 0.0006520622692986679, "loss": 0.0722, "theoretical_loss": 3.624403204125676, "tokens_seen": 1169686528 }, { "epoch": 0.35, "learning_rate": 0.0006519820253570855, "loss": 0.0748, "theoretical_loss": 3.6243287255392307, "tokens_seen": 1169948672 }, { "epoch": 0.35, "learning_rate": 0.0006519017814155031, "loss": 0.0722, "theoretical_loss": 3.624254268310375, "tokens_seen": 1170210816 }, { "epoch": 0.35, "learning_rate": 0.0006518215374739207, "loss": 0.0697, "theoretical_loss": 3.6241798324282017, "tokens_seen": 1170472960 }, { "epoch": 0.35, "learning_rate": 0.0006517412935323384, "loss": 0.0733, "theoretical_loss": 3.624105417881813, "tokens_seen": 1170735104 }, { "epoch": 0.35, "learning_rate": 0.0006516610495907559, "loss": 0.0729, "theoretical_loss": 3.624031024660317, "tokens_seen": 1170997248 }, { "epoch": 0.35, "learning_rate": 0.0006515808056491736, "loss": 0.0734, "theoretical_loss": 3.6239566527528306, "tokens_seen": 1171259392 }, { "epoch": 0.36, "learning_rate": 0.0006515005617075911, "loss": 0.0714, "theoretical_loss": 3.6238823021484796, "tokens_seen": 1171521536 }, { "epoch": 0.36, "learning_rate": 0.0006514203177660086, "loss": 0.0728, "theoretical_loss": 3.6238079728363974, "tokens_seen": 1171783680 }, { "epoch": 0.36, "learning_rate": 0.0006513400738244263, "loss": 0.0711, "theoretical_loss": 3.6237336648057243, "tokens_seen": 1172045824 }, { "epoch": 0.36, "learning_rate": 0.0006512598298828438, "loss": 0.0691, "theoretical_loss": 3.62365937804561, "tokens_seen": 1172307968 }, { "epoch": 0.36, "learning_rate": 0.0006511795859412614, "loss": 0.0682, "theoretical_loss": 3.6235851125452108, "tokens_seen": 1172570112 }, { "epoch": 0.36, "learning_rate": 0.000651099341999679, "loss": 0.072, "theoretical_loss": 3.6235108682936916, "tokens_seen": 1172832256 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0011571807553991675, "objective/train/docs_used": 429267, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3518650531768799, "objective/train/original_loss": 1.3518651723861694, "objective/train/theoretical_loss": 3.623436645280226, "objective/train/tokens_used": 1193554400, "objective/train/value_avg": -0.00867462158203125, "objective/train/value_loss": 0.00022238181554712355, "objective/train/value_max": -8.612871170043945e-05, "objective/train/value_min": -0.67236328125, "objective/train/value_reward_corr": 0.6452302653535441, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.0012578964233398, "objective/train/weighted_lm_loss": 1.3529399633407593, "objective/train/weights_max": 1.2395051717758179, "objective/train/weights_min": 0.37000179290771484, "theoretical_loss": 3.623436645280226, "tokens_seen": 1173094400 }, { "epoch": 0.36, "learning_rate": 0.0006510190980580967, "loss": 0.0723, "theoretical_loss": 3.623436645280226, "tokens_seen": 1173094400 }, { "epoch": 0.36, "learning_rate": 0.0006509388541165142, "loss": 0.0724, "theoretical_loss": 3.6233624434939946, "tokens_seen": 1173356544 }, { "epoch": 0.36, "learning_rate": 0.0006508586101749318, "loss": 0.07, "theoretical_loss": 3.623288262924186, "tokens_seen": 1173618688 }, { "epoch": 0.36, "learning_rate": 0.0006507783662333494, "loss": 0.0738, "theoretical_loss": 3.623214103559997, "tokens_seen": 1173880832 }, { "epoch": 0.36, "learning_rate": 0.000650698122291767, "loss": 0.0719, "theoretical_loss": 3.6231399653906315, "tokens_seen": 1174142976 }, { "epoch": 0.36, "learning_rate": 0.0006506178783501846, "loss": 0.0686, "theoretical_loss": 3.623065848405303, "tokens_seen": 1174405120 }, { "epoch": 0.36, "learning_rate": 0.0006505376344086021, "loss": 0.0717, "theoretical_loss": 3.622991752593231, "tokens_seen": 1174667264 }, { "epoch": 0.36, "learning_rate": 0.0006504573904670198, "loss": 0.0724, "theoretical_loss": 3.6229176779436445, "tokens_seen": 1174929408 }, { "epoch": 0.36, "learning_rate": 0.0006503771465254373, "loss": 0.0705, "theoretical_loss": 3.6228436244457796, "tokens_seen": 1175191552 }, { "epoch": 0.36, "learning_rate": 0.000650296902583855, "loss": 0.0719, "theoretical_loss": 3.6227695920888796, "tokens_seen": 1175453696 }, { "epoch": 0.36, "learning_rate": 0.0006502166586422726, "loss": 0.0703, "theoretical_loss": 3.622695580862197, "tokens_seen": 1175715840 }, { "epoch": 0.36, "learning_rate": 0.0006501364147006901, "loss": 0.0703, "theoretical_loss": 3.6226215907549912, "tokens_seen": 1175977984 }, { "epoch": 0.36, "learning_rate": 0.0006500561707591077, "loss": 0.0717, "theoretical_loss": 3.6225476217565307, "tokens_seen": 1176240128 }, { "epoch": 0.36, "learning_rate": 0.0006499759268175253, "loss": 0.0711, "theoretical_loss": 3.6224736738560894, "tokens_seen": 1176502272 }, { "epoch": 0.36, "learning_rate": 0.0006498956828759429, "loss": 0.0754, "theoretical_loss": 3.6223997470429516, "tokens_seen": 1176764416 }, { "epoch": 0.36, "learning_rate": 0.0006498154389343604, "loss": 0.0713, "theoretical_loss": 3.6223258413064086, "tokens_seen": 1177026560 }, { "epoch": 0.36, "learning_rate": 0.0006497351949927781, "loss": 0.0733, "theoretical_loss": 3.6222519566357585, "tokens_seen": 1177288704 }, { "epoch": 0.36, "learning_rate": 0.0006496549510511956, "loss": 0.071, "theoretical_loss": 3.6221780930203096, "tokens_seen": 1177550848 }, { "epoch": 0.36, "learning_rate": 0.0006495747071096131, "loss": 0.073, "theoretical_loss": 3.6221042504493743, "tokens_seen": 1177812992 }, { "epoch": 0.36, "learning_rate": 0.0006494944631680309, "loss": 0.0736, "theoretical_loss": 3.622030428912276, "tokens_seen": 1178075136 }, { "epoch": 0.36, "learning_rate": 0.0006494142192264484, "loss": 0.0699, "theoretical_loss": 3.6219566283983458, "tokens_seen": 1178337280 }, { "epoch": 0.36, "learning_rate": 0.0006493339752848661, "loss": 0.0747, "theoretical_loss": 3.6218828488969197, "tokens_seen": 1178599424 }, { "epoch": 0.36, "learning_rate": 0.0006492537313432836, "loss": 0.0707, "theoretical_loss": 3.6218090903973446, "tokens_seen": 1178861568 }, { "epoch": 0.36, "learning_rate": 0.0006491734874017012, "loss": 0.0765, "theoretical_loss": 3.621735352888974, "tokens_seen": 1179123712 }, { "epoch": 0.36, "learning_rate": 0.0006490932434601188, "loss": 0.0787, "theoretical_loss": 3.621661636361169, "tokens_seen": 1179385856 }, { "epoch": 0.36, "objective/train/advantage_avg": -0.000662869424559176, "objective/train/docs_used": 431781, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5813153982162476, "objective/train/original_loss": 1.5813153982162476, "objective/train/theoretical_loss": 3.6215879408032987, "objective/train/tokens_used": 1200108000, "objective/train/value_avg": -0.00632476806640625, "objective/train/value_loss": 0.0001973478210857138, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.392822265625, "objective/train/value_reward_corr": 0.6522775297311824, "objective/train/value_std": 0.01153564453125, "objective/train/weight_avg": 0.9994257688522339, "objective/train/weighted_lm_loss": 1.5801219940185547, "objective/train/weights_max": 1.2123867273330688, "objective/train/weights_min": 0.3755246698856354, "theoretical_loss": 3.6215879408032987, "tokens_seen": 1179648000 }, { "epoch": 0.36, "learning_rate": 0.0006490129995185363, "loss": 0.0741, "theoretical_loss": 3.6215879408032987, "tokens_seen": 1179648000 }, { "epoch": 0.36, "learning_rate": 0.0006489327555769539, "loss": 0.0739, "theoretical_loss": 3.6215142662047395, "tokens_seen": 1179910144 }, { "epoch": 0.36, "learning_rate": 0.0006488525116353715, "loss": 0.074, "theoretical_loss": 3.621440612554876, "tokens_seen": 1180172288 }, { "epoch": 0.36, "learning_rate": 0.0006487722676937892, "loss": 0.0739, "theoretical_loss": 3.6213669798431005, "tokens_seen": 1180434432 }, { "epoch": 0.36, "learning_rate": 0.0006486920237522067, "loss": 0.0723, "theoretical_loss": 3.621293368058813, "tokens_seen": 1180696576 }, { "epoch": 0.36, "learning_rate": 0.0006486117798106244, "loss": 0.074, "theoretical_loss": 3.621219777191421, "tokens_seen": 1180958720 }, { "epoch": 0.36, "learning_rate": 0.0006485315358690419, "loss": 0.0744, "theoretical_loss": 3.62114620723034, "tokens_seen": 1181220864 }, { "epoch": 0.36, "learning_rate": 0.0006484512919274594, "loss": 0.0737, "theoretical_loss": 3.621072658164993, "tokens_seen": 1181483008 }, { "epoch": 0.36, "learning_rate": 0.0006483710479858771, "loss": 0.0735, "theoretical_loss": 3.6209991299848108, "tokens_seen": 1181745152 }, { "epoch": 0.36, "learning_rate": 0.0006482908040442946, "loss": 0.0752, "theoretical_loss": 3.620925622679232, "tokens_seen": 1182007296 }, { "epoch": 0.36, "learning_rate": 0.0006482105601027123, "loss": 0.074, "theoretical_loss": 3.620852136237702, "tokens_seen": 1182269440 }, { "epoch": 0.36, "learning_rate": 0.0006481303161611298, "loss": 0.074, "theoretical_loss": 3.620778670649676, "tokens_seen": 1182531584 }, { "epoch": 0.36, "learning_rate": 0.0006480500722195475, "loss": 0.0709, "theoretical_loss": 3.620705225904614, "tokens_seen": 1182793728 }, { "epoch": 0.36, "learning_rate": 0.0006479698282779651, "loss": 0.0733, "theoretical_loss": 3.620631801991987, "tokens_seen": 1183055872 }, { "epoch": 0.36, "learning_rate": 0.0006478895843363826, "loss": 0.075, "theoretical_loss": 3.6205583989012697, "tokens_seen": 1183318016 }, { "epoch": 0.36, "learning_rate": 0.0006478093403948002, "loss": 0.0748, "theoretical_loss": 3.6204850166219478, "tokens_seen": 1183580160 }, { "epoch": 0.36, "learning_rate": 0.0006477290964532178, "loss": 0.0699, "theoretical_loss": 3.6204116551435126, "tokens_seen": 1183842304 }, { "epoch": 0.36, "learning_rate": 0.0006476488525116354, "loss": 0.0745, "theoretical_loss": 3.620338314455465, "tokens_seen": 1184104448 }, { "epoch": 0.36, "learning_rate": 0.0006475686085700529, "loss": 0.0742, "theoretical_loss": 3.6202649945473113, "tokens_seen": 1184366592 }, { "epoch": 0.36, "learning_rate": 0.0006474883646284706, "loss": 0.0724, "theoretical_loss": 3.6201916954085664, "tokens_seen": 1184628736 }, { "epoch": 0.36, "learning_rate": 0.0006474081206868881, "loss": 0.0725, "theoretical_loss": 3.620118417028754, "tokens_seen": 1184890880 }, { "epoch": 0.36, "learning_rate": 0.0006473278767453056, "loss": 0.0735, "theoretical_loss": 3.620045159397403, "tokens_seen": 1185153024 }, { "epoch": 0.36, "learning_rate": 0.0006472476328037234, "loss": 0.0741, "theoretical_loss": 3.619971922504052, "tokens_seen": 1185415168 }, { "epoch": 0.36, "learning_rate": 0.0006471673888621409, "loss": 0.0711, "theoretical_loss": 3.6198987063382457, "tokens_seen": 1185677312 }, { "epoch": 0.36, "learning_rate": 0.0006470871449205585, "loss": 0.0757, "theoretical_loss": 3.6198255108895374, "tokens_seen": 1185939456 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0009745230199769139, "objective/train/docs_used": 434243, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5072544813156128, "objective/train/original_loss": 1.5072546005249023, "objective/train/theoretical_loss": 3.6197523361474877, "objective/train/tokens_used": 1206661600, "objective/train/value_avg": -0.00830841064453125, "objective/train/value_loss": 0.00030490392236970365, "objective/train/value_max": -9.763240814208984e-05, "objective/train/value_min": -0.658203125, "objective/train/value_reward_corr": 0.6350907920593141, "objective/train/value_std": 0.0146636962890625, "objective/train/weight_avg": 1.0011084079742432, "objective/train/weighted_lm_loss": 1.509034514427185, "objective/train/weights_max": 1.5940988063812256, "objective/train/weights_min": 0.37102511525154114, "theoretical_loss": 3.6197523361474877, "tokens_seen": 1186201600 }, { "epoch": 0.36, "learning_rate": 0.0006470069009789761, "loss": 0.0753, "theoretical_loss": 3.6197523361474877, "tokens_seen": 1186201600 }, { "epoch": 0.36, "learning_rate": 0.0006469266570373937, "loss": 0.0723, "theoretical_loss": 3.619679182101664, "tokens_seen": 1186463744 }, { "epoch": 0.36, "learning_rate": 0.0006468464130958113, "loss": 0.0766, "theoretical_loss": 3.619606048741643, "tokens_seen": 1186725888 }, { "epoch": 0.36, "learning_rate": 0.0006467661691542289, "loss": 0.0751, "theoretical_loss": 3.6195329360570065, "tokens_seen": 1186988032 }, { "epoch": 0.36, "learning_rate": 0.0006466859252126464, "loss": 0.0743, "theoretical_loss": 3.6194598440373467, "tokens_seen": 1187250176 }, { "epoch": 0.36, "learning_rate": 0.000646605681271064, "loss": 0.072, "theoretical_loss": 3.619386772672261, "tokens_seen": 1187512320 }, { "epoch": 0.36, "learning_rate": 0.0006465254373294817, "loss": 0.0766, "theoretical_loss": 3.6193137219513556, "tokens_seen": 1187774464 }, { "epoch": 0.36, "learning_rate": 0.0006464451933878992, "loss": 0.0759, "theoretical_loss": 3.619240691864243, "tokens_seen": 1188036608 }, { "epoch": 0.36, "learning_rate": 0.0006463649494463169, "loss": 0.0709, "theoretical_loss": 3.619167682400545, "tokens_seen": 1188298752 }, { "epoch": 0.36, "learning_rate": 0.0006462847055047344, "loss": 0.0769, "theoretical_loss": 3.619094693549889, "tokens_seen": 1188560896 }, { "epoch": 0.36, "learning_rate": 0.000646204461563152, "loss": 0.0729, "theoretical_loss": 3.6190217253019124, "tokens_seen": 1188823040 }, { "epoch": 0.36, "learning_rate": 0.0006461242176215696, "loss": 0.0747, "theoretical_loss": 3.6189487776462568, "tokens_seen": 1189085184 }, { "epoch": 0.36, "learning_rate": 0.0006460439736799871, "loss": 0.0706, "theoretical_loss": 3.6188758505725738, "tokens_seen": 1189347328 }, { "epoch": 0.36, "learning_rate": 0.0006459637297384047, "loss": 0.0767, "theoretical_loss": 3.618802944070522, "tokens_seen": 1189609472 }, { "epoch": 0.36, "learning_rate": 0.0006458834857968223, "loss": 0.0741, "theoretical_loss": 3.618730058129766, "tokens_seen": 1189871616 }, { "epoch": 0.36, "learning_rate": 0.00064580324185524, "loss": 0.0745, "theoretical_loss": 3.61865719273998, "tokens_seen": 1190133760 }, { "epoch": 0.36, "learning_rate": 0.0006457229979136575, "loss": 0.0736, "theoretical_loss": 3.6185843478908453, "tokens_seen": 1190395904 }, { "epoch": 0.36, "learning_rate": 0.0006456427539720752, "loss": 0.0736, "theoretical_loss": 3.618511523572049, "tokens_seen": 1190658048 }, { "epoch": 0.36, "learning_rate": 0.0006455625100304927, "loss": 0.0728, "theoretical_loss": 3.6184387197732875, "tokens_seen": 1190920192 }, { "epoch": 0.36, "learning_rate": 0.0006454822660889103, "loss": 0.0775, "theoretical_loss": 3.6183659364842624, "tokens_seen": 1191182336 }, { "epoch": 0.36, "learning_rate": 0.0006454020221473279, "loss": 0.0742, "theoretical_loss": 3.6182931736946857, "tokens_seen": 1191444480 }, { "epoch": 0.36, "learning_rate": 0.0006453217782057454, "loss": 0.0723, "theoretical_loss": 3.618220431394274, "tokens_seen": 1191706624 }, { "epoch": 0.36, "learning_rate": 0.0006452415342641631, "loss": 0.0704, "theoretical_loss": 3.618147709572754, "tokens_seen": 1191968768 }, { "epoch": 0.36, "learning_rate": 0.0006451612903225806, "loss": 0.0738, "theoretical_loss": 3.618075008219858, "tokens_seen": 1192230912 }, { "epoch": 0.36, "learning_rate": 0.0006450810463809983, "loss": 0.0715, "theoretical_loss": 3.6180023273253252, "tokens_seen": 1192493056 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.001384525210596621, "objective/train/docs_used": 436657, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4595648050308228, "objective/train/original_loss": 1.4595648050308228, "objective/train/theoretical_loss": 3.6179296668789043, "objective/train/tokens_used": 1213215200, "objective/train/value_avg": -0.01062774658203125, "objective/train/value_loss": 0.00031874870182946324, "objective/train/value_max": -0.00014317035675048828, "objective/train/value_min": -0.7646484375, "objective/train/value_reward_corr": 0.6390448707495002, "objective/train/value_std": 0.0160675048828125, "objective/train/weight_avg": 1.0015238523483276, "objective/train/weighted_lm_loss": 1.4609448909759521, "objective/train/weights_max": 1.3013832569122314, "objective/train/weights_min": 0.3687829375267029, "theoretical_loss": 3.6179296668789043, "tokens_seen": 1192755200 }, { "epoch": 0.36, "learning_rate": 0.0006450008024394159, "loss": 0.0753, "theoretical_loss": 3.6179296668789043, "tokens_seen": 1192755200 }, { "epoch": 0.36, "learning_rate": 0.0006449205584978334, "loss": 0.0736, "theoretical_loss": 3.6178570268703494, "tokens_seen": 1193017344 }, { "epoch": 0.36, "learning_rate": 0.000644840314556251, "loss": 0.0733, "theoretical_loss": 3.617784407289424, "tokens_seen": 1193279488 }, { "epoch": 0.36, "learning_rate": 0.0006447600706146686, "loss": 0.0747, "theoretical_loss": 3.617711808125896, "tokens_seen": 1193541632 }, { "epoch": 0.36, "learning_rate": 0.0006446798266730862, "loss": 0.0751, "theoretical_loss": 3.6176392293695434, "tokens_seen": 1193803776 }, { "epoch": 0.36, "learning_rate": 0.0006445995827315037, "loss": 0.0753, "theoretical_loss": 3.6175666710101506, "tokens_seen": 1194065920 }, { "epoch": 0.36, "learning_rate": 0.0006445193387899214, "loss": 0.0733, "theoretical_loss": 3.6174941330375097, "tokens_seen": 1194328064 }, { "epoch": 0.36, "learning_rate": 0.0006444390948483389, "loss": 0.0753, "theoretical_loss": 3.617421615441419, "tokens_seen": 1194590208 }, { "epoch": 0.36, "learning_rate": 0.0006443588509067564, "loss": 0.0777, "theoretical_loss": 3.617349118211685, "tokens_seen": 1194852352 }, { "epoch": 0.36, "learning_rate": 0.0006442786069651742, "loss": 0.0731, "theoretical_loss": 3.6172766413381225, "tokens_seen": 1195114496 }, { "epoch": 0.36, "learning_rate": 0.0006441983630235917, "loss": 0.0704, "theoretical_loss": 3.617204184810552, "tokens_seen": 1195376640 }, { "epoch": 0.36, "learning_rate": 0.0006441181190820094, "loss": 0.074, "theoretical_loss": 3.6171317486188013, "tokens_seen": 1195638784 }, { "epoch": 0.36, "learning_rate": 0.0006440378751404269, "loss": 0.0744, "theoretical_loss": 3.6170593327527074, "tokens_seen": 1195900928 }, { "epoch": 0.36, "learning_rate": 0.0006439576311988445, "loss": 0.0725, "theoretical_loss": 3.616986937202112, "tokens_seen": 1196163072 }, { "epoch": 0.36, "learning_rate": 0.0006438773872572621, "loss": 0.0745, "theoretical_loss": 3.616914561956867, "tokens_seen": 1196425216 }, { "epoch": 0.36, "learning_rate": 0.0006437971433156797, "loss": 0.0744, "theoretical_loss": 3.6168422070068287, "tokens_seen": 1196687360 }, { "epoch": 0.36, "learning_rate": 0.0006437168993740972, "loss": 0.0729, "theoretical_loss": 3.6167698723418624, "tokens_seen": 1196949504 }, { "epoch": 0.36, "learning_rate": 0.0006436366554325149, "loss": 0.076, "theoretical_loss": 3.6166975579518406, "tokens_seen": 1197211648 }, { "epoch": 0.36, "learning_rate": 0.0006435564114909325, "loss": 0.0729, "theoretical_loss": 3.6166252638266423, "tokens_seen": 1197473792 }, { "epoch": 0.36, "learning_rate": 0.00064347616754935, "loss": 0.0746, "theoretical_loss": 3.616552989956155, "tokens_seen": 1197735936 }, { "epoch": 0.36, "learning_rate": 0.0006433959236077677, "loss": 0.0745, "theoretical_loss": 3.616480736330272, "tokens_seen": 1197998080 }, { "epoch": 0.36, "learning_rate": 0.0006433156796661852, "loss": 0.0709, "theoretical_loss": 3.616408502938895, "tokens_seen": 1198260224 }, { "epoch": 0.36, "learning_rate": 0.0006432354357246028, "loss": 0.07, "theoretical_loss": 3.616336289771932, "tokens_seen": 1198522368 }, { "epoch": 0.36, "learning_rate": 0.0006431551917830204, "loss": 0.0712, "theoretical_loss": 3.6162640968192994, "tokens_seen": 1198784512 }, { "epoch": 0.36, "learning_rate": 0.0006430749478414379, "loss": 0.0726, "theoretical_loss": 3.61619192407092, "tokens_seen": 1199046656 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0016890951665118337, "objective/train/docs_used": 438488, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4938664436340332, "objective/train/original_loss": 1.493866205215454, "objective/train/theoretical_loss": 3.6161197715167237, "objective/train/tokens_used": 1219768800, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.0003480366722214967, "objective/train/value_max": -0.00011146068572998047, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.7120363623901087, "objective/train/value_std": 0.0188140869140625, "objective/train/weight_avg": 1.0018378496170044, "objective/train/weighted_lm_loss": 1.4969854354858398, "objective/train/weights_max": 1.3778945207595825, "objective/train/weights_min": 0.22749631106853485, "theoretical_loss": 3.6161197715167237, "tokens_seen": 1199308800 }, { "epoch": 0.36, "learning_rate": 0.0006429947038998556, "loss": 0.072, "theoretical_loss": 3.6161197715167237, "tokens_seen": 1199308800 }, { "epoch": 0.36, "learning_rate": 0.0006429144599582731, "loss": 0.0718, "theoretical_loss": 3.616047639146648, "tokens_seen": 1199570944 }, { "epoch": 0.36, "learning_rate": 0.0006428342160166908, "loss": 0.0716, "theoretical_loss": 3.6159755269506375, "tokens_seen": 1199833088 }, { "epoch": 0.36, "learning_rate": 0.0006427539720751084, "loss": 0.0734, "theoretical_loss": 3.6159034349186445, "tokens_seen": 1200095232 }, { "epoch": 0.36, "learning_rate": 0.000642673728133526, "loss": 0.0759, "theoretical_loss": 3.615831363040628, "tokens_seen": 1200357376 }, { "epoch": 0.36, "learning_rate": 0.0006425934841919435, "loss": 0.0709, "theoretical_loss": 3.615759311306553, "tokens_seen": 1200619520 }, { "epoch": 0.36, "learning_rate": 0.0006425132402503611, "loss": 0.0715, "theoretical_loss": 3.6156872797063944, "tokens_seen": 1200881664 }, { "epoch": 0.36, "learning_rate": 0.0006424329963087787, "loss": 0.0745, "theoretical_loss": 3.6156152682301324, "tokens_seen": 1201143808 }, { "epoch": 0.36, "learning_rate": 0.0006423527523671962, "loss": 0.0752, "theoretical_loss": 3.6155432768677542, "tokens_seen": 1201405952 }, { "epoch": 0.36, "learning_rate": 0.0006422725084256139, "loss": 0.0726, "theoretical_loss": 3.615471305609255, "tokens_seen": 1201668096 }, { "epoch": 0.36, "learning_rate": 0.0006421922644840314, "loss": 0.0736, "theoretical_loss": 3.6153993544446372, "tokens_seen": 1201930240 }, { "epoch": 0.36, "learning_rate": 0.0006421120205424491, "loss": 0.073, "theoretical_loss": 3.61532742336391, "tokens_seen": 1202192384 }, { "epoch": 0.36, "learning_rate": 0.0006420317766008667, "loss": 0.0709, "theoretical_loss": 3.6152555123570895, "tokens_seen": 1202454528 }, { "epoch": 0.36, "learning_rate": 0.0006419515326592842, "loss": 0.0685, "theoretical_loss": 3.6151836214141992, "tokens_seen": 1202716672 }, { "epoch": 0.36, "learning_rate": 0.0006418712887177018, "loss": 0.0737, "theoretical_loss": 3.61511175052527, "tokens_seen": 1202978816 }, { "epoch": 0.36, "learning_rate": 0.0006417910447761194, "loss": 0.0711, "theoretical_loss": 3.6150398996803395, "tokens_seen": 1203240960 }, { "epoch": 0.36, "learning_rate": 0.000641710800834537, "loss": 0.0732, "theoretical_loss": 3.6149680688694525, "tokens_seen": 1203503104 }, { "epoch": 0.36, "learning_rate": 0.0006416305568929546, "loss": 0.0737, "theoretical_loss": 3.614896258082661, "tokens_seen": 1203765248 }, { "epoch": 0.36, "learning_rate": 0.0006415503129513722, "loss": 0.071, "theoretical_loss": 3.614824467310025, "tokens_seen": 1204027392 }, { "epoch": 0.36, "learning_rate": 0.0006414700690097897, "loss": 0.0743, "theoretical_loss": 3.614752696541609, "tokens_seen": 1204289536 }, { "epoch": 0.37, "learning_rate": 0.0006413898250682074, "loss": 0.0732, "theoretical_loss": 3.614680945767488, "tokens_seen": 1204551680 }, { "epoch": 0.37, "learning_rate": 0.000641309581126625, "loss": 0.0728, "theoretical_loss": 3.614609214977741, "tokens_seen": 1204813824 }, { "epoch": 0.37, "learning_rate": 0.0006412293371850425, "loss": 0.0751, "theoretical_loss": 3.614537504162457, "tokens_seen": 1205075968 }, { "epoch": 0.37, "learning_rate": 0.0006411490932434602, "loss": 0.0736, "theoretical_loss": 3.614465813311729, "tokens_seen": 1205338112 }, { "epoch": 0.37, "learning_rate": 0.0006410688493018777, "loss": 0.0736, "theoretical_loss": 3.6143941424156596, "tokens_seen": 1205600256 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0002445022400934249, "objective/train/docs_used": 441116, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4677330255508423, "objective/train/original_loss": 1.4677330255508423, "objective/train/theoretical_loss": 3.614322491464357, "objective/train/tokens_used": 1226322400, "objective/train/value_avg": -0.01202392578125, "objective/train/value_loss": 0.0005131938378326595, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.91015625, "objective/train/value_reward_corr": 0.7615977591679253, "objective/train/value_std": 0.025634765625, "objective/train/weight_avg": 1.0004706382751465, "objective/train/weighted_lm_loss": 1.4688076972961426, "objective/train/weights_max": 1.7541979551315308, "objective/train/weights_min": 0.3682364523410797, "theoretical_loss": 3.614322491464357, "tokens_seen": 1205862400 }, { "epoch": 0.37, "learning_rate": 0.0006409886053602953, "loss": 0.0725, "theoretical_loss": 3.614322491464357, "tokens_seen": 1205862400 }, { "epoch": 0.37, "learning_rate": 0.0006409083614187129, "loss": 0.0761, "theoretical_loss": 3.614250860447936, "tokens_seen": 1206124544 }, { "epoch": 0.37, "learning_rate": 0.0006408281174771304, "loss": 0.0747, "theoretical_loss": 3.6141792493565212, "tokens_seen": 1206386688 }, { "epoch": 0.37, "learning_rate": 0.000640747873535548, "loss": 0.071, "theoretical_loss": 3.6141076581802416, "tokens_seen": 1206648832 }, { "epoch": 0.37, "learning_rate": 0.0006406676295939656, "loss": 0.0718, "theoretical_loss": 3.614036086909234, "tokens_seen": 1206910976 }, { "epoch": 0.37, "learning_rate": 0.0006405873856523833, "loss": 0.0748, "theoretical_loss": 3.6139645355336425, "tokens_seen": 1207173120 }, { "epoch": 0.37, "learning_rate": 0.0006405071417108009, "loss": 0.076, "theoretical_loss": 3.613893004043617, "tokens_seen": 1207435264 }, { "epoch": 0.37, "learning_rate": 0.0006404268977692185, "loss": 0.0723, "theoretical_loss": 3.6138214924293166, "tokens_seen": 1207697408 }, { "epoch": 0.37, "learning_rate": 0.000640346653827636, "loss": 0.0771, "theoretical_loss": 3.6137500006809056, "tokens_seen": 1207959552 }, { "epoch": 0.37, "learning_rate": 0.0006402664098860537, "loss": 0.074, "theoretical_loss": 3.6136785287885553, "tokens_seen": 1208221696 }, { "epoch": 0.37, "learning_rate": 0.0006401861659444712, "loss": 0.0736, "theoretical_loss": 3.6136070767424457, "tokens_seen": 1208483840 }, { "epoch": 0.37, "learning_rate": 0.0006401059220028887, "loss": 0.0746, "theoretical_loss": 3.6135356445327624, "tokens_seen": 1208745984 }, { "epoch": 0.37, "learning_rate": 0.0006400256780613064, "loss": 0.0692, "theoretical_loss": 3.6134642321496977, "tokens_seen": 1209008128 }, { "epoch": 0.37, "learning_rate": 0.000639945434119724, "loss": 0.0706, "theoretical_loss": 3.613392839583452, "tokens_seen": 1209270272 }, { "epoch": 0.37, "learning_rate": 0.0006398651901781416, "loss": 0.0741, "theoretical_loss": 3.6133214668242317, "tokens_seen": 1209532416 }, { "epoch": 0.37, "learning_rate": 0.0006397849462365592, "loss": 0.0709, "theoretical_loss": 3.613250113862251, "tokens_seen": 1209794560 }, { "epoch": 0.37, "learning_rate": 0.0006397047022949768, "loss": 0.0724, "theoretical_loss": 3.61317878068773, "tokens_seen": 1210056704 }, { "epoch": 0.37, "learning_rate": 0.0006396244583533943, "loss": 0.0715, "theoretical_loss": 3.6131074672908965, "tokens_seen": 1210318848 }, { "epoch": 0.37, "learning_rate": 0.0006395442144118119, "loss": 0.0715, "theoretical_loss": 3.6130361736619854, "tokens_seen": 1210580992 }, { "epoch": 0.37, "learning_rate": 0.0006394639704702295, "loss": 0.0739, "theoretical_loss": 3.6129648997912382, "tokens_seen": 1210843136 }, { "epoch": 0.37, "learning_rate": 0.000639383726528647, "loss": 0.0723, "theoretical_loss": 3.6128936456689034, "tokens_seen": 1211105280 }, { "epoch": 0.37, "learning_rate": 0.0006393034825870647, "loss": 0.0729, "theoretical_loss": 3.612822411285236, "tokens_seen": 1211367424 }, { "epoch": 0.37, "learning_rate": 0.0006392232386454822, "loss": 0.0755, "theoretical_loss": 3.612751196630499, "tokens_seen": 1211629568 }, { "epoch": 0.37, "learning_rate": 0.0006391429947039, "loss": 0.0725, "theoretical_loss": 3.61268000169496, "tokens_seen": 1211891712 }, { "epoch": 0.37, "learning_rate": 0.0006390627507623175, "loss": 0.0742, "theoretical_loss": 3.612608826468897, "tokens_seen": 1212153856 }, { "epoch": 0.37, "objective/train/advantage_avg": -0.0005174296093173325, "objective/train/docs_used": 443456, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.490218162536621, "objective/train/original_loss": 1.490218162536621, "objective/train/theoretical_loss": 3.6125376709425923, "objective/train/tokens_used": 1232876000, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00029237871058285236, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.3349609375, "objective/train/value_reward_corr": 0.6280084039572571, "objective/train/value_std": 0.01226806640625, "objective/train/weight_avg": 0.999613881111145, "objective/train/weighted_lm_loss": 1.4892261028289795, "objective/train/weights_max": 1.3555288314819336, "objective/train/weights_min": 0.3726279139518738, "theoretical_loss": 3.6125376709425923, "tokens_seen": 1212416000 }, { "epoch": 0.37, "learning_rate": 0.000638982506820735, "loss": 0.0738, "theoretical_loss": 3.6125376709425923, "tokens_seen": 1212416000 }, { "epoch": 0.37, "learning_rate": 0.0006389022628791527, "loss": 0.0683, "theoretical_loss": 3.6124665351063356, "tokens_seen": 1212678144 }, { "epoch": 0.37, "learning_rate": 0.0006388220189375702, "loss": 0.0737, "theoretical_loss": 3.612395418950424, "tokens_seen": 1212940288 }, { "epoch": 0.37, "learning_rate": 0.0006387417749959878, "loss": 0.0741, "theoretical_loss": 3.6123243224651604, "tokens_seen": 1213202432 }, { "epoch": 0.37, "learning_rate": 0.0006386615310544054, "loss": 0.0719, "theoretical_loss": 3.612253245640856, "tokens_seen": 1213464576 }, { "epoch": 0.37, "learning_rate": 0.000638581287112823, "loss": 0.0716, "theoretical_loss": 3.6121821884678287, "tokens_seen": 1213726720 }, { "epoch": 0.37, "learning_rate": 0.0006385010431712405, "loss": 0.0742, "theoretical_loss": 3.6121111509364017, "tokens_seen": 1213988864 }, { "epoch": 0.37, "learning_rate": 0.0006384207992296582, "loss": 0.0751, "theoretical_loss": 3.6120401330369067, "tokens_seen": 1214251008 }, { "epoch": 0.37, "learning_rate": 0.0006383405552880758, "loss": 0.0718, "theoretical_loss": 3.6119691347596814, "tokens_seen": 1214513152 }, { "epoch": 0.37, "learning_rate": 0.0006382603113464933, "loss": 0.0739, "theoretical_loss": 3.611898156095071, "tokens_seen": 1214775296 }, { "epoch": 0.37, "learning_rate": 0.000638180067404911, "loss": 0.0736, "theoretical_loss": 3.6118271970334264, "tokens_seen": 1215037440 }, { "epoch": 0.37, "learning_rate": 0.0006380998234633285, "loss": 0.0722, "theoretical_loss": 3.6117562575651068, "tokens_seen": 1215299584 }, { "epoch": 0.37, "learning_rate": 0.0006380195795217462, "loss": 0.0728, "theoretical_loss": 3.611685337680477, "tokens_seen": 1215561728 }, { "epoch": 0.37, "learning_rate": 0.0006379393355801637, "loss": 0.072, "theoretical_loss": 3.6116144373699086, "tokens_seen": 1215823872 }, { "epoch": 0.37, "learning_rate": 0.0006378590916385812, "loss": 0.0729, "theoretical_loss": 3.611543556623782, "tokens_seen": 1216086016 }, { "epoch": 0.37, "learning_rate": 0.0006377788476969989, "loss": 0.0728, "theoretical_loss": 3.6114726954324814, "tokens_seen": 1216348160 }, { "epoch": 0.37, "learning_rate": 0.0006376986037554164, "loss": 0.0723, "theoretical_loss": 3.6114018537864, "tokens_seen": 1216610304 }, { "epoch": 0.37, "learning_rate": 0.0006376183598138341, "loss": 0.0724, "theoretical_loss": 3.6113310316759373, "tokens_seen": 1216872448 }, { "epoch": 0.37, "learning_rate": 0.0006375381158722517, "loss": 0.0691, "theoretical_loss": 3.6112602290914984, "tokens_seen": 1217134592 }, { "epoch": 0.37, "learning_rate": 0.0006374578719306693, "loss": 0.0716, "theoretical_loss": 3.611189446023497, "tokens_seen": 1217396736 }, { "epoch": 0.37, "learning_rate": 0.0006373776279890868, "loss": 0.0725, "theoretical_loss": 3.6111186824623527, "tokens_seen": 1217658880 }, { "epoch": 0.37, "learning_rate": 0.0006372973840475045, "loss": 0.0737, "theoretical_loss": 3.6110479383984915, "tokens_seen": 1217921024 }, { "epoch": 0.37, "learning_rate": 0.000637217140105922, "loss": 0.0729, "theoretical_loss": 3.6109772138223466, "tokens_seen": 1218183168 }, { "epoch": 0.37, "learning_rate": 0.0006371368961643395, "loss": 0.0735, "theoretical_loss": 3.6109065087243577, "tokens_seen": 1218445312 }, { "epoch": 0.37, "learning_rate": 0.0006370566522227572, "loss": 0.0715, "theoretical_loss": 3.610835823094972, "tokens_seen": 1218707456 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0005563150043599308, "objective/train/docs_used": 445814, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4974902868270874, "objective/train/original_loss": 1.4974902868270874, "objective/train/theoretical_loss": 3.6107651569246424, "objective/train/tokens_used": 1239429600, "objective/train/value_avg": -0.0152435302734375, "objective/train/value_loss": 0.0005672244587913156, "objective/train/value_max": -0.0001055598258972168, "objective/train/value_min": -0.96044921875, "objective/train/value_reward_corr": 0.7760111785211823, "objective/train/value_std": 0.0281982421875, "objective/train/weight_avg": 1.0008221864700317, "objective/train/weighted_lm_loss": 1.4987361431121826, "objective/train/weights_max": 1.8249680995941162, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 3.6107651569246424, "tokens_seen": 1218969600 }, { "epoch": 0.37, "learning_rate": 0.0006369764082811747, "loss": 0.073, "theoretical_loss": 3.6107651569246424, "tokens_seen": 1218969600 }, { "epoch": 0.37, "learning_rate": 0.0006368961643395924, "loss": 0.0712, "theoretical_loss": 3.610694510203829, "tokens_seen": 1219231744 }, { "epoch": 0.37, "learning_rate": 0.00063681592039801, "loss": 0.0716, "theoretical_loss": 3.610623882922999, "tokens_seen": 1219493888 }, { "epoch": 0.37, "learning_rate": 0.0006367356764564276, "loss": 0.0716, "theoretical_loss": 3.6105532750726255, "tokens_seen": 1219756032 }, { "epoch": 0.37, "learning_rate": 0.0006366554325148452, "loss": 0.0717, "theoretical_loss": 3.6104826866431887, "tokens_seen": 1220018176 }, { "epoch": 0.37, "learning_rate": 0.0006365751885732627, "loss": 0.0717, "theoretical_loss": 3.6104121176251764, "tokens_seen": 1220280320 }, { "epoch": 0.37, "learning_rate": 0.0006364949446316803, "loss": 0.0723, "theoretical_loss": 3.6103415680090816, "tokens_seen": 1220542464 }, { "epoch": 0.37, "learning_rate": 0.0006364147006900979, "loss": 0.0739, "theoretical_loss": 3.6102710377854046, "tokens_seen": 1220804608 }, { "epoch": 0.37, "learning_rate": 0.0006363344567485155, "loss": 0.0704, "theoretical_loss": 3.610200526944652, "tokens_seen": 1221066752 }, { "epoch": 0.37, "learning_rate": 0.000636254212806933, "loss": 0.0721, "theoretical_loss": 3.610130035477339, "tokens_seen": 1221328896 }, { "epoch": 0.37, "learning_rate": 0.0006361739688653508, "loss": 0.0752, "theoretical_loss": 3.6100595633739854, "tokens_seen": 1221591040 }, { "epoch": 0.37, "learning_rate": 0.0006360937249237683, "loss": 0.0727, "theoretical_loss": 3.6099891106251176, "tokens_seen": 1221853184 }, { "epoch": 0.37, "learning_rate": 0.0006360134809821858, "loss": 0.0746, "theoretical_loss": 3.6099186772212697, "tokens_seen": 1222115328 }, { "epoch": 0.37, "learning_rate": 0.0006359332370406035, "loss": 0.0709, "theoretical_loss": 3.6098482631529825, "tokens_seen": 1222377472 }, { "epoch": 0.37, "learning_rate": 0.000635852993099021, "loss": 0.0727, "theoretical_loss": 3.6097778684108026, "tokens_seen": 1222639616 }, { "epoch": 0.37, "learning_rate": 0.0006357727491574386, "loss": 0.0716, "theoretical_loss": 3.609707492985284, "tokens_seen": 1222901760 }, { "epoch": 0.37, "learning_rate": 0.0006356925052158562, "loss": 0.0714, "theoretical_loss": 3.609637136866987, "tokens_seen": 1223163904 }, { "epoch": 0.37, "learning_rate": 0.0006356122612742738, "loss": 0.0697, "theoretical_loss": 3.609566800046478, "tokens_seen": 1223426048 }, { "epoch": 0.37, "learning_rate": 0.0006355320173326913, "loss": 0.0711, "theoretical_loss": 3.609496482514332, "tokens_seen": 1223688192 }, { "epoch": 0.37, "learning_rate": 0.000635451773391109, "loss": 0.0732, "theoretical_loss": 3.609426184261128, "tokens_seen": 1223950336 }, { "epoch": 0.37, "learning_rate": 0.0006353715294495266, "loss": 0.0735, "theoretical_loss": 3.6093559052774538, "tokens_seen": 1224212480 }, { "epoch": 0.37, "learning_rate": 0.0006352912855079442, "loss": 0.0715, "theoretical_loss": 3.6092856455539017, "tokens_seen": 1224474624 }, { "epoch": 0.37, "learning_rate": 0.0006352110415663618, "loss": 0.0745, "theoretical_loss": 3.6092154050810725, "tokens_seen": 1224736768 }, { "epoch": 0.37, "learning_rate": 0.0006351307976247793, "loss": 0.073, "theoretical_loss": 3.6091451838495727, "tokens_seen": 1224998912 }, { "epoch": 0.37, "learning_rate": 0.000635050553683197, "loss": 0.0714, "theoretical_loss": 3.609074981850016, "tokens_seen": 1225261056 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.000483504292787984, "objective/train/docs_used": 448161, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4086085557937622, "objective/train/original_loss": 1.4086084365844727, "objective/train/theoretical_loss": 3.6090047990730216, "objective/train/tokens_used": 1245983200, "objective/train/value_avg": -0.007053375244140625, "objective/train/value_loss": 0.0002645938075147569, "objective/train/value_max": -8.285045623779297e-05, "objective/train/value_min": -0.9697265625, "objective/train/value_reward_corr": 0.7658859999331806, "objective/train/value_std": 0.0177764892578125, "objective/train/weight_avg": 1.0006027221679688, "objective/train/weighted_lm_loss": 1.4089957475662231, "objective/train/weights_max": 1.5008656978607178, "objective/train/weights_min": 0.3695588707923889, "theoretical_loss": 3.6090047990730216, "tokens_seen": 1225523200 }, { "epoch": 0.37, "learning_rate": 0.0006349703097416145, "loss": 0.0747, "theoretical_loss": 3.6090047990730216, "tokens_seen": 1225523200 }, { "epoch": 0.37, "learning_rate": 0.000634890065800032, "loss": 0.0711, "theoretical_loss": 3.6089346355092164, "tokens_seen": 1225785344 }, { "epoch": 0.37, "learning_rate": 0.0006348098218584497, "loss": 0.0737, "theoretical_loss": 3.6088644911492334, "tokens_seen": 1226047488 }, { "epoch": 0.37, "learning_rate": 0.0006347295779168672, "loss": 0.0731, "theoretical_loss": 3.6087943659837114, "tokens_seen": 1226309632 }, { "epoch": 0.37, "learning_rate": 0.0006346493339752849, "loss": 0.0708, "theoretical_loss": 3.6087242600032976, "tokens_seen": 1226571776 }, { "epoch": 0.37, "learning_rate": 0.0006345690900337025, "loss": 0.0721, "theoretical_loss": 3.6086541731986443, "tokens_seen": 1226833920 }, { "epoch": 0.37, "learning_rate": 0.0006344888460921201, "loss": 0.0685, "theoretical_loss": 3.6085841055604106, "tokens_seen": 1227096064 }, { "epoch": 0.37, "learning_rate": 0.0006344086021505376, "loss": 0.073, "theoretical_loss": 3.608514057079262, "tokens_seen": 1227358208 }, { "epoch": 0.37, "learning_rate": 0.0006343283582089553, "loss": 0.0712, "theoretical_loss": 3.6084440277458714, "tokens_seen": 1227620352 }, { "epoch": 0.37, "learning_rate": 0.0006342481142673728, "loss": 0.0702, "theoretical_loss": 3.6083740175509176, "tokens_seen": 1227882496 }, { "epoch": 0.37, "learning_rate": 0.0006341678703257904, "loss": 0.0749, "theoretical_loss": 3.6083040264850856, "tokens_seen": 1228144640 }, { "epoch": 0.37, "learning_rate": 0.000634087626384208, "loss": 0.0711, "theoretical_loss": 3.6082340545390674, "tokens_seen": 1228406784 }, { "epoch": 0.37, "learning_rate": 0.0006340073824426255, "loss": 0.0732, "theoretical_loss": 3.6081641017035615, "tokens_seen": 1228668928 }, { "epoch": 0.37, "learning_rate": 0.0006339271385010433, "loss": 0.0748, "theoretical_loss": 3.608094167969273, "tokens_seen": 1228931072 }, { "epoch": 0.37, "learning_rate": 0.0006338468945594608, "loss": 0.0717, "theoretical_loss": 3.6080242533269136, "tokens_seen": 1229193216 }, { "epoch": 0.37, "learning_rate": 0.0006337666506178784, "loss": 0.0701, "theoretical_loss": 3.6079543577672, "tokens_seen": 1229455360 }, { "epoch": 0.37, "learning_rate": 0.000633686406676296, "loss": 0.074, "theoretical_loss": 3.6078844812808577, "tokens_seen": 1229717504 }, { "epoch": 0.37, "learning_rate": 0.0006336061627347135, "loss": 0.0758, "theoretical_loss": 3.6078146238586175, "tokens_seen": 1229979648 }, { "epoch": 0.37, "learning_rate": 0.0006335259187931311, "loss": 0.0737, "theoretical_loss": 3.6077447854912164, "tokens_seen": 1230241792 }, { "epoch": 0.37, "learning_rate": 0.0006334456748515487, "loss": 0.0721, "theoretical_loss": 3.6076749661693985, "tokens_seen": 1230503936 }, { "epoch": 0.37, "learning_rate": 0.0006333654309099663, "loss": 0.0742, "theoretical_loss": 3.607605165883914, "tokens_seen": 1230766080 }, { "epoch": 0.37, "learning_rate": 0.0006332851869683838, "loss": 0.0728, "theoretical_loss": 3.60753538462552, "tokens_seen": 1231028224 }, { "epoch": 0.37, "learning_rate": 0.0006332049430268016, "loss": 0.0718, "theoretical_loss": 3.607465622384979, "tokens_seen": 1231290368 }, { "epoch": 0.37, "learning_rate": 0.0006331246990852191, "loss": 0.0731, "theoretical_loss": 3.6073958791530614, "tokens_seen": 1231552512 }, { "epoch": 0.37, "learning_rate": 0.0006330444551436366, "loss": 0.0727, "theoretical_loss": 3.607326154920543, "tokens_seen": 1231814656 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0014651658711954951, "objective/train/docs_used": 450737, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4861600399017334, "objective/train/original_loss": 1.4861600399017334, "objective/train/theoretical_loss": 3.6072564496782062, "objective/train/tokens_used": 1252536800, "objective/train/value_avg": -0.01021575927734375, "objective/train/value_loss": 0.0003100598114542663, "objective/train/value_max": -0.0001055598258972168, "objective/train/value_min": -0.75244140625, "objective/train/value_reward_corr": 0.6669433027871567, "objective/train/value_std": 0.0163116455078125, "objective/train/weight_avg": 1.001604437828064, "objective/train/weighted_lm_loss": 1.4885464906692505, "objective/train/weights_max": 1.9935988187789917, "objective/train/weights_min": 0.36889123916625977, "theoretical_loss": 3.6072564496782062, "tokens_seen": 1232076800 }, { "epoch": 0.37, "learning_rate": 0.0006329642112020543, "loss": 0.0716, "theoretical_loss": 3.6072564496782062, "tokens_seen": 1232076800 }, { "epoch": 0.37, "learning_rate": 0.0006328839672604718, "loss": 0.0703, "theoretical_loss": 3.607186763416841, "tokens_seen": 1232338944 }, { "epoch": 0.37, "learning_rate": 0.0006328037233188895, "loss": 0.0728, "theoretical_loss": 3.607117096127242, "tokens_seen": 1232601088 }, { "epoch": 0.37, "learning_rate": 0.000632723479377307, "loss": 0.0744, "theoretical_loss": 3.60704744780021, "tokens_seen": 1232863232 }, { "epoch": 0.37, "learning_rate": 0.0006326432354357246, "loss": 0.0721, "theoretical_loss": 3.606977818426555, "tokens_seen": 1233125376 }, { "epoch": 0.37, "learning_rate": 0.0006325629914941422, "loss": 0.0745, "theoretical_loss": 3.6069082079970913, "tokens_seen": 1233387520 }, { "epoch": 0.37, "learning_rate": 0.0006324827475525598, "loss": 0.0739, "theoretical_loss": 3.60683861650264, "tokens_seen": 1233649664 }, { "epoch": 0.37, "learning_rate": 0.0006324025036109774, "loss": 0.0708, "theoretical_loss": 3.606769043934027, "tokens_seen": 1233911808 }, { "epoch": 0.37, "learning_rate": 0.000632322259669395, "loss": 0.0736, "theoretical_loss": 3.606699490282088, "tokens_seen": 1234173952 }, { "epoch": 0.37, "learning_rate": 0.0006322420157278126, "loss": 0.0699, "theoretical_loss": 3.606629955537663, "tokens_seen": 1234436096 }, { "epoch": 0.37, "learning_rate": 0.0006321617717862301, "loss": 0.0711, "theoretical_loss": 3.606560439691598, "tokens_seen": 1234698240 }, { "epoch": 0.37, "learning_rate": 0.0006320815278446478, "loss": 0.0722, "theoretical_loss": 3.6064909427347462, "tokens_seen": 1234960384 }, { "epoch": 0.37, "learning_rate": 0.0006320012839030653, "loss": 0.0733, "theoretical_loss": 3.6064214646579673, "tokens_seen": 1235222528 }, { "epoch": 0.37, "learning_rate": 0.0006319210399614828, "loss": 0.0743, "theoretical_loss": 3.606352005452126, "tokens_seen": 1235484672 }, { "epoch": 0.37, "learning_rate": 0.0006318407960199005, "loss": 0.0728, "theoretical_loss": 3.6062825651080956, "tokens_seen": 1235746816 }, { "epoch": 0.37, "learning_rate": 0.000631760552078318, "loss": 0.0736, "theoretical_loss": 3.6062131436167544, "tokens_seen": 1236008960 }, { "epoch": 0.37, "learning_rate": 0.0006316803081367358, "loss": 0.0717, "theoretical_loss": 3.606143740968986, "tokens_seen": 1236271104 }, { "epoch": 0.37, "learning_rate": 0.0006316000641951533, "loss": 0.0749, "theoretical_loss": 3.6060743571556833, "tokens_seen": 1236533248 }, { "epoch": 0.37, "learning_rate": 0.0006315198202535709, "loss": 0.0741, "theoretical_loss": 3.606004992167742, "tokens_seen": 1236795392 }, { "epoch": 0.37, "learning_rate": 0.0006314395763119885, "loss": 0.0716, "theoretical_loss": 3.605935645996067, "tokens_seen": 1237057536 }, { "epoch": 0.37, "learning_rate": 0.000631359332370406, "loss": 0.0698, "theoretical_loss": 3.605866318631568, "tokens_seen": 1237319680 }, { "epoch": 0.38, "learning_rate": 0.0006312790884288236, "loss": 0.0712, "theoretical_loss": 3.605797010065161, "tokens_seen": 1237581824 }, { "epoch": 0.38, "learning_rate": 0.0006311988444872412, "loss": 0.0714, "theoretical_loss": 3.6057277202877698, "tokens_seen": 1237843968 }, { "epoch": 0.38, "learning_rate": 0.0006311186005456588, "loss": 0.0719, "theoretical_loss": 3.6056584492903223, "tokens_seen": 1238106112 }, { "epoch": 0.38, "learning_rate": 0.0006310383566040763, "loss": 0.0714, "theoretical_loss": 3.6055891970637544, "tokens_seen": 1238368256 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0007562974351458251, "objective/train/docs_used": 453261, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5378789901733398, "objective/train/original_loss": 1.5378789901733398, "objective/train/theoretical_loss": 3.605519963599008, "objective/train/tokens_used": 1259090400, "objective/train/value_avg": -0.009429931640625, "objective/train/value_loss": 0.0003417150001041591, "objective/train/value_max": -0.00011593103408813477, "objective/train/value_min": -0.72119140625, "objective/train/value_reward_corr": 0.7248880939979041, "objective/train/value_std": 0.0178985595703125, "objective/train/weight_avg": 1.0009098052978516, "objective/train/weighted_lm_loss": 1.539913296699524, "objective/train/weights_max": 1.2381511926651, "objective/train/weights_min": 0.3683769106864929, "theoretical_loss": 3.605519963599008, "tokens_seen": 1238630400 }, { "epoch": 0.38, "learning_rate": 0.0006309581126624941, "loss": 0.0748, "theoretical_loss": 3.605519963599008, "tokens_seen": 1238630400 }, { "epoch": 0.38, "learning_rate": 0.0006308778687209116, "loss": 0.0723, "theoretical_loss": 3.60545074888703, "tokens_seen": 1238892544 }, { "epoch": 0.38, "learning_rate": 0.0006307976247793292, "loss": 0.0714, "theoretical_loss": 3.6053815529187756, "tokens_seen": 1239154688 }, { "epoch": 0.38, "learning_rate": 0.0006307173808377468, "loss": 0.0739, "theoretical_loss": 3.6053123756852052, "tokens_seen": 1239416832 }, { "epoch": 0.38, "learning_rate": 0.0006306371368961643, "loss": 0.0709, "theoretical_loss": 3.605243217177285, "tokens_seen": 1239678976 }, { "epoch": 0.38, "learning_rate": 0.0006305568929545819, "loss": 0.0711, "theoretical_loss": 3.6051740773859877, "tokens_seen": 1239941120 }, { "epoch": 0.38, "learning_rate": 0.0006304766490129995, "loss": 0.0729, "theoretical_loss": 3.605104956302293, "tokens_seen": 1240203264 }, { "epoch": 0.38, "learning_rate": 0.0006303964050714171, "loss": 0.0728, "theoretical_loss": 3.605035853917187, "tokens_seen": 1240465408 }, { "epoch": 0.38, "learning_rate": 0.0006303161611298347, "loss": 0.0748, "theoretical_loss": 3.604966770221661, "tokens_seen": 1240727552 }, { "epoch": 0.38, "learning_rate": 0.0006302359171882524, "loss": 0.0696, "theoretical_loss": 3.604897705206713, "tokens_seen": 1240989696 }, { "epoch": 0.38, "learning_rate": 0.0006301556732466699, "loss": 0.0723, "theoretical_loss": 3.6048286588633465, "tokens_seen": 1241251840 }, { "epoch": 0.38, "learning_rate": 0.0006300754293050875, "loss": 0.0747, "theoretical_loss": 3.6047596311825725, "tokens_seen": 1241513984 }, { "epoch": 0.38, "learning_rate": 0.0006299951853635051, "loss": 0.0746, "theoretical_loss": 3.6046906221554087, "tokens_seen": 1241776128 }, { "epoch": 0.38, "learning_rate": 0.0006299149414219226, "loss": 0.0737, "theoretical_loss": 3.6046216317728765, "tokens_seen": 1242038272 }, { "epoch": 0.38, "learning_rate": 0.0006298346974803403, "loss": 0.0722, "theoretical_loss": 3.6045526600260054, "tokens_seen": 1242300416 }, { "epoch": 0.38, "learning_rate": 0.0006297544535387578, "loss": 0.0716, "theoretical_loss": 3.6044837069058318, "tokens_seen": 1242562560 }, { "epoch": 0.38, "learning_rate": 0.0006296742095971754, "loss": 0.0751, "theoretical_loss": 3.6044147724033957, "tokens_seen": 1242824704 }, { "epoch": 0.38, "learning_rate": 0.000629593965655593, "loss": 0.0752, "theoretical_loss": 3.6043458565097453, "tokens_seen": 1243086848 }, { "epoch": 0.38, "learning_rate": 0.0006295137217140106, "loss": 0.07, "theoretical_loss": 3.6042769592159356, "tokens_seen": 1243348992 }, { "epoch": 0.38, "learning_rate": 0.0006294334777724282, "loss": 0.0724, "theoretical_loss": 3.6042080805130254, "tokens_seen": 1243611136 }, { "epoch": 0.38, "learning_rate": 0.0006293532338308458, "loss": 0.0749, "theoretical_loss": 3.6041392203920815, "tokens_seen": 1243873280 }, { "epoch": 0.38, "learning_rate": 0.0006292729898892634, "loss": 0.0747, "theoretical_loss": 3.6040703788441757, "tokens_seen": 1244135424 }, { "epoch": 0.38, "learning_rate": 0.0006291927459476809, "loss": 0.0744, "theoretical_loss": 3.6040015558603877, "tokens_seen": 1244397568 }, { "epoch": 0.38, "learning_rate": 0.0006291125020060986, "loss": 0.073, "theoretical_loss": 3.603932751431802, "tokens_seen": 1244659712 }, { "epoch": 0.38, "learning_rate": 0.0006290322580645161, "loss": 0.072, "theoretical_loss": 3.6038639655495093, "tokens_seen": 1244921856 }, { "epoch": 0.38, "objective/train/advantage_avg": -0.000271944678388536, "objective/train/docs_used": 455531, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5588250160217285, "objective/train/original_loss": 1.558824896812439, "objective/train/theoretical_loss": 3.6037951982046064, "objective/train/tokens_used": 1265644000, "objective/train/value_avg": -0.009490966796875, "objective/train/value_loss": 0.000395492505049333, "objective/train/value_max": -8.153915405273438e-05, "objective/train/value_min": -0.5654296875, "objective/train/value_reward_corr": 0.6741612325784748, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 0.9999120831489563, "objective/train/weighted_lm_loss": 1.55759859085083, "objective/train/weights_max": 1.5399620532989502, "objective/train/weights_min": 0.3693220913410187, "theoretical_loss": 3.6037951982046064, "tokens_seen": 1245184000 }, { "epoch": 0.38, "learning_rate": 0.0006289520141229337, "loss": 0.0727, "theoretical_loss": 3.6037951982046064, "tokens_seen": 1245184000 }, { "epoch": 0.38, "learning_rate": 0.0006288717701813513, "loss": 0.0728, "theoretical_loss": 3.6037264493881973, "tokens_seen": 1245446144 }, { "epoch": 0.38, "learning_rate": 0.0006287915262397688, "loss": 0.075, "theoretical_loss": 3.603657719091391, "tokens_seen": 1245708288 }, { "epoch": 0.38, "learning_rate": 0.0006287112822981866, "loss": 0.0731, "theoretical_loss": 3.603589007305303, "tokens_seen": 1245970432 }, { "epoch": 0.38, "learning_rate": 0.0006286310383566041, "loss": 0.075, "theoretical_loss": 3.603520314021055, "tokens_seen": 1246232576 }, { "epoch": 0.38, "learning_rate": 0.0006285507944150217, "loss": 0.0718, "theoretical_loss": 3.6034516392297746, "tokens_seen": 1246494720 }, { "epoch": 0.38, "learning_rate": 0.0006284705504734393, "loss": 0.076, "theoretical_loss": 3.6033829829225965, "tokens_seen": 1246756864 }, { "epoch": 0.38, "learning_rate": 0.0006283903065318568, "loss": 0.075, "theoretical_loss": 3.6033143450906593, "tokens_seen": 1247019008 }, { "epoch": 0.38, "learning_rate": 0.0006283100625902744, "loss": 0.0732, "theoretical_loss": 3.6032457257251105, "tokens_seen": 1247281152 }, { "epoch": 0.38, "learning_rate": 0.000628229818648692, "loss": 0.0701, "theoretical_loss": 3.603177124817101, "tokens_seen": 1247543296 }, { "epoch": 0.38, "learning_rate": 0.0006281495747071096, "loss": 0.0724, "theoretical_loss": 3.603108542357791, "tokens_seen": 1247805440 }, { "epoch": 0.38, "learning_rate": 0.0006280693307655271, "loss": 0.0741, "theoretical_loss": 3.603039978338343, "tokens_seen": 1248067584 }, { "epoch": 0.38, "learning_rate": 0.0006279890868239449, "loss": 0.0714, "theoretical_loss": 3.6029714327499285, "tokens_seen": 1248329728 }, { "epoch": 0.38, "learning_rate": 0.0006279088428823624, "loss": 0.0752, "theoretical_loss": 3.602902905583724, "tokens_seen": 1248591872 }, { "epoch": 0.38, "learning_rate": 0.0006278285989407801, "loss": 0.0717, "theoretical_loss": 3.602834396830912, "tokens_seen": 1248854016 }, { "epoch": 0.38, "learning_rate": 0.0006277483549991976, "loss": 0.0725, "theoretical_loss": 3.6027659064826816, "tokens_seen": 1249116160 }, { "epoch": 0.38, "learning_rate": 0.0006276681110576151, "loss": 0.0745, "theoretical_loss": 3.602697434530227, "tokens_seen": 1249378304 }, { "epoch": 0.38, "learning_rate": 0.0006275878671160328, "loss": 0.0745, "theoretical_loss": 3.6026289809647487, "tokens_seen": 1249640448 }, { "epoch": 0.38, "learning_rate": 0.0006275076231744503, "loss": 0.0741, "theoretical_loss": 3.6025605457774548, "tokens_seen": 1249902592 }, { "epoch": 0.38, "learning_rate": 0.0006274273792328679, "loss": 0.0722, "theoretical_loss": 3.602492128959558, "tokens_seen": 1250164736 }, { "epoch": 0.38, "learning_rate": 0.0006273471352912855, "loss": 0.073, "theoretical_loss": 3.6024237305022764, "tokens_seen": 1250426880 }, { "epoch": 0.38, "learning_rate": 0.0006272668913497032, "loss": 0.0741, "theoretical_loss": 3.602355350396836, "tokens_seen": 1250689024 }, { "epoch": 0.38, "learning_rate": 0.0006271866474081207, "loss": 0.072, "theoretical_loss": 3.602286988634467, "tokens_seen": 1250951168 }, { "epoch": 0.38, "learning_rate": 0.0006271064034665383, "loss": 0.0719, "theoretical_loss": 3.6022186452064076, "tokens_seen": 1251213312 }, { "epoch": 0.38, "learning_rate": 0.0006270261595249559, "loss": 0.0717, "theoretical_loss": 3.6021503201039, "tokens_seen": 1251475456 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0007342287572100759, "objective/train/docs_used": 458046, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4588574171066284, "objective/train/original_loss": 1.4588572978973389, "objective/train/theoretical_loss": 3.6020820133181934, "objective/train/tokens_used": 1272197600, "objective/train/value_avg": -0.006072998046875, "objective/train/value_loss": 0.00018203555373474956, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.29638671875, "objective/train/value_reward_corr": 0.6572453525608541, "objective/train/value_std": 0.0111083984375, "objective/train/weight_avg": 1.0008169412612915, "objective/train/weighted_lm_loss": 1.460231065750122, "objective/train/weights_max": 1.3257553577423096, "objective/train/weights_min": 0.3682669997215271, "theoretical_loss": 3.6020820133181934, "tokens_seen": 1251737600 }, { "epoch": 0.38, "learning_rate": 0.0006269459155833734, "loss": 0.0709, "theoretical_loss": 3.6020820133181934, "tokens_seen": 1251737600 }, { "epoch": 0.38, "learning_rate": 0.0006268656716417911, "loss": 0.0742, "theoretical_loss": 3.6020137248405435, "tokens_seen": 1251999744 }, { "epoch": 0.38, "learning_rate": 0.0006267854277002086, "loss": 0.0733, "theoretical_loss": 3.6019454546622107, "tokens_seen": 1252261888 }, { "epoch": 0.38, "learning_rate": 0.0006267051837586262, "loss": 0.0712, "theoretical_loss": 3.601877202774463, "tokens_seen": 1252524032 }, { "epoch": 0.38, "learning_rate": 0.0006266249398170438, "loss": 0.0727, "theoretical_loss": 3.601808969168573, "tokens_seen": 1252786176 }, { "epoch": 0.38, "learning_rate": 0.0006265446958754613, "loss": 0.0721, "theoretical_loss": 3.60174075383582, "tokens_seen": 1253048320 }, { "epoch": 0.38, "learning_rate": 0.0006264644519338791, "loss": 0.0753, "theoretical_loss": 3.6016725567674883, "tokens_seen": 1253310464 }, { "epoch": 0.38, "learning_rate": 0.0006263842079922966, "loss": 0.0728, "theoretical_loss": 3.60160437795487, "tokens_seen": 1253572608 }, { "epoch": 0.38, "learning_rate": 0.0006263039640507142, "loss": 0.073, "theoretical_loss": 3.6015362173892624, "tokens_seen": 1253834752 }, { "epoch": 0.38, "learning_rate": 0.0006262237201091318, "loss": 0.0745, "theoretical_loss": 3.6014680750619674, "tokens_seen": 1254096896 }, { "epoch": 0.38, "learning_rate": 0.0006261434761675494, "loss": 0.0726, "theoretical_loss": 3.6013999509642947, "tokens_seen": 1254359040 }, { "epoch": 0.38, "learning_rate": 0.0006260632322259669, "loss": 0.0723, "theoretical_loss": 3.601331845087559, "tokens_seen": 1254621184 }, { "epoch": 0.38, "learning_rate": 0.0006259829882843845, "loss": 0.073, "theoretical_loss": 3.601263757423082, "tokens_seen": 1254883328 }, { "epoch": 0.38, "learning_rate": 0.0006259027443428021, "loss": 0.073, "theoretical_loss": 3.601195687962189, "tokens_seen": 1255145472 }, { "epoch": 0.38, "learning_rate": 0.0006258225004012196, "loss": 0.0718, "theoretical_loss": 3.601127636696214, "tokens_seen": 1255407616 }, { "epoch": 0.38, "learning_rate": 0.0006257422564596374, "loss": 0.0725, "theoretical_loss": 3.6010596036164957, "tokens_seen": 1255669760 }, { "epoch": 0.38, "learning_rate": 0.0006256620125180549, "loss": 0.0724, "theoretical_loss": 3.600991588714378, "tokens_seen": 1255931904 }, { "epoch": 0.38, "learning_rate": 0.0006255817685764725, "loss": 0.0705, "theoretical_loss": 3.6009235919812124, "tokens_seen": 1256194048 }, { "epoch": 0.38, "learning_rate": 0.0006255015246348901, "loss": 0.0717, "theoretical_loss": 3.6008556134083545, "tokens_seen": 1256456192 }, { "epoch": 0.38, "learning_rate": 0.0006254212806933076, "loss": 0.0733, "theoretical_loss": 3.600787652987167, "tokens_seen": 1256718336 }, { "epoch": 0.38, "learning_rate": 0.0006253410367517252, "loss": 0.0717, "theoretical_loss": 3.6007197107090185, "tokens_seen": 1256980480 }, { "epoch": 0.38, "learning_rate": 0.0006252607928101428, "loss": 0.0736, "theoretical_loss": 3.600651786565283, "tokens_seen": 1257242624 }, { "epoch": 0.38, "learning_rate": 0.0006251805488685604, "loss": 0.0718, "theoretical_loss": 3.6005838805473402, "tokens_seen": 1257504768 }, { "epoch": 0.38, "learning_rate": 0.000625100304926978, "loss": 0.0717, "theoretical_loss": 3.600515992646577, "tokens_seen": 1257766912 }, { "epoch": 0.38, "learning_rate": 0.0006250200609853957, "loss": 0.0719, "theoretical_loss": 3.6004481228543854, "tokens_seen": 1258029056 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0012113068951293826, "objective/train/docs_used": 460354, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4824901819229126, "objective/train/original_loss": 1.482490062713623, "objective/train/theoretical_loss": 3.600380271162162, "objective/train/tokens_used": 1278751200, "objective/train/value_avg": -0.01016998291015625, "objective/train/value_loss": 0.00046331010526046157, "objective/train/value_max": -0.00010889768600463867, "objective/train/value_min": -0.97119140625, "objective/train/value_reward_corr": 0.7243134544742209, "objective/train/value_std": 0.0211181640625, "objective/train/weight_avg": 1.0014119148254395, "objective/train/weighted_lm_loss": 1.484576940536499, "objective/train/weights_max": 1.8089985847473145, "objective/train/weights_min": 0.36829686164855957, "theoretical_loss": 3.600380271162162, "tokens_seen": 1258291200 }, { "epoch": 0.38, "learning_rate": 0.0006249398170438132, "loss": 0.0709, "theoretical_loss": 3.600380271162162, "tokens_seen": 1258291200 }, { "epoch": 0.38, "learning_rate": 0.0006248595731022309, "loss": 0.0693, "theoretical_loss": 3.6003124375613114, "tokens_seen": 1258553344 }, { "epoch": 0.38, "learning_rate": 0.0006247793291606484, "loss": 0.0746, "theoretical_loss": 3.6002446220432427, "tokens_seen": 1258815488 }, { "epoch": 0.38, "learning_rate": 0.0006246990852190659, "loss": 0.074, "theoretical_loss": 3.6001768245993713, "tokens_seen": 1259077632 }, { "epoch": 0.38, "learning_rate": 0.0006246188412774836, "loss": 0.0731, "theoretical_loss": 3.600109045221119, "tokens_seen": 1259339776 }, { "epoch": 0.38, "learning_rate": 0.0006245385973359011, "loss": 0.0749, "theoretical_loss": 3.600041283899912, "tokens_seen": 1259601920 }, { "epoch": 0.38, "learning_rate": 0.0006244583533943187, "loss": 0.0713, "theoretical_loss": 3.599973540627184, "tokens_seen": 1259864064 }, { "epoch": 0.38, "learning_rate": 0.0006243781094527363, "loss": 0.0742, "theoretical_loss": 3.599905815394374, "tokens_seen": 1260126208 }, { "epoch": 0.38, "learning_rate": 0.000624297865511154, "loss": 0.0736, "theoretical_loss": 3.599838108192926, "tokens_seen": 1260388352 }, { "epoch": 0.38, "learning_rate": 0.0006242176215695715, "loss": 0.0724, "theoretical_loss": 3.5997704190142907, "tokens_seen": 1260650496 }, { "epoch": 0.38, "learning_rate": 0.0006241373776279891, "loss": 0.0741, "theoretical_loss": 3.599702747849925, "tokens_seen": 1260912640 }, { "epoch": 0.38, "learning_rate": 0.0006240571336864067, "loss": 0.073, "theoretical_loss": 3.5996350946912896, "tokens_seen": 1261174784 }, { "epoch": 0.38, "learning_rate": 0.0006239768897448243, "loss": 0.0715, "theoretical_loss": 3.5995674595298537, "tokens_seen": 1261436928 }, { "epoch": 0.38, "learning_rate": 0.0006238966458032419, "loss": 0.072, "theoretical_loss": 3.5994998423570914, "tokens_seen": 1261699072 }, { "epoch": 0.38, "learning_rate": 0.0006238164018616594, "loss": 0.0714, "theoretical_loss": 3.599432243164481, "tokens_seen": 1261961216 }, { "epoch": 0.38, "learning_rate": 0.0006237361579200771, "loss": 0.0713, "theoretical_loss": 3.599364661943509, "tokens_seen": 1262223360 }, { "epoch": 0.38, "learning_rate": 0.0006236559139784946, "loss": 0.0726, "theoretical_loss": 3.599297098685666, "tokens_seen": 1262485504 }, { "epoch": 0.38, "learning_rate": 0.0006235756700369121, "loss": 0.071, "theoretical_loss": 3.599229553382449, "tokens_seen": 1262747648 }, { "epoch": 0.38, "learning_rate": 0.0006234954260953299, "loss": 0.0725, "theoretical_loss": 3.599162026025361, "tokens_seen": 1263009792 }, { "epoch": 0.38, "learning_rate": 0.0006234151821537474, "loss": 0.0705, "theoretical_loss": 3.599094516605911, "tokens_seen": 1263271936 }, { "epoch": 0.38, "learning_rate": 0.000623334938212165, "loss": 0.0738, "theoretical_loss": 3.5990270251156122, "tokens_seen": 1263534080 }, { "epoch": 0.38, "learning_rate": 0.0006232546942705826, "loss": 0.0746, "theoretical_loss": 3.5989595515459856, "tokens_seen": 1263796224 }, { "epoch": 0.38, "learning_rate": 0.0006231744503290002, "loss": 0.0743, "theoretical_loss": 3.598892095888557, "tokens_seen": 1264058368 }, { "epoch": 0.38, "learning_rate": 0.0006230942063874177, "loss": 0.0716, "theoretical_loss": 3.5988246581348573, "tokens_seen": 1264320512 }, { "epoch": 0.38, "learning_rate": 0.0006230139624458353, "loss": 0.073, "theoretical_loss": 3.5987572382764252, "tokens_seen": 1264582656 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0005436477949842811, "objective/train/docs_used": 462829, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.450318455696106, "objective/train/original_loss": 1.4503185749053955, "objective/train/theoretical_loss": 3.5986898363048025, "objective/train/tokens_used": 1285304800, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00032011562143452466, "objective/train/value_max": -8.285045623779297e-05, "objective/train/value_min": -0.841796875, "objective/train/value_reward_corr": 0.5236847438293369, "objective/train/value_std": 0.011871337890625, "objective/train/weight_avg": 1.0006617307662964, "objective/train/weighted_lm_loss": 1.4513938426971436, "objective/train/weights_max": 1.2407280206680298, "objective/train/weights_min": 0.05488771200180054, "theoretical_loss": 3.5986898363048025, "tokens_seen": 1264844800 }, { "epoch": 0.38, "learning_rate": 0.0006229337185042529, "loss": 0.0724, "theoretical_loss": 3.5986898363048025, "tokens_seen": 1264844800 }, { "epoch": 0.38, "learning_rate": 0.0006228534745626704, "loss": 0.0713, "theoretical_loss": 3.5986224522115395, "tokens_seen": 1265106944 }, { "epoch": 0.38, "learning_rate": 0.0006227732306210882, "loss": 0.0704, "theoretical_loss": 3.59855508598819, "tokens_seen": 1265369088 }, { "epoch": 0.38, "learning_rate": 0.0006226929866795057, "loss": 0.0736, "theoretical_loss": 3.5984877376263142, "tokens_seen": 1265631232 }, { "epoch": 0.38, "learning_rate": 0.0006226127427379234, "loss": 0.0699, "theoretical_loss": 3.5984204071174783, "tokens_seen": 1265893376 }, { "epoch": 0.38, "learning_rate": 0.0006225324987963409, "loss": 0.0763, "theoretical_loss": 3.598353094453255, "tokens_seen": 1266155520 }, { "epoch": 0.38, "learning_rate": 0.0006224522548547584, "loss": 0.0724, "theoretical_loss": 3.598285799625221, "tokens_seen": 1266417664 }, { "epoch": 0.38, "learning_rate": 0.0006223720109131761, "loss": 0.072, "theoretical_loss": 3.5982185226249594, "tokens_seen": 1266679808 }, { "epoch": 0.38, "learning_rate": 0.0006222917669715936, "loss": 0.0718, "theoretical_loss": 3.5981512634440604, "tokens_seen": 1266941952 }, { "epoch": 0.38, "learning_rate": 0.0006222115230300112, "loss": 0.0725, "theoretical_loss": 3.5980840220741177, "tokens_seen": 1267204096 }, { "epoch": 0.38, "learning_rate": 0.0006221312790884289, "loss": 0.0748, "theoretical_loss": 3.5980167985067317, "tokens_seen": 1267466240 }, { "epoch": 0.38, "learning_rate": 0.0006220510351468465, "loss": 0.074, "theoretical_loss": 3.5979495927335092, "tokens_seen": 1267728384 }, { "epoch": 0.38, "learning_rate": 0.000621970791205264, "loss": 0.0727, "theoretical_loss": 3.5978824047460614, "tokens_seen": 1267990528 }, { "epoch": 0.38, "learning_rate": 0.0006218905472636816, "loss": 0.0732, "theoretical_loss": 3.5978152345360055, "tokens_seen": 1268252672 }, { "epoch": 0.38, "learning_rate": 0.0006218103033220992, "loss": 0.0723, "theoretical_loss": 3.5977480820949657, "tokens_seen": 1268514816 }, { "epoch": 0.38, "learning_rate": 0.0006217300593805167, "loss": 0.0741, "theoretical_loss": 3.5976809474145703, "tokens_seen": 1268776960 }, { "epoch": 0.38, "learning_rate": 0.0006216498154389344, "loss": 0.073, "theoretical_loss": 3.597613830486454, "tokens_seen": 1269039104 }, { "epoch": 0.38, "learning_rate": 0.0006215695714973519, "loss": 0.0717, "theoretical_loss": 3.5975467313022564, "tokens_seen": 1269301248 }, { "epoch": 0.38, "learning_rate": 0.0006214893275557696, "loss": 0.0734, "theoretical_loss": 3.5974796498536237, "tokens_seen": 1269563392 }, { "epoch": 0.38, "learning_rate": 0.0006214090836141871, "loss": 0.0708, "theoretical_loss": 3.597412586132208, "tokens_seen": 1269825536 }, { "epoch": 0.38, "learning_rate": 0.0006213288396726048, "loss": 0.0728, "theoretical_loss": 3.5973455401296652, "tokens_seen": 1270087680 }, { "epoch": 0.38, "learning_rate": 0.0006212485957310224, "loss": 0.0698, "theoretical_loss": 3.597278511837659, "tokens_seen": 1270349824 }, { "epoch": 0.39, "learning_rate": 0.0006211683517894399, "loss": 0.0713, "theoretical_loss": 3.597211501247858, "tokens_seen": 1270611968 }, { "epoch": 0.39, "learning_rate": 0.0006210881078478575, "loss": 0.07, "theoretical_loss": 3.5971445083519358, "tokens_seen": 1270874112 }, { "epoch": 0.39, "learning_rate": 0.0006210078639062751, "loss": 0.0732, "theoretical_loss": 3.5970775331415723, "tokens_seen": 1271136256 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0007621328113600612, "objective/train/docs_used": 465264, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4400675296783447, "objective/train/original_loss": 1.4400676488876343, "objective/train/theoretical_loss": 3.5970105756084525, "objective/train/tokens_used": 1291858400, "objective/train/value_avg": -0.007965087890625, "objective/train/value_loss": 0.00021634330914821476, "objective/train/value_max": -9.101629257202148e-05, "objective/train/value_min": -0.335205078125, "objective/train/value_reward_corr": 0.6737940618129363, "objective/train/value_std": 0.0167388916015625, "objective/train/weight_avg": 1.0008677244186401, "objective/train/weighted_lm_loss": 1.4404685497283936, "objective/train/weights_max": 1.2927756309509277, "objective/train/weights_min": 0.37808898091316223, "theoretical_loss": 3.5970105756084525, "tokens_seen": 1271398400 }, { "epoch": 0.39, "learning_rate": 0.0006209276199646927, "loss": 0.0703, "theoretical_loss": 3.5970105756084525, "tokens_seen": 1271398400 }, { "epoch": 0.39, "learning_rate": 0.0006208473760231102, "loss": 0.0727, "theoretical_loss": 3.5969436357442683, "tokens_seen": 1271660544 }, { "epoch": 0.39, "learning_rate": 0.0006207671320815279, "loss": 0.0719, "theoretical_loss": 3.5968767135407154, "tokens_seen": 1271922688 }, { "epoch": 0.39, "learning_rate": 0.0006206868881399454, "loss": 0.0712, "theoretical_loss": 3.596809808989496, "tokens_seen": 1272184832 }, { "epoch": 0.39, "learning_rate": 0.000620606644198363, "loss": 0.0727, "theoretical_loss": 3.596742922082319, "tokens_seen": 1272446976 }, { "epoch": 0.39, "learning_rate": 0.0006205264002567807, "loss": 0.0704, "theoretical_loss": 3.5966760528108965, "tokens_seen": 1272709120 }, { "epoch": 0.39, "learning_rate": 0.0006204461563151982, "loss": 0.0725, "theoretical_loss": 3.596609201166948, "tokens_seen": 1272971264 }, { "epoch": 0.39, "learning_rate": 0.0006203659123736158, "loss": 0.0726, "theoretical_loss": 3.596542367142198, "tokens_seen": 1273233408 }, { "epoch": 0.39, "learning_rate": 0.0006202856684320334, "loss": 0.0725, "theoretical_loss": 3.5964755507283774, "tokens_seen": 1273495552 }, { "epoch": 0.39, "learning_rate": 0.000620205424490451, "loss": 0.0737, "theoretical_loss": 3.5964087519172203, "tokens_seen": 1273757696 }, { "epoch": 0.39, "learning_rate": 0.0006201251805488686, "loss": 0.0744, "theoretical_loss": 3.59634197070047, "tokens_seen": 1274019840 }, { "epoch": 0.39, "learning_rate": 0.0006200449366072861, "loss": 0.075, "theoretical_loss": 3.5962752070698727, "tokens_seen": 1274281984 }, { "epoch": 0.39, "learning_rate": 0.0006199646926657037, "loss": 0.0693, "theoretical_loss": 3.59620846101718, "tokens_seen": 1274544128 }, { "epoch": 0.39, "learning_rate": 0.0006198844487241214, "loss": 0.0726, "theoretical_loss": 3.596141732534151, "tokens_seen": 1274806272 }, { "epoch": 0.39, "learning_rate": 0.000619804204782539, "loss": 0.0749, "theoretical_loss": 3.596075021612549, "tokens_seen": 1275068416 }, { "epoch": 0.39, "learning_rate": 0.0006197239608409565, "loss": 0.0739, "theoretical_loss": 3.5960083282441433, "tokens_seen": 1275330560 }, { "epoch": 0.39, "learning_rate": 0.0006196437168993742, "loss": 0.0741, "theoretical_loss": 3.5959416524207084, "tokens_seen": 1275592704 }, { "epoch": 0.39, "learning_rate": 0.0006195634729577917, "loss": 0.0732, "theoretical_loss": 3.5958749941340242, "tokens_seen": 1275854848 }, { "epoch": 0.39, "learning_rate": 0.0006194832290162092, "loss": 0.0735, "theoretical_loss": 3.595808353375877, "tokens_seen": 1276116992 }, { "epoch": 0.39, "learning_rate": 0.0006194029850746269, "loss": 0.0723, "theoretical_loss": 3.5957417301380588, "tokens_seen": 1276379136 }, { "epoch": 0.39, "learning_rate": 0.0006193227411330444, "loss": 0.0735, "theoretical_loss": 3.595675124412365, "tokens_seen": 1276641280 }, { "epoch": 0.39, "learning_rate": 0.000619242497191462, "loss": 0.0762, "theoretical_loss": 3.595608536190599, "tokens_seen": 1276903424 }, { "epoch": 0.39, "learning_rate": 0.0006191622532498797, "loss": 0.0724, "theoretical_loss": 3.595541965464568, "tokens_seen": 1277165568 }, { "epoch": 0.39, "learning_rate": 0.0006190820093082973, "loss": 0.0702, "theoretical_loss": 3.5954754122260866, "tokens_seen": 1277427712 }, { "epoch": 0.39, "learning_rate": 0.0006190017653667148, "loss": 0.0739, "theoretical_loss": 3.595408876466972, "tokens_seen": 1277689856 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.0006468091160058975, "objective/train/docs_used": 467639, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4236520528793335, "objective/train/original_loss": 1.423651933670044, "objective/train/theoretical_loss": 3.59534235817905, "objective/train/tokens_used": 1298412000, "objective/train/value_avg": -0.0084686279296875, "objective/train/value_loss": 0.0006970210233703256, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.8544921875, "objective/train/value_reward_corr": 0.7078890831361996, "objective/train/value_std": 0.022552490234375, "objective/train/weight_avg": 0.9996680021286011, "objective/train/weighted_lm_loss": 1.4211854934692383, "objective/train/weights_max": 2.232908010482788, "objective/train/weights_min": 0.36951375007629395, "theoretical_loss": 3.59534235817905, "tokens_seen": 1277952000 }, { "epoch": 0.39, "learning_rate": 0.0006189215214251324, "loss": 0.0727, "theoretical_loss": 3.59534235817905, "tokens_seen": 1277952000 }, { "epoch": 0.39, "learning_rate": 0.00061884127748355, "loss": 0.0732, "theoretical_loss": 3.5952758573541503, "tokens_seen": 1278214144 }, { "epoch": 0.39, "learning_rate": 0.0006187610335419676, "loss": 0.0722, "theoretical_loss": 3.5952093739841073, "tokens_seen": 1278476288 }, { "epoch": 0.39, "learning_rate": 0.0006186807896003852, "loss": 0.0743, "theoretical_loss": 3.5951429080607635, "tokens_seen": 1278738432 }, { "epoch": 0.39, "learning_rate": 0.0006186005456588027, "loss": 0.0717, "theoretical_loss": 3.5950764595759646, "tokens_seen": 1279000576 }, { "epoch": 0.39, "learning_rate": 0.0006185203017172204, "loss": 0.0725, "theoretical_loss": 3.5950100285215623, "tokens_seen": 1279262720 }, { "epoch": 0.39, "learning_rate": 0.000618440057775638, "loss": 0.0704, "theoretical_loss": 3.594943614889414, "tokens_seen": 1279524864 }, { "epoch": 0.39, "learning_rate": 0.0006183598138340556, "loss": 0.0745, "theoretical_loss": 3.5948772186713827, "tokens_seen": 1279787008 }, { "epoch": 0.39, "learning_rate": 0.0006182795698924732, "loss": 0.0727, "theoretical_loss": 3.5948108398593357, "tokens_seen": 1280049152 }, { "epoch": 0.39, "learning_rate": 0.0006181993259508907, "loss": 0.0735, "theoretical_loss": 3.5947444784451488, "tokens_seen": 1280311296 }, { "epoch": 0.39, "learning_rate": 0.0006181190820093083, "loss": 0.0715, "theoretical_loss": 3.5946781344206995, "tokens_seen": 1280573440 }, { "epoch": 0.39, "learning_rate": 0.0006180388380677259, "loss": 0.0745, "theoretical_loss": 3.5946118077778735, "tokens_seen": 1280835584 }, { "epoch": 0.39, "learning_rate": 0.0006179585941261435, "loss": 0.0758, "theoretical_loss": 3.59454549850856, "tokens_seen": 1281097728 }, { "epoch": 0.39, "learning_rate": 0.000617878350184561, "loss": 0.0736, "theoretical_loss": 3.594479206604655, "tokens_seen": 1281359872 }, { "epoch": 0.39, "learning_rate": 0.0006177981062429787, "loss": 0.0746, "theoretical_loss": 3.59441293205806, "tokens_seen": 1281622016 }, { "epoch": 0.39, "learning_rate": 0.0006177178623013962, "loss": 0.0724, "theoretical_loss": 3.59434667486068, "tokens_seen": 1281884160 }, { "epoch": 0.39, "learning_rate": 0.0006176376183598139, "loss": 0.0726, "theoretical_loss": 3.5942804350044284, "tokens_seen": 1282146304 }, { "epoch": 0.39, "learning_rate": 0.0006175573744182315, "loss": 0.0723, "theoretical_loss": 3.594214212481222, "tokens_seen": 1282408448 }, { "epoch": 0.39, "learning_rate": 0.000617477130476649, "loss": 0.07, "theoretical_loss": 3.594148007282983, "tokens_seen": 1282670592 }, { "epoch": 0.39, "learning_rate": 0.0006173968865350667, "loss": 0.0673, "theoretical_loss": 3.59408181940164, "tokens_seen": 1282932736 }, { "epoch": 0.39, "learning_rate": 0.0006173166425934842, "loss": 0.0711, "theoretical_loss": 3.5940156488291266, "tokens_seen": 1283194880 }, { "epoch": 0.39, "learning_rate": 0.0006172363986519018, "loss": 0.0704, "theoretical_loss": 3.593949495557381, "tokens_seen": 1283457024 }, { "epoch": 0.39, "learning_rate": 0.0006171561547103194, "loss": 0.0721, "theoretical_loss": 3.5938833595783484, "tokens_seen": 1283719168 }, { "epoch": 0.39, "learning_rate": 0.0006170759107687369, "loss": 0.0723, "theoretical_loss": 3.593817240883978, "tokens_seen": 1283981312 }, { "epoch": 0.39, "learning_rate": 0.0006169956668271545, "loss": 0.072, "theoretical_loss": 3.5937511394662254, "tokens_seen": 1284243456 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0013448818353936076, "objective/train/docs_used": 470107, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3530259132385254, "objective/train/original_loss": 1.3530259132385254, "objective/train/theoretical_loss": 3.5936850553170503, "objective/train/tokens_used": 1304965600, "objective/train/value_avg": -0.0072174072265625, "objective/train/value_loss": 0.00014462636318057775, "objective/train/value_max": -8.887052536010742e-05, "objective/train/value_min": -0.44189453125, "objective/train/value_reward_corr": 0.6305170353978925, "objective/train/value_std": 0.011138916015625, "objective/train/weight_avg": 1.0014116764068604, "objective/train/weighted_lm_loss": 1.3553401231765747, "objective/train/weights_max": 1.2892757654190063, "objective/train/weights_min": 0.39345037937164307, "theoretical_loss": 3.5936850553170503, "tokens_seen": 1284505600 }, { "epoch": 0.39, "learning_rate": 0.0006169154228855722, "loss": 0.0713, "theoretical_loss": 3.5936850553170503, "tokens_seen": 1284505600 }, { "epoch": 0.39, "learning_rate": 0.0006168351789439898, "loss": 0.0734, "theoretical_loss": 3.593618988428419, "tokens_seen": 1284767744 }, { "epoch": 0.39, "learning_rate": 0.0006167549350024073, "loss": 0.07, "theoretical_loss": 3.593552938792303, "tokens_seen": 1285029888 }, { "epoch": 0.39, "learning_rate": 0.000616674691060825, "loss": 0.0716, "theoretical_loss": 3.5934869064006785, "tokens_seen": 1285292032 }, { "epoch": 0.39, "learning_rate": 0.0006165944471192425, "loss": 0.0734, "theoretical_loss": 3.593420891245527, "tokens_seen": 1285554176 }, { "epoch": 0.39, "learning_rate": 0.00061651420317766, "loss": 0.0748, "theoretical_loss": 3.5933548933188377, "tokens_seen": 1285816320 }, { "epoch": 0.39, "learning_rate": 0.0006164339592360777, "loss": 0.0713, "theoretical_loss": 3.5932889126126017, "tokens_seen": 1286078464 }, { "epoch": 0.39, "learning_rate": 0.0006163537152944952, "loss": 0.0738, "theoretical_loss": 3.5932229491188172, "tokens_seen": 1286340608 }, { "epoch": 0.39, "learning_rate": 0.0006162734713529129, "loss": 0.0708, "theoretical_loss": 3.5931570028294884, "tokens_seen": 1286602752 }, { "epoch": 0.39, "learning_rate": 0.0006161932274113304, "loss": 0.0719, "theoretical_loss": 3.5930910737366233, "tokens_seen": 1286864896 }, { "epoch": 0.39, "learning_rate": 0.0006161129834697481, "loss": 0.0731, "theoretical_loss": 3.5930251618322364, "tokens_seen": 1287127040 }, { "epoch": 0.39, "learning_rate": 0.0006160327395281657, "loss": 0.072, "theoretical_loss": 3.5929592671083466, "tokens_seen": 1287389184 }, { "epoch": 0.39, "learning_rate": 0.0006159524955865832, "loss": 0.0716, "theoretical_loss": 3.5928933895569792, "tokens_seen": 1287651328 }, { "epoch": 0.39, "learning_rate": 0.0006158722516450008, "loss": 0.07, "theoretical_loss": 3.5928275291701643, "tokens_seen": 1287913472 }, { "epoch": 0.39, "learning_rate": 0.0006157920077034184, "loss": 0.071, "theoretical_loss": 3.592761685939937, "tokens_seen": 1288175616 }, { "epoch": 0.39, "learning_rate": 0.000615711763761836, "loss": 0.0717, "theoretical_loss": 3.5926958598583383, "tokens_seen": 1288437760 }, { "epoch": 0.39, "learning_rate": 0.0006156315198202535, "loss": 0.0692, "theoretical_loss": 3.5926300509174136, "tokens_seen": 1288699904 }, { "epoch": 0.39, "learning_rate": 0.0006155512758786712, "loss": 0.0711, "theoretical_loss": 3.5925642591092153, "tokens_seen": 1288962048 }, { "epoch": 0.39, "learning_rate": 0.0006154710319370887, "loss": 0.0694, "theoretical_loss": 3.592498484425799, "tokens_seen": 1289224192 }, { "epoch": 0.39, "learning_rate": 0.0006153907879955064, "loss": 0.0737, "theoretical_loss": 3.5924327268592267, "tokens_seen": 1289486336 }, { "epoch": 0.39, "learning_rate": 0.000615310544053924, "loss": 0.0696, "theoretical_loss": 3.5923669864015664, "tokens_seen": 1289748480 }, { "epoch": 0.39, "learning_rate": 0.0006152303001123415, "loss": 0.0699, "theoretical_loss": 3.59230126304489, "tokens_seen": 1290010624 }, { "epoch": 0.39, "learning_rate": 0.0006151500561707592, "loss": 0.0732, "theoretical_loss": 3.592235556781276, "tokens_seen": 1290272768 }, { "epoch": 0.39, "learning_rate": 0.0006150698122291767, "loss": 0.0705, "theoretical_loss": 3.5921698676028058, "tokens_seen": 1290534912 }, { "epoch": 0.39, "learning_rate": 0.0006149895682875943, "loss": 0.0684, "theoretical_loss": 3.59210419550157, "tokens_seen": 1290797056 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0011514985235407948, "objective/train/docs_used": 472355, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4526315927505493, "objective/train/original_loss": 1.4526317119598389, "objective/train/theoretical_loss": 3.5920385404696606, "objective/train/tokens_used": 1311519200, "objective/train/value_avg": -0.007213592529296875, "objective/train/value_loss": 0.00013709472841583192, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.9716796875, "objective/train/value_reward_corr": 0.8126945195500097, "objective/train/value_std": 0.01486968994140625, "objective/train/weight_avg": 1.0012160539627075, "objective/train/weighted_lm_loss": 1.4547836780548096, "objective/train/weights_max": 1.2823024988174438, "objective/train/weights_min": 0.5326542258262634, "theoretical_loss": 3.5920385404696606, "tokens_seen": 1291059200 }, { "epoch": 0.39, "learning_rate": 0.0006149093243460119, "loss": 0.0722, "theoretical_loss": 3.5920385404696606, "tokens_seen": 1291059200 }, { "epoch": 0.39, "learning_rate": 0.0006148290804044295, "loss": 0.0715, "theoretical_loss": 3.591972902499177, "tokens_seen": 1291321344 }, { "epoch": 0.39, "learning_rate": 0.000614748836462847, "loss": 0.0714, "theoretical_loss": 3.5919072815822233, "tokens_seen": 1291583488 }, { "epoch": 0.39, "learning_rate": 0.0006146685925212647, "loss": 0.0713, "theoretical_loss": 3.5918416777109092, "tokens_seen": 1291845632 }, { "epoch": 0.39, "learning_rate": 0.0006145883485796823, "loss": 0.0698, "theoretical_loss": 3.591776090877349, "tokens_seen": 1292107776 }, { "epoch": 0.39, "learning_rate": 0.0006145081046380998, "loss": 0.0753, "theoretical_loss": 3.5917105210736624, "tokens_seen": 1292369920 }, { "epoch": 0.39, "learning_rate": 0.0006144278606965175, "loss": 0.0718, "theoretical_loss": 3.591644968291975, "tokens_seen": 1292632064 }, { "epoch": 0.39, "learning_rate": 0.000614347616754935, "loss": 0.0707, "theoretical_loss": 3.5915794325244175, "tokens_seen": 1292894208 }, { "epoch": 0.39, "learning_rate": 0.0006142673728133526, "loss": 0.0722, "theoretical_loss": 3.5915139137631242, "tokens_seen": 1293156352 }, { "epoch": 0.39, "learning_rate": 0.0006141871288717702, "loss": 0.07, "theoretical_loss": 3.5914484120002372, "tokens_seen": 1293418496 }, { "epoch": 0.39, "learning_rate": 0.0006141068849301877, "loss": 0.0718, "theoretical_loss": 3.591382927227902, "tokens_seen": 1293680640 }, { "epoch": 0.39, "learning_rate": 0.0006140266409886053, "loss": 0.0747, "theoretical_loss": 3.5913174594382693, "tokens_seen": 1293942784 }, { "epoch": 0.39, "learning_rate": 0.000613946397047023, "loss": 0.0725, "theoretical_loss": 3.5912520086234974, "tokens_seen": 1294204928 }, { "epoch": 0.39, "learning_rate": 0.0006138661531054406, "loss": 0.0733, "theoretical_loss": 3.5911865747757457, "tokens_seen": 1294467072 }, { "epoch": 0.39, "learning_rate": 0.0006137859091638582, "loss": 0.0715, "theoretical_loss": 3.5911211578871827, "tokens_seen": 1294729216 }, { "epoch": 0.39, "learning_rate": 0.0006137056652222758, "loss": 0.0701, "theoretical_loss": 3.591055757949981, "tokens_seen": 1294991360 }, { "epoch": 0.39, "learning_rate": 0.0006136254212806933, "loss": 0.0739, "theoretical_loss": 3.590990374956316, "tokens_seen": 1295253504 }, { "epoch": 0.39, "learning_rate": 0.0006135451773391109, "loss": 0.0739, "theoretical_loss": 3.5909250088983713, "tokens_seen": 1295515648 }, { "epoch": 0.39, "learning_rate": 0.0006134649333975285, "loss": 0.074, "theoretical_loss": 3.5908596597683347, "tokens_seen": 1295777792 }, { "epoch": 0.39, "learning_rate": 0.000613384689455946, "loss": 0.0713, "theoretical_loss": 3.590794327558399, "tokens_seen": 1296039936 }, { "epoch": 0.39, "learning_rate": 0.0006133044455143637, "loss": 0.0697, "theoretical_loss": 3.590729012260762, "tokens_seen": 1296302080 }, { "epoch": 0.39, "learning_rate": 0.0006132242015727812, "loss": 0.0739, "theoretical_loss": 3.5906637138676265, "tokens_seen": 1296564224 }, { "epoch": 0.39, "learning_rate": 0.0006131439576311989, "loss": 0.0708, "theoretical_loss": 3.590598432371202, "tokens_seen": 1296826368 }, { "epoch": 0.39, "learning_rate": 0.0006130637136896165, "loss": 0.0705, "theoretical_loss": 3.5905331677637013, "tokens_seen": 1297088512 }, { "epoch": 0.39, "learning_rate": 0.000612983469748034, "loss": 0.0712, "theoretical_loss": 3.5904679200373435, "tokens_seen": 1297350656 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0005244042840786278, "objective/train/docs_used": 474727, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4231724739074707, "objective/train/original_loss": 1.4231724739074707, "objective/train/theoretical_loss": 3.5904026891843523, "objective/train/tokens_used": 1318072800, "objective/train/value_avg": -0.006511688232421875, "objective/train/value_loss": 0.00018917610577773303, "objective/train/value_max": -5.1856040954589844e-05, "objective/train/value_min": -0.394287109375, "objective/train/value_reward_corr": 0.6175634721862767, "objective/train/value_std": 0.0108795166015625, "objective/train/weight_avg": 1.000605583190918, "objective/train/weighted_lm_loss": 1.4241219758987427, "objective/train/weights_max": 1.4251643419265747, "objective/train/weights_min": 0.23235876858234406, "theoretical_loss": 3.5904026891843523, "tokens_seen": 1297612800 }, { "epoch": 0.39, "learning_rate": 0.0006129032258064516, "loss": 0.0701, "theoretical_loss": 3.5904026891843523, "tokens_seen": 1297612800 }, { "epoch": 0.39, "learning_rate": 0.0006128229818648692, "loss": 0.0683, "theoretical_loss": 3.5903374751969563, "tokens_seen": 1297874944 }, { "epoch": 0.39, "learning_rate": 0.0006127427379232868, "loss": 0.0687, "theoretical_loss": 3.59027227806739, "tokens_seen": 1298137088 }, { "epoch": 0.39, "learning_rate": 0.0006126624939817043, "loss": 0.0709, "theoretical_loss": 3.5902070977878937, "tokens_seen": 1298399232 }, { "epoch": 0.39, "learning_rate": 0.000612582250040122, "loss": 0.07, "theoretical_loss": 3.5901419343507106, "tokens_seen": 1298661376 }, { "epoch": 0.39, "learning_rate": 0.0006125020060985395, "loss": 0.0704, "theoretical_loss": 3.5900767877480906, "tokens_seen": 1298923520 }, { "epoch": 0.39, "learning_rate": 0.0006124217621569572, "loss": 0.0695, "theoretical_loss": 3.5900116579722883, "tokens_seen": 1299185664 }, { "epoch": 0.39, "learning_rate": 0.0006123415182153748, "loss": 0.0715, "theoretical_loss": 3.5899465450155637, "tokens_seen": 1299447808 }, { "epoch": 0.39, "learning_rate": 0.0006122612742737923, "loss": 0.0724, "theoretical_loss": 3.589881448870182, "tokens_seen": 1299709952 }, { "epoch": 0.39, "learning_rate": 0.00061218103033221, "loss": 0.0722, "theoretical_loss": 3.589816369528413, "tokens_seen": 1299972096 }, { "epoch": 0.39, "learning_rate": 0.0006121007863906275, "loss": 0.0702, "theoretical_loss": 3.5897513069825324, "tokens_seen": 1300234240 }, { "epoch": 0.39, "learning_rate": 0.0006120205424490451, "loss": 0.0719, "theoretical_loss": 3.589686261224819, "tokens_seen": 1300496384 }, { "epoch": 0.39, "learning_rate": 0.0006119402985074627, "loss": 0.0717, "theoretical_loss": 3.5896212322475605, "tokens_seen": 1300758528 }, { "epoch": 0.39, "learning_rate": 0.0006118600545658803, "loss": 0.0712, "theoretical_loss": 3.589556220043046, "tokens_seen": 1301020672 }, { "epoch": 0.39, "learning_rate": 0.0006117798106242978, "loss": 0.0715, "theoretical_loss": 3.589491224603571, "tokens_seen": 1301282816 }, { "epoch": 0.39, "learning_rate": 0.0006116995666827155, "loss": 0.0714, "theoretical_loss": 3.5894262459214366, "tokens_seen": 1301544960 }, { "epoch": 0.39, "learning_rate": 0.0006116193227411331, "loss": 0.0695, "theoretical_loss": 3.589361283988948, "tokens_seen": 1301807104 }, { "epoch": 0.39, "learning_rate": 0.0006115390787995506, "loss": 0.0711, "theoretical_loss": 3.589296338798418, "tokens_seen": 1302069248 }, { "epoch": 0.39, "learning_rate": 0.0006114588348579683, "loss": 0.0724, "theoretical_loss": 3.5892314103421596, "tokens_seen": 1302331392 }, { "epoch": 0.39, "learning_rate": 0.0006113785909163858, "loss": 0.0712, "theoretical_loss": 3.589166498612496, "tokens_seen": 1302593536 }, { "epoch": 0.39, "learning_rate": 0.0006112983469748035, "loss": 0.0718, "theoretical_loss": 3.589101603601752, "tokens_seen": 1302855680 }, { "epoch": 0.39, "learning_rate": 0.000611218103033221, "loss": 0.0731, "theoretical_loss": 3.58903672530226, "tokens_seen": 1303117824 }, { "epoch": 0.39, "learning_rate": 0.0006111378590916385, "loss": 0.0719, "theoretical_loss": 3.5889718637063552, "tokens_seen": 1303379968 }, { "epoch": 0.4, "learning_rate": 0.0006110576151500562, "loss": 0.0701, "theoretical_loss": 3.5889070188063794, "tokens_seen": 1303642112 }, { "epoch": 0.4, "learning_rate": 0.0006109773712084738, "loss": 0.0733, "theoretical_loss": 3.5888421905946783, "tokens_seen": 1303904256 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0004786965437233448, "objective/train/docs_used": 476908, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.320482850074768, "objective/train/original_loss": 1.3204827308654785, "objective/train/theoretical_loss": 3.588777379063604, "objective/train/tokens_used": 1324626400, "objective/train/value_avg": -0.007488250732421875, "objective/train/value_loss": 0.0003243642277084291, "objective/train/value_max": -8.821487426757812e-05, "objective/train/value_min": -0.79052734375, "objective/train/value_reward_corr": 0.5708697386938653, "objective/train/value_std": 0.01364898681640625, "objective/train/weight_avg": 1.0006197690963745, "objective/train/weighted_lm_loss": 1.3216181993484497, "objective/train/weights_max": 1.9110832214355469, "objective/train/weights_min": 0.368977814912796, "theoretical_loss": 3.588777379063604, "tokens_seen": 1304166400 }, { "epoch": 0.4, "learning_rate": 0.0006108971272668914, "loss": 0.0702, "theoretical_loss": 3.588777379063604, "tokens_seen": 1304166400 }, { "epoch": 0.4, "learning_rate": 0.000610816883325309, "loss": 0.074, "theoretical_loss": 3.5887125842055116, "tokens_seen": 1304428544 }, { "epoch": 0.4, "learning_rate": 0.0006107366393837266, "loss": 0.07, "theoretical_loss": 3.588647806012765, "tokens_seen": 1304690688 }, { "epoch": 0.4, "learning_rate": 0.0006106563954421441, "loss": 0.0711, "theoretical_loss": 3.588583044477728, "tokens_seen": 1304952832 }, { "epoch": 0.4, "learning_rate": 0.0006105761515005617, "loss": 0.0741, "theoretical_loss": 3.5885182995927734, "tokens_seen": 1305214976 }, { "epoch": 0.4, "learning_rate": 0.0006104959075589793, "loss": 0.071, "theoretical_loss": 3.5884535713502776, "tokens_seen": 1305477120 }, { "epoch": 0.4, "learning_rate": 0.0006104156636173968, "loss": 0.07, "theoretical_loss": 3.588388859742622, "tokens_seen": 1305739264 }, { "epoch": 0.4, "learning_rate": 0.0006103354196758145, "loss": 0.0677, "theoretical_loss": 3.5883241647621933, "tokens_seen": 1306001408 }, { "epoch": 0.4, "learning_rate": 0.000610255175734232, "loss": 0.0699, "theoretical_loss": 3.588259486401383, "tokens_seen": 1306263552 }, { "epoch": 0.4, "learning_rate": 0.0006101749317926497, "loss": 0.0711, "theoretical_loss": 3.5881948246525877, "tokens_seen": 1306525696 }, { "epoch": 0.4, "learning_rate": 0.0006100946878510673, "loss": 0.071, "theoretical_loss": 3.588130179508209, "tokens_seen": 1306787840 }, { "epoch": 0.4, "learning_rate": 0.0006100144439094848, "loss": 0.0716, "theoretical_loss": 3.5880655509606534, "tokens_seen": 1307049984 }, { "epoch": 0.4, "learning_rate": 0.0006099341999679025, "loss": 0.0734, "theoretical_loss": 3.5880009390023324, "tokens_seen": 1307312128 }, { "epoch": 0.4, "learning_rate": 0.00060985395602632, "loss": 0.0694, "theoretical_loss": 3.5879363436256626, "tokens_seen": 1307574272 }, { "epoch": 0.4, "learning_rate": 0.0006097737120847376, "loss": 0.0722, "theoretical_loss": 3.587871764823066, "tokens_seen": 1307836416 }, { "epoch": 0.4, "learning_rate": 0.0006096934681431552, "loss": 0.073, "theoretical_loss": 3.5878072025869683, "tokens_seen": 1308098560 }, { "epoch": 0.4, "learning_rate": 0.0006096132242015728, "loss": 0.0697, "theoretical_loss": 3.5877426569098017, "tokens_seen": 1308360704 }, { "epoch": 0.4, "learning_rate": 0.0006095329802599903, "loss": 0.069, "theoretical_loss": 3.5876781277840024, "tokens_seen": 1308622848 }, { "epoch": 0.4, "learning_rate": 0.000609452736318408, "loss": 0.0712, "theoretical_loss": 3.5876136152020117, "tokens_seen": 1308884992 }, { "epoch": 0.4, "learning_rate": 0.0006093724923768256, "loss": 0.0715, "theoretical_loss": 3.587549119156276, "tokens_seen": 1309147136 }, { "epoch": 0.4, "learning_rate": 0.0006092922484352431, "loss": 0.0747, "theoretical_loss": 3.5874846396392472, "tokens_seen": 1309409280 }, { "epoch": 0.4, "learning_rate": 0.0006092120044936608, "loss": 0.0718, "theoretical_loss": 3.5874201766433815, "tokens_seen": 1309671424 }, { "epoch": 0.4, "learning_rate": 0.0006091317605520783, "loss": 0.0729, "theoretical_loss": 3.58735573016114, "tokens_seen": 1309933568 }, { "epoch": 0.4, "learning_rate": 0.0006090515166104959, "loss": 0.0722, "theoretical_loss": 3.5872913001849884, "tokens_seen": 1310195712 }, { "epoch": 0.4, "learning_rate": 0.0006089712726689135, "loss": 0.0694, "theoretical_loss": 3.5872268867073993, "tokens_seen": 1310457856 }, { "debugging/Compilability": 0.9473684210526315, "debugging/distinct-1-grams": 0.7438439469491407, "debugging/entropy-1-grams": 5.528104507223894, "debugging/length": 493.7894736842105, "debugging/num_segments": 19, "debugging/raw_token_scores_avg": 0.008907916024327278, "debugging/raw_token_scores_std": 0.03160863742232323, "debugging/score": 0.005095041850402146, "debugging/score_std": 0.005375833718357644, "epoch": 0.4, "objective/train/advantage_avg": 0.0013337053824216127, "objective/train/docs_used": 479375, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4477362632751465, "objective/train/original_loss": 1.4477365016937256, "objective/train/theoretical_loss": 3.587162489720847, "objective/train/tokens_used": 1331180000, "objective/train/value_avg": -0.0102386474609375, "objective/train/value_loss": 0.00042610923992469907, "objective/train/value_max": -0.00011235475540161133, "objective/train/value_min": -0.8984375, "objective/train/value_reward_corr": 0.7585738573996782, "objective/train/value_std": 0.0240631103515625, "objective/train/weight_avg": 1.0015311241149902, "objective/train/weighted_lm_loss": 1.4506300687789917, "objective/train/weights_max": 1.8746426105499268, "objective/train/weights_min": 0.3877526521682739, "theoretical_loss": 3.587162489720847, "tokens_seen": 1310720000 }, { "epoch": 0.4, "learning_rate": 0.0006088910287273311, "loss": 0.0695, "theoretical_loss": 3.587162489720847, "tokens_seen": 1310720000 }, { "epoch": 0.4, "learning_rate": 0.0006088107847857486, "loss": 0.0699, "theoretical_loss": 3.5870981092178136, "tokens_seen": 1310982144 }, { "epoch": 0.4, "learning_rate": 0.0006087305408441663, "loss": 0.0719, "theoretical_loss": 3.5870337451907854, "tokens_seen": 1311244288 }, { "epoch": 0.4, "learning_rate": 0.0006086502969025839, "loss": 0.0723, "theoretical_loss": 3.5869693976322523, "tokens_seen": 1311506432 }, { "epoch": 0.4, "learning_rate": 0.0006085700529610015, "loss": 0.0676, "theoretical_loss": 3.586905066534711, "tokens_seen": 1311768576 }, { "epoch": 0.4, "learning_rate": 0.0006084898090194191, "loss": 0.0664, "theoretical_loss": 3.5868407518906618, "tokens_seen": 1312030720 }, { "epoch": 0.4, "learning_rate": 0.0006084095650778366, "loss": 0.0729, "theoretical_loss": 3.586776453692611, "tokens_seen": 1312292864 }, { "epoch": 0.4, "learning_rate": 0.0006083293211362543, "loss": 0.0704, "theoretical_loss": 3.5867121719330677, "tokens_seen": 1312555008 }, { "epoch": 0.4, "learning_rate": 0.0006082490771946718, "loss": 0.0711, "theoretical_loss": 3.586647906604549, "tokens_seen": 1312817152 }, { "epoch": 0.4, "learning_rate": 0.0006081688332530893, "loss": 0.0709, "theoretical_loss": 3.5865836576995744, "tokens_seen": 1313079296 }, { "epoch": 0.4, "learning_rate": 0.000608088589311507, "loss": 0.0707, "theoretical_loss": 3.5865194252106694, "tokens_seen": 1313341440 }, { "epoch": 0.4, "learning_rate": 0.0006080083453699246, "loss": 0.0727, "theoretical_loss": 3.586455209130364, "tokens_seen": 1313603584 }, { "epoch": 0.4, "learning_rate": 0.0006079281014283422, "loss": 0.0739, "theoretical_loss": 3.5863910094511935, "tokens_seen": 1313865728 }, { "epoch": 0.4, "learning_rate": 0.0006078478574867598, "loss": 0.0712, "theoretical_loss": 3.586326826165698, "tokens_seen": 1314127872 }, { "epoch": 0.4, "learning_rate": 0.0006077676135451774, "loss": 0.0713, "theoretical_loss": 3.5862626592664215, "tokens_seen": 1314390016 }, { "epoch": 0.4, "learning_rate": 0.0006076873696035949, "loss": 0.0703, "theoretical_loss": 3.586198508745915, "tokens_seen": 1314652160 }, { "epoch": 0.4, "learning_rate": 0.0006076071256620125, "loss": 0.0705, "theoretical_loss": 3.586134374596732, "tokens_seen": 1314914304 }, { "epoch": 0.4, "learning_rate": 0.0006075268817204301, "loss": 0.0721, "theoretical_loss": 3.586070256811432, "tokens_seen": 1315176448 }, { "epoch": 0.4, "learning_rate": 0.0006074466377788477, "loss": 0.069, "theoretical_loss": 3.58600615538258, "tokens_seen": 1315438592 }, { "epoch": 0.4, "learning_rate": 0.0006073663938372653, "loss": 0.0738, "theoretical_loss": 3.5859420703027447, "tokens_seen": 1315700736 }, { "epoch": 0.4, "learning_rate": 0.0006072861498956828, "loss": 0.0701, "theoretical_loss": 3.5858780015644998, "tokens_seen": 1315962880 }, { "epoch": 0.4, "learning_rate": 0.0006072059059541006, "loss": 0.0751, "theoretical_loss": 3.585813949160425, "tokens_seen": 1316225024 }, { "epoch": 0.4, "learning_rate": 0.0006071256620125181, "loss": 0.0699, "theoretical_loss": 3.585749913083103, "tokens_seen": 1316487168 }, { "epoch": 0.4, "learning_rate": 0.0006070454180709356, "loss": 0.0754, "theoretical_loss": 3.5856858933251234, "tokens_seen": 1316749312 }, { "epoch": 0.4, "learning_rate": 0.0006069651741293533, "loss": 0.0727, "theoretical_loss": 3.5856218898790786, "tokens_seen": 1317011456 }, { "epoch": 0.4, "objective/train/advantage_avg": -0.0005380894290283322, "objective/train/docs_used": 481887, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4740736484527588, "objective/train/original_loss": 1.4740736484527588, "objective/train/theoretical_loss": 3.585557902737568, "objective/train/tokens_used": 1337733600, "objective/train/value_avg": -0.009063720703125, "objective/train/value_loss": 0.00029063946567475796, "objective/train/value_max": -6.014108657836914e-05, "objective/train/value_min": -0.982421875, "objective/train/value_reward_corr": 0.906233165609036, "objective/train/value_std": 0.030059814453125, "objective/train/weight_avg": 0.9995934963226318, "objective/train/weighted_lm_loss": 1.4738991260528564, "objective/train/weights_max": 1.3234916925430298, "objective/train/weights_min": 0.37601205706596375, "theoretical_loss": 3.585557902737568, "tokens_seen": 1317273600 }, { "epoch": 0.4, "learning_rate": 0.0006068849301877708, "loss": 0.0742, "theoretical_loss": 3.585557902737568, "tokens_seen": 1317273600 }, { "epoch": 0.4, "learning_rate": 0.0006068046862461884, "loss": 0.077, "theoretical_loss": 3.5854939318931933, "tokens_seen": 1317535744 }, { "epoch": 0.4, "learning_rate": 0.000606724442304606, "loss": 0.0721, "theoretical_loss": 3.5854299773385634, "tokens_seen": 1317797888 }, { "epoch": 0.4, "learning_rate": 0.0006066441983630236, "loss": 0.0726, "theoretical_loss": 3.585366039066291, "tokens_seen": 1318060032 }, { "epoch": 0.4, "learning_rate": 0.0006065639544214411, "loss": 0.0716, "theoretical_loss": 3.585302117068993, "tokens_seen": 1318322176 }, { "epoch": 0.4, "learning_rate": 0.0006064837104798588, "loss": 0.072, "theoretical_loss": 3.585238211339292, "tokens_seen": 1318584320 }, { "epoch": 0.4, "learning_rate": 0.0006064034665382764, "loss": 0.0703, "theoretical_loss": 3.5851743218698156, "tokens_seen": 1318846464 }, { "epoch": 0.4, "learning_rate": 0.0006063232225966939, "loss": 0.0713, "theoretical_loss": 3.585110448653195, "tokens_seen": 1319108608 }, { "epoch": 0.4, "learning_rate": 0.0006062429786551116, "loss": 0.0699, "theoretical_loss": 3.585046591682068, "tokens_seen": 1319370752 }, { "epoch": 0.4, "learning_rate": 0.0006061627347135291, "loss": 0.0718, "theoretical_loss": 3.5849827509490746, "tokens_seen": 1319632896 }, { "epoch": 0.4, "learning_rate": 0.0006060824907719468, "loss": 0.0718, "theoretical_loss": 3.584918926446863, "tokens_seen": 1319895040 }, { "epoch": 0.4, "learning_rate": 0.0006060022468303643, "loss": 0.0706, "theoretical_loss": 3.5848551181680826, "tokens_seen": 1320157184 }, { "epoch": 0.4, "learning_rate": 0.0006059220028887819, "loss": 0.0678, "theoretical_loss": 3.5847913261053908, "tokens_seen": 1320419328 }, { "epoch": 0.4, "learning_rate": 0.0006058417589471995, "loss": 0.0719, "theoretical_loss": 3.584727550251447, "tokens_seen": 1320681472 }, { "epoch": 0.4, "learning_rate": 0.000605761515005617, "loss": 0.0733, "theoretical_loss": 3.5846637905989183, "tokens_seen": 1320943616 }, { "epoch": 0.4, "learning_rate": 0.0006056812710640347, "loss": 0.0709, "theoretical_loss": 3.5846000471404738, "tokens_seen": 1321205760 }, { "epoch": 0.4, "learning_rate": 0.0006056010271224523, "loss": 0.0763, "theoretical_loss": 3.5845363198687883, "tokens_seen": 1321467904 }, { "epoch": 0.4, "learning_rate": 0.0006055207831808699, "loss": 0.0726, "theoretical_loss": 3.584472608776542, "tokens_seen": 1321730048 }, { "epoch": 0.4, "learning_rate": 0.0006054405392392874, "loss": 0.0732, "theoretical_loss": 3.5844089138564197, "tokens_seen": 1321992192 }, { "epoch": 0.4, "learning_rate": 0.0006053602952977051, "loss": 0.0693, "theoretical_loss": 3.584345235101111, "tokens_seen": 1322254336 }, { "epoch": 0.4, "learning_rate": 0.0006052800513561226, "loss": 0.0709, "theoretical_loss": 3.584281572503309, "tokens_seen": 1322516480 }, { "epoch": 0.4, "learning_rate": 0.0006051998074145401, "loss": 0.0728, "theoretical_loss": 3.584217926055713, "tokens_seen": 1322778624 }, { "epoch": 0.4, "learning_rate": 0.0006051195634729578, "loss": 0.0734, "theoretical_loss": 3.584154295751027, "tokens_seen": 1323040768 }, { "epoch": 0.4, "learning_rate": 0.0006050393195313753, "loss": 0.0718, "theoretical_loss": 3.5840906815819586, "tokens_seen": 1323302912 }, { "epoch": 0.4, "learning_rate": 0.0006049590755897931, "loss": 0.0726, "theoretical_loss": 3.584027083541222, "tokens_seen": 1323565056 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.00013670318003278226, "objective/train/docs_used": 484332, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4303979873657227, "objective/train/original_loss": 1.4303977489471436, "objective/train/theoretical_loss": 3.583963501621533, "objective/train/tokens_used": 1344287200, "objective/train/value_avg": -0.00768280029296875, "objective/train/value_loss": 0.0003027576894965023, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.53466796875, "objective/train/value_reward_corr": 0.6955805663532895, "objective/train/value_std": 0.0156707763671875, "objective/train/weight_avg": 1.0002702474594116, "objective/train/weighted_lm_loss": 1.4310005903244019, "objective/train/weights_max": 1.3974761962890625, "objective/train/weights_min": 0.3838087022304535, "theoretical_loss": 3.583963501621533, "tokens_seen": 1323827200 }, { "epoch": 0.4, "learning_rate": 0.0006048788316482106, "loss": 0.0721, "theoretical_loss": 3.583963501621533, "tokens_seen": 1323827200 }, { "epoch": 0.4, "learning_rate": 0.0006047985877066282, "loss": 0.0715, "theoretical_loss": 3.583899935815616, "tokens_seen": 1324089344 }, { "epoch": 0.4, "learning_rate": 0.0006047183437650458, "loss": 0.0715, "theoretical_loss": 3.583836386116197, "tokens_seen": 1324351488 }, { "epoch": 0.4, "learning_rate": 0.0006046380998234633, "loss": 0.072, "theoretical_loss": 3.5837728525160086, "tokens_seen": 1324613632 }, { "epoch": 0.4, "learning_rate": 0.0006045578558818809, "loss": 0.0765, "theoretical_loss": 3.5837093350077875, "tokens_seen": 1324875776 }, { "epoch": 0.4, "learning_rate": 0.0006044776119402985, "loss": 0.0711, "theoretical_loss": 3.5836458335842747, "tokens_seen": 1325137920 }, { "epoch": 0.4, "learning_rate": 0.0006043973679987161, "loss": 0.0728, "theoretical_loss": 3.5835823482382163, "tokens_seen": 1325400064 }, { "epoch": 0.4, "learning_rate": 0.0006043171240571336, "loss": 0.0712, "theoretical_loss": 3.583518878962364, "tokens_seen": 1325662208 }, { "epoch": 0.4, "learning_rate": 0.0006042368801155514, "loss": 0.0708, "theoretical_loss": 3.583455425749472, "tokens_seen": 1325924352 }, { "epoch": 0.4, "learning_rate": 0.0006041566361739689, "loss": 0.0725, "theoretical_loss": 3.583391988592301, "tokens_seen": 1326186496 }, { "epoch": 0.4, "learning_rate": 0.0006040763922323864, "loss": 0.0747, "theoretical_loss": 3.5833285674836164, "tokens_seen": 1326448640 }, { "epoch": 0.4, "learning_rate": 0.0006039961482908041, "loss": 0.0724, "theoretical_loss": 3.583265162416187, "tokens_seen": 1326710784 }, { "epoch": 0.4, "learning_rate": 0.0006039159043492216, "loss": 0.0722, "theoretical_loss": 3.583201773382788, "tokens_seen": 1326972928 }, { "epoch": 0.4, "learning_rate": 0.0006038356604076392, "loss": 0.0731, "theoretical_loss": 3.583138400376197, "tokens_seen": 1327235072 }, { "epoch": 0.4, "learning_rate": 0.0006037554164660568, "loss": 0.071, "theoretical_loss": 3.583075043389199, "tokens_seen": 1327497216 }, { "epoch": 0.4, "learning_rate": 0.0006036751725244744, "loss": 0.0729, "theoretical_loss": 3.583011702414581, "tokens_seen": 1327759360 }, { "epoch": 0.4, "learning_rate": 0.000603594928582892, "loss": 0.0736, "theoretical_loss": 3.5829483774451374, "tokens_seen": 1328021504 }, { "epoch": 0.4, "learning_rate": 0.0006035146846413096, "loss": 0.0711, "theoretical_loss": 3.582885068473665, "tokens_seen": 1328283648 }, { "epoch": 0.4, "learning_rate": 0.0006034344406997272, "loss": 0.0724, "theoretical_loss": 3.582821775492966, "tokens_seen": 1328545792 }, { "epoch": 0.4, "learning_rate": 0.0006033541967581448, "loss": 0.0712, "theoretical_loss": 3.5827584984958474, "tokens_seen": 1328807936 }, { "epoch": 0.4, "learning_rate": 0.0006032739528165624, "loss": 0.0733, "theoretical_loss": 3.582695237475121, "tokens_seen": 1329070080 }, { "epoch": 0.4, "learning_rate": 0.0006031937088749799, "loss": 0.0711, "theoretical_loss": 3.582631992423603, "tokens_seen": 1329332224 }, { "epoch": 0.4, "learning_rate": 0.0006031134649333976, "loss": 0.0709, "theoretical_loss": 3.582568763334115, "tokens_seen": 1329594368 }, { "epoch": 0.4, "learning_rate": 0.0006030332209918151, "loss": 0.0682, "theoretical_loss": 3.582505550199481, "tokens_seen": 1329856512 }, { "epoch": 0.4, "learning_rate": 0.0006029529770502326, "loss": 0.0706, "theoretical_loss": 3.5824423530125324, "tokens_seen": 1330118656 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0006204114179126918, "objective/train/docs_used": 486514, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3864256143569946, "objective/train/original_loss": 1.386425495147705, "objective/train/theoretical_loss": 3.582379171766104, "objective/train/tokens_used": 1350840800, "objective/train/value_avg": -0.00786590576171875, "objective/train/value_loss": 0.00019436066213529557, "objective/train/value_max": -8.219480514526367e-05, "objective/train/value_min": -0.68359375, "objective/train/value_reward_corr": 0.6674128947141043, "objective/train/value_std": 0.0129241943359375, "objective/train/weight_avg": 1.0007107257843018, "objective/train/weighted_lm_loss": 1.3870501518249512, "objective/train/weights_max": 1.2174729108810425, "objective/train/weights_min": 0.3684675693511963, "theoretical_loss": 3.582379171766104, "tokens_seen": 1330380800 }, { "epoch": 0.4, "learning_rate": 0.0006028727331086503, "loss": 0.0716, "theoretical_loss": 3.582379171766104, "tokens_seen": 1330380800 }, { "epoch": 0.4, "learning_rate": 0.0006027924891670679, "loss": 0.0721, "theoretical_loss": 3.582316006453034, "tokens_seen": 1330642944 }, { "epoch": 0.4, "learning_rate": 0.0006027122452254855, "loss": 0.072, "theoretical_loss": 3.5822528570661683, "tokens_seen": 1330905088 }, { "epoch": 0.4, "learning_rate": 0.0006026320012839031, "loss": 0.073, "theoretical_loss": 3.582189723598354, "tokens_seen": 1331167232 }, { "epoch": 0.4, "learning_rate": 0.0006025517573423207, "loss": 0.0721, "theoretical_loss": 3.582126606042446, "tokens_seen": 1331429376 }, { "epoch": 0.4, "learning_rate": 0.0006024715134007382, "loss": 0.0719, "theoretical_loss": 3.5820635043913005, "tokens_seen": 1331691520 }, { "epoch": 0.4, "learning_rate": 0.0006023912694591559, "loss": 0.073, "theoretical_loss": 3.582000418637781, "tokens_seen": 1331953664 }, { "epoch": 0.4, "learning_rate": 0.0006023110255175734, "loss": 0.0686, "theoretical_loss": 3.581937348774755, "tokens_seen": 1332215808 }, { "epoch": 0.4, "learning_rate": 0.000602230781575991, "loss": 0.0723, "theoretical_loss": 3.5818742947950932, "tokens_seen": 1332477952 }, { "epoch": 0.4, "learning_rate": 0.0006021505376344086, "loss": 0.0726, "theoretical_loss": 3.5818112566916724, "tokens_seen": 1332740096 }, { "epoch": 0.4, "learning_rate": 0.0006020702936928261, "loss": 0.0708, "theoretical_loss": 3.5817482344573746, "tokens_seen": 1333002240 }, { "epoch": 0.4, "learning_rate": 0.0006019900497512439, "loss": 0.0727, "theoretical_loss": 3.5816852280850835, "tokens_seen": 1333264384 }, { "epoch": 0.4, "learning_rate": 0.0006019098058096614, "loss": 0.0704, "theoretical_loss": 3.5816222375676903, "tokens_seen": 1333526528 }, { "epoch": 0.4, "learning_rate": 0.000601829561868079, "loss": 0.0721, "theoretical_loss": 3.58155926289809, "tokens_seen": 1333788672 }, { "epoch": 0.4, "learning_rate": 0.0006017493179264966, "loss": 0.0687, "theoretical_loss": 3.581496304069181, "tokens_seen": 1334050816 }, { "epoch": 0.4, "learning_rate": 0.0006016690739849141, "loss": 0.0735, "theoretical_loss": 3.5814333610738673, "tokens_seen": 1334312960 }, { "epoch": 0.4, "learning_rate": 0.0006015888300433317, "loss": 0.0718, "theoretical_loss": 3.5813704339050583, "tokens_seen": 1334575104 }, { "epoch": 0.4, "learning_rate": 0.0006015085861017493, "loss": 0.0717, "theoretical_loss": 3.581307522555666, "tokens_seen": 1334837248 }, { "epoch": 0.4, "learning_rate": 0.0006014283421601669, "loss": 0.0693, "theoretical_loss": 3.5812446270186085, "tokens_seen": 1335099392 }, { "epoch": 0.4, "learning_rate": 0.0006013480982185844, "loss": 0.0709, "theoretical_loss": 3.5811817472868075, "tokens_seen": 1335361536 }, { "epoch": 0.4, "learning_rate": 0.0006012678542770022, "loss": 0.0703, "theoretical_loss": 3.5811188833531897, "tokens_seen": 1335623680 }, { "epoch": 0.4, "learning_rate": 0.0006011876103354197, "loss": 0.0681, "theoretical_loss": 3.5810560352106866, "tokens_seen": 1335885824 }, { "epoch": 0.4, "learning_rate": 0.0006011073663938372, "loss": 0.0685, "theoretical_loss": 3.580993202852234, "tokens_seen": 1336147968 }, { "epoch": 0.4, "learning_rate": 0.0006010271224522549, "loss": 0.0675, "theoretical_loss": 3.580930386270772, "tokens_seen": 1336410112 }, { "epoch": 0.41, "learning_rate": 0.0006009468785106724, "loss": 0.0716, "theoretical_loss": 3.5808675854592464, "tokens_seen": 1336672256 }, { "epoch": 0.41, "objective/train/advantage_avg": -0.00022589701984543353, "objective/train/docs_used": 488954, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3641271591186523, "objective/train/original_loss": 1.3641269207000732, "objective/train/theoretical_loss": 3.5808048004106054, "objective/train/tokens_used": 1357394400, "objective/train/value_avg": -0.006866455078125, "objective/train/value_loss": 0.00021092750830575824, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.333984375, "objective/train/value_reward_corr": 0.6062614211122952, "objective/train/value_std": 0.0108489990234375, "objective/train/weight_avg": 0.9998708367347717, "objective/train/weighted_lm_loss": 1.3648322820663452, "objective/train/weights_max": 1.2626718282699585, "objective/train/weights_min": 0.3777256906032562, "theoretical_loss": 3.5808048004106054, "tokens_seen": 1336934400 }, { "epoch": 0.41, "learning_rate": 0.0006008666345690901, "loss": 0.0717, "theoretical_loss": 3.5808048004106054, "tokens_seen": 1336934400 }, { "epoch": 0.41, "learning_rate": 0.0006007863906275076, "loss": 0.071, "theoretical_loss": 3.5807420311178033, "tokens_seen": 1337196544 }, { "epoch": 0.41, "learning_rate": 0.0006007061466859252, "loss": 0.071, "theoretical_loss": 3.580679277573799, "tokens_seen": 1337458688 }, { "epoch": 0.41, "learning_rate": 0.0006006259027443429, "loss": 0.0713, "theoretical_loss": 3.5806165397715546, "tokens_seen": 1337720832 }, { "epoch": 0.41, "learning_rate": 0.0006005456588027604, "loss": 0.0719, "theoretical_loss": 3.580553817704039, "tokens_seen": 1337982976 }, { "epoch": 0.41, "learning_rate": 0.000600465414861178, "loss": 0.0697, "theoretical_loss": 3.580491111364223, "tokens_seen": 1338245120 }, { "epoch": 0.41, "learning_rate": 0.0006003851709195956, "loss": 0.0693, "theoretical_loss": 3.5804284207450836, "tokens_seen": 1338507264 }, { "epoch": 0.41, "learning_rate": 0.0006003049269780132, "loss": 0.0682, "theoretical_loss": 3.580365745839602, "tokens_seen": 1338769408 }, { "epoch": 0.41, "learning_rate": 0.0006002246830364307, "loss": 0.0719, "theoretical_loss": 3.5803030866407637, "tokens_seen": 1339031552 }, { "epoch": 0.41, "learning_rate": 0.0006001444390948484, "loss": 0.0701, "theoretical_loss": 3.580240443141559, "tokens_seen": 1339293696 }, { "epoch": 0.41, "learning_rate": 0.0006000641951532659, "loss": 0.0716, "theoretical_loss": 3.5801778153349817, "tokens_seen": 1339555840 }, { "epoch": 0.41, "learning_rate": 0.0005999839512116834, "loss": 0.0722, "theoretical_loss": 3.5801152032140315, "tokens_seen": 1339817984 }, { "epoch": 0.41, "learning_rate": 0.0005999037072701011, "loss": 0.0698, "theoretical_loss": 3.580052606771712, "tokens_seen": 1340080128 }, { "epoch": 0.41, "learning_rate": 0.0005998234633285187, "loss": 0.0697, "theoretical_loss": 3.579990026001031, "tokens_seen": 1340342272 }, { "epoch": 0.41, "learning_rate": 0.0005997432193869364, "loss": 0.072, "theoretical_loss": 3.579927460895002, "tokens_seen": 1340604416 }, { "epoch": 0.41, "learning_rate": 0.0005996629754453539, "loss": 0.0739, "theoretical_loss": 3.5798649114466405, "tokens_seen": 1340866560 }, { "epoch": 0.41, "learning_rate": 0.0005995827315037715, "loss": 0.0703, "theoretical_loss": 3.579802377648969, "tokens_seen": 1341128704 }, { "epoch": 0.41, "learning_rate": 0.0005995024875621891, "loss": 0.0724, "theoretical_loss": 3.579739859495013, "tokens_seen": 1341390848 }, { "epoch": 0.41, "learning_rate": 0.0005994222436206067, "loss": 0.072, "theoretical_loss": 3.5796773569778026, "tokens_seen": 1341652992 }, { "epoch": 0.41, "learning_rate": 0.0005993419996790242, "loss": 0.0713, "theoretical_loss": 3.579614870090374, "tokens_seen": 1341915136 }, { "epoch": 0.41, "learning_rate": 0.0005992617557374418, "loss": 0.0749, "theoretical_loss": 3.5795523988257654, "tokens_seen": 1342177280 }, { "epoch": 0.41, "learning_rate": 0.0005991815117958594, "loss": 0.072, "theoretical_loss": 3.5794899431770215, "tokens_seen": 1342439424 }, { "epoch": 0.41, "learning_rate": 0.000599101267854277, "loss": 0.0733, "theoretical_loss": 3.5794275031371896, "tokens_seen": 1342701568 }, { "epoch": 0.41, "learning_rate": 0.0005990210239126947, "loss": 0.0694, "theoretical_loss": 3.579365078699323, "tokens_seen": 1342963712 }, { "epoch": 0.41, "learning_rate": 0.0005989407799711122, "loss": 0.072, "theoretical_loss": 3.579302669856479, "tokens_seen": 1343225856 }, { "epoch": 0.41, "objective/train/advantage_avg": -2.97992642117606e-06, "objective/train/docs_used": 490931, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4495521783828735, "objective/train/original_loss": 1.449552059173584, "objective/train/theoretical_loss": 3.5792402766017197, "objective/train/tokens_used": 1363948000, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.0004934009630233049, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.64599609375, "objective/train/value_reward_corr": 0.603769803863347, "objective/train/value_std": 0.01506805419921875, "objective/train/weight_avg": 1.000206470489502, "objective/train/weighted_lm_loss": 1.4500666856765747, "objective/train/weights_max": 1.8629971742630005, "objective/train/weights_min": 0.3684026002883911, "theoretical_loss": 3.5792402766017197, "tokens_seen": 1343488000 }, { "epoch": 0.41, "learning_rate": 0.0005988605360295298, "loss": 0.0703, "theoretical_loss": 3.5792402766017197, "tokens_seen": 1343488000 }, { "epoch": 0.41, "learning_rate": 0.0005987802920879474, "loss": 0.0692, "theoretical_loss": 3.57917789892811, "tokens_seen": 1343750144 }, { "epoch": 0.41, "learning_rate": 0.0005987000481463649, "loss": 0.0691, "theoretical_loss": 3.579115536828721, "tokens_seen": 1344012288 }, { "epoch": 0.41, "learning_rate": 0.0005986198042047825, "loss": 0.07, "theoretical_loss": 3.5790531902966274, "tokens_seen": 1344274432 }, { "epoch": 0.41, "learning_rate": 0.0005985395602632001, "loss": 0.0698, "theoretical_loss": 3.578990859324909, "tokens_seen": 1344536576 }, { "epoch": 0.41, "learning_rate": 0.0005984593163216177, "loss": 0.0697, "theoretical_loss": 3.5789285439066494, "tokens_seen": 1344798720 }, { "epoch": 0.41, "learning_rate": 0.0005983790723800354, "loss": 0.0701, "theoretical_loss": 3.578866244034937, "tokens_seen": 1345060864 }, { "epoch": 0.41, "learning_rate": 0.000598298828438453, "loss": 0.0733, "theoretical_loss": 3.5788039597028636, "tokens_seen": 1345323008 }, { "epoch": 0.41, "learning_rate": 0.0005982185844968705, "loss": 0.0677, "theoretical_loss": 3.5787416909035272, "tokens_seen": 1345585152 }, { "epoch": 0.41, "learning_rate": 0.0005981383405552881, "loss": 0.0707, "theoretical_loss": 3.578679437630029, "tokens_seen": 1345847296 }, { "epoch": 0.41, "learning_rate": 0.0005980580966137057, "loss": 0.072, "theoretical_loss": 3.5786171998754748, "tokens_seen": 1346109440 }, { "epoch": 0.41, "learning_rate": 0.0005979778526721232, "loss": 0.0697, "theoretical_loss": 3.5785549776329746, "tokens_seen": 1346371584 }, { "epoch": 0.41, "learning_rate": 0.0005978976087305409, "loss": 0.0695, "theoretical_loss": 3.578492770895643, "tokens_seen": 1346633728 }, { "epoch": 0.41, "learning_rate": 0.0005978173647889584, "loss": 0.0674, "theoretical_loss": 3.5784305796566, "tokens_seen": 1346895872 }, { "epoch": 0.41, "learning_rate": 0.000597737120847376, "loss": 0.0706, "theoretical_loss": 3.5783684039089687, "tokens_seen": 1347158016 }, { "epoch": 0.41, "learning_rate": 0.0005976568769057937, "loss": 0.0689, "theoretical_loss": 3.578306243645876, "tokens_seen": 1347420160 }, { "epoch": 0.41, "learning_rate": 0.0005975766329642112, "loss": 0.0684, "theoretical_loss": 3.5782440988604547, "tokens_seen": 1347682304 }, { "epoch": 0.41, "learning_rate": 0.0005974963890226288, "loss": 0.0699, "theoretical_loss": 3.5781819695458417, "tokens_seen": 1347944448 }, { "epoch": 0.41, "learning_rate": 0.0005974161450810464, "loss": 0.0717, "theoretical_loss": 3.578119855695178, "tokens_seen": 1348206592 }, { "epoch": 0.41, "learning_rate": 0.000597335901139464, "loss": 0.0701, "theoretical_loss": 3.5780577573016084, "tokens_seen": 1348468736 }, { "epoch": 0.41, "learning_rate": 0.0005972556571978816, "loss": 0.0726, "theoretical_loss": 3.5779956743582835, "tokens_seen": 1348730880 }, { "epoch": 0.41, "learning_rate": 0.0005971754132562992, "loss": 0.0705, "theoretical_loss": 3.5779336068583563, "tokens_seen": 1348993024 }, { "epoch": 0.41, "learning_rate": 0.0005970951693147167, "loss": 0.0716, "theoretical_loss": 3.577871554794986, "tokens_seen": 1349255168 }, { "epoch": 0.41, "learning_rate": 0.0005970149253731343, "loss": 0.0678, "theoretical_loss": 3.5778095181613354, "tokens_seen": 1349517312 }, { "epoch": 0.41, "learning_rate": 0.000596934681431552, "loss": 0.0713, "theoretical_loss": 3.577747496950572, "tokens_seen": 1349779456 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0013337216805666685, "objective/train/docs_used": 493399, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.339576244354248, "objective/train/original_loss": 1.3395761251449585, "objective/train/theoretical_loss": 3.577685491155866, "objective/train/tokens_used": 1370501600, "objective/train/value_avg": -0.00826263427734375, "objective/train/value_loss": 0.00026566104497760534, "objective/train/value_max": -0.0001252889633178711, "objective/train/value_min": -0.6865234375, "objective/train/value_reward_corr": 0.5799900901845774, "objective/train/value_std": 0.01233673095703125, "objective/train/weight_avg": 1.0014503002166748, "objective/train/weighted_lm_loss": 1.341780185699463, "objective/train/weights_max": 1.6780211925506592, "objective/train/weights_min": 0.3697971701622009, "theoretical_loss": 3.577685491155866, "tokens_seen": 1350041600 }, { "epoch": 0.41, "learning_rate": 0.0005968544374899695, "loss": 0.0717, "theoretical_loss": 3.577685491155866, "tokens_seen": 1350041600 }, { "epoch": 0.41, "learning_rate": 0.0005967741935483872, "loss": 0.0714, "theoretical_loss": 3.577623500770394, "tokens_seen": 1350303744 }, { "epoch": 0.41, "learning_rate": 0.0005966939496068047, "loss": 0.0704, "theoretical_loss": 3.577561525787337, "tokens_seen": 1350565888 }, { "epoch": 0.41, "learning_rate": 0.0005966137056652223, "loss": 0.0696, "theoretical_loss": 3.5774995661998785, "tokens_seen": 1350828032 }, { "epoch": 0.41, "learning_rate": 0.0005965334617236399, "loss": 0.0701, "theoretical_loss": 3.5774376220012085, "tokens_seen": 1351090176 }, { "epoch": 0.41, "learning_rate": 0.0005964532177820575, "loss": 0.0732, "theoretical_loss": 3.5773756931845186, "tokens_seen": 1351352320 }, { "epoch": 0.41, "learning_rate": 0.000596372973840475, "loss": 0.071, "theoretical_loss": 3.5773137797430077, "tokens_seen": 1351614464 }, { "epoch": 0.41, "learning_rate": 0.0005962927298988926, "loss": 0.0719, "theoretical_loss": 3.577251881669877, "tokens_seen": 1351876608 }, { "epoch": 0.41, "learning_rate": 0.0005962124859573102, "loss": 0.0755, "theoretical_loss": 3.5771899989583336, "tokens_seen": 1352138752 }, { "epoch": 0.41, "learning_rate": 0.0005961322420157277, "loss": 0.0694, "theoretical_loss": 3.577128131601587, "tokens_seen": 1352400896 }, { "epoch": 0.41, "learning_rate": 0.0005960519980741455, "loss": 0.0678, "theoretical_loss": 3.5770662795928527, "tokens_seen": 1352663040 }, { "epoch": 0.41, "learning_rate": 0.000595971754132563, "loss": 0.0681, "theoretical_loss": 3.5770044429253494, "tokens_seen": 1352925184 }, { "epoch": 0.41, "learning_rate": 0.0005958915101909807, "loss": 0.0701, "theoretical_loss": 3.576942621592301, "tokens_seen": 1353187328 }, { "epoch": 0.41, "learning_rate": 0.0005958112662493982, "loss": 0.0709, "theoretical_loss": 3.576880815586935, "tokens_seen": 1353449472 }, { "epoch": 0.41, "learning_rate": 0.0005957310223078157, "loss": 0.0713, "theoretical_loss": 3.576819024902483, "tokens_seen": 1353711616 }, { "epoch": 0.41, "learning_rate": 0.0005956507783662334, "loss": 0.0739, "theoretical_loss": 3.576757249532183, "tokens_seen": 1353973760 }, { "epoch": 0.41, "learning_rate": 0.0005955705344246509, "loss": 0.0697, "theoretical_loss": 3.576695489469274, "tokens_seen": 1354235904 }, { "epoch": 0.41, "learning_rate": 0.0005954902904830685, "loss": 0.0713, "theoretical_loss": 3.5766337447070016, "tokens_seen": 1354498048 }, { "epoch": 0.41, "learning_rate": 0.0005954100465414862, "loss": 0.0706, "theoretical_loss": 3.5765720152386153, "tokens_seen": 1354760192 }, { "epoch": 0.41, "learning_rate": 0.0005953298025999038, "loss": 0.0706, "theoretical_loss": 3.5765103010573682, "tokens_seen": 1355022336 }, { "epoch": 0.41, "learning_rate": 0.0005952495586583213, "loss": 0.071, "theoretical_loss": 3.576448602156518, "tokens_seen": 1355284480 }, { "epoch": 0.41, "learning_rate": 0.0005951693147167389, "loss": 0.0693, "theoretical_loss": 3.5763869185293276, "tokens_seen": 1355546624 }, { "epoch": 0.41, "learning_rate": 0.0005950890707751565, "loss": 0.0729, "theoretical_loss": 3.576325250169062, "tokens_seen": 1355808768 }, { "epoch": 0.41, "learning_rate": 0.000595008826833574, "loss": 0.0696, "theoretical_loss": 3.5762635970689933, "tokens_seen": 1356070912 }, { "epoch": 0.41, "learning_rate": 0.0005949285828919917, "loss": 0.0689, "theoretical_loss": 3.576201959222396, "tokens_seen": 1356333056 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0003139876062050462, "objective/train/docs_used": 495730, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4432828426361084, "objective/train/original_loss": 1.443282961845398, "objective/train/theoretical_loss": 3.576140336622548, "objective/train/tokens_used": 1377055200, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.0003055589913856238, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.8916015625, "objective/train/value_reward_corr": 0.8504052999798968, "objective/train/value_std": 0.0272674560546875, "objective/train/weight_avg": 1.0004558563232422, "objective/train/weighted_lm_loss": 1.4439336061477661, "objective/train/weights_max": 1.514553189277649, "objective/train/weights_min": 0.37005260586738586, "theoretical_loss": 3.576140336622548, "tokens_seen": 1356595200 }, { "epoch": 0.41, "learning_rate": 0.0005948483389504092, "loss": 0.0699, "theoretical_loss": 3.576140336622548, "tokens_seen": 1356595200 }, { "epoch": 0.41, "learning_rate": 0.0005947680950088269, "loss": 0.0727, "theoretical_loss": 3.5760787292627345, "tokens_seen": 1356857344 }, { "epoch": 0.41, "learning_rate": 0.0005946878510672444, "loss": 0.0728, "theoretical_loss": 3.576017137136242, "tokens_seen": 1357119488 }, { "epoch": 0.41, "learning_rate": 0.000594607607125662, "loss": 0.0709, "theoretical_loss": 3.5759555602363635, "tokens_seen": 1357381632 }, { "epoch": 0.41, "learning_rate": 0.0005945273631840797, "loss": 0.068, "theoretical_loss": 3.5758939985563942, "tokens_seen": 1357643776 }, { "epoch": 0.41, "learning_rate": 0.0005944471192424972, "loss": 0.0709, "theoretical_loss": 3.5758324520896347, "tokens_seen": 1357905920 }, { "epoch": 0.41, "learning_rate": 0.0005943668753009148, "loss": 0.0719, "theoretical_loss": 3.57577092082939, "tokens_seen": 1358168064 }, { "epoch": 0.41, "learning_rate": 0.0005942866313593324, "loss": 0.0709, "theoretical_loss": 3.5757094047689684, "tokens_seen": 1358430208 }, { "epoch": 0.41, "learning_rate": 0.00059420638741775, "loss": 0.0696, "theoretical_loss": 3.575647903901684, "tokens_seen": 1358692352 }, { "epoch": 0.41, "learning_rate": 0.0005941261434761675, "loss": 0.0713, "theoretical_loss": 3.575586418220853, "tokens_seen": 1358954496 }, { "epoch": 0.41, "learning_rate": 0.0005940458995345851, "loss": 0.0717, "theoretical_loss": 3.5755249477197983, "tokens_seen": 1359216640 }, { "epoch": 0.41, "learning_rate": 0.0005939656555930027, "loss": 0.0695, "theoretical_loss": 3.5754634923918447, "tokens_seen": 1359478784 }, { "epoch": 0.41, "learning_rate": 0.0005938854116514203, "loss": 0.0694, "theoretical_loss": 3.5754020522303227, "tokens_seen": 1359740928 }, { "epoch": 0.41, "learning_rate": 0.000593805167709838, "loss": 0.0697, "theoretical_loss": 3.575340627228566, "tokens_seen": 1360003072 }, { "epoch": 0.41, "learning_rate": 0.0005937249237682555, "loss": 0.0711, "theoretical_loss": 3.575279217379914, "tokens_seen": 1360265216 }, { "epoch": 0.41, "learning_rate": 0.0005936446798266731, "loss": 0.0737, "theoretical_loss": 3.575217822677709, "tokens_seen": 1360527360 }, { "epoch": 0.41, "learning_rate": 0.0005935644358850907, "loss": 0.0698, "theoretical_loss": 3.575156443115297, "tokens_seen": 1360789504 }, { "epoch": 0.41, "learning_rate": 0.0005934841919435082, "loss": 0.0699, "theoretical_loss": 3.5750950786860307, "tokens_seen": 1361051648 }, { "epoch": 0.41, "learning_rate": 0.0005934039480019259, "loss": 0.0703, "theoretical_loss": 3.5750337293832644, "tokens_seen": 1361313792 }, { "epoch": 0.41, "learning_rate": 0.0005933237040603434, "loss": 0.0682, "theoretical_loss": 3.5749723952003576, "tokens_seen": 1361575936 }, { "epoch": 0.41, "learning_rate": 0.000593243460118761, "loss": 0.0742, "theoretical_loss": 3.5749110761306744, "tokens_seen": 1361838080 }, { "epoch": 0.41, "learning_rate": 0.0005931632161771787, "loss": 0.0693, "theoretical_loss": 3.5748497721675823, "tokens_seen": 1362100224 }, { "epoch": 0.41, "learning_rate": 0.0005930829722355963, "loss": 0.0723, "theoretical_loss": 3.574788483304453, "tokens_seen": 1362362368 }, { "epoch": 0.41, "learning_rate": 0.0005930027282940138, "loss": 0.07, "theoretical_loss": 3.5747272095346636, "tokens_seen": 1362624512 }, { "epoch": 0.41, "learning_rate": 0.0005929224843524315, "loss": 0.0717, "theoretical_loss": 3.5746659508515943, "tokens_seen": 1362886656 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0005423167604021728, "objective/train/docs_used": 498147, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3880330324172974, "objective/train/original_loss": 1.3880329132080078, "objective/train/theoretical_loss": 3.5746047072486293, "objective/train/tokens_used": 1383608800, "objective/train/value_avg": -0.00732421875, "objective/train/value_loss": 0.0004709110071416944, "objective/train/value_max": -7.724761962890625e-05, "objective/train/value_min": -0.80078125, "objective/train/value_reward_corr": 0.6727173948210152, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 1.0007424354553223, "objective/train/weighted_lm_loss": 1.3886373043060303, "objective/train/weights_max": 1.4593173265457153, "objective/train/weights_min": 0.3683730661869049, "theoretical_loss": 3.5746047072486293, "tokens_seen": 1363148800 }, { "epoch": 0.41, "learning_rate": 0.000592842240410849, "loss": 0.0711, "theoretical_loss": 3.5746047072486293, "tokens_seen": 1363148800 }, { "epoch": 0.41, "learning_rate": 0.0005927619964692665, "loss": 0.0691, "theoretical_loss": 3.574543478719158, "tokens_seen": 1363410944 }, { "epoch": 0.41, "learning_rate": 0.0005926817525276842, "loss": 0.07, "theoretical_loss": 3.5744822652565724, "tokens_seen": 1363673088 }, { "epoch": 0.41, "learning_rate": 0.0005926015085861017, "loss": 0.0727, "theoretical_loss": 3.5744210668542706, "tokens_seen": 1363935232 }, { "epoch": 0.41, "learning_rate": 0.0005925212646445193, "loss": 0.0721, "theoretical_loss": 3.574359883505653, "tokens_seen": 1364197376 }, { "epoch": 0.41, "learning_rate": 0.000592441020702937, "loss": 0.0725, "theoretical_loss": 3.5742987152041255, "tokens_seen": 1364459520 }, { "epoch": 0.41, "learning_rate": 0.0005923607767613546, "loss": 0.0688, "theoretical_loss": 3.574237561943098, "tokens_seen": 1364721664 }, { "epoch": 0.41, "learning_rate": 0.0005922805328197721, "loss": 0.0713, "theoretical_loss": 3.5741764237159837, "tokens_seen": 1364983808 }, { "epoch": 0.41, "learning_rate": 0.0005922002888781897, "loss": 0.0721, "theoretical_loss": 3.5741153005162003, "tokens_seen": 1365245952 }, { "epoch": 0.41, "learning_rate": 0.0005921200449366073, "loss": 0.0729, "theoretical_loss": 3.57405419233717, "tokens_seen": 1365508096 }, { "epoch": 0.41, "learning_rate": 0.0005920398009950249, "loss": 0.071, "theoretical_loss": 3.5739930991723194, "tokens_seen": 1365770240 }, { "epoch": 0.41, "learning_rate": 0.0005919595570534425, "loss": 0.0735, "theoretical_loss": 3.5739320210150787, "tokens_seen": 1366032384 }, { "epoch": 0.41, "learning_rate": 0.00059187931311186, "loss": 0.0714, "theoretical_loss": 3.5738709578588814, "tokens_seen": 1366294528 }, { "epoch": 0.41, "learning_rate": 0.0005917990691702777, "loss": 0.0687, "theoretical_loss": 3.573809909697167, "tokens_seen": 1366556672 }, { "epoch": 0.41, "learning_rate": 0.0005917188252286952, "loss": 0.07, "theoretical_loss": 3.573748876523379, "tokens_seen": 1366818816 }, { "epoch": 0.41, "learning_rate": 0.0005916385812871128, "loss": 0.0717, "theoretical_loss": 3.5736878583309624, "tokens_seen": 1367080960 }, { "epoch": 0.41, "learning_rate": 0.0005915583373455305, "loss": 0.0716, "theoretical_loss": 3.5736268551133694, "tokens_seen": 1367343104 }, { "epoch": 0.41, "learning_rate": 0.000591478093403948, "loss": 0.0725, "theoretical_loss": 3.5735658668640538, "tokens_seen": 1367605248 }, { "epoch": 0.41, "learning_rate": 0.0005913978494623656, "loss": 0.0698, "theoretical_loss": 3.573504893576476, "tokens_seen": 1367867392 }, { "epoch": 0.41, "learning_rate": 0.0005913176055207832, "loss": 0.0698, "theoretical_loss": 3.573443935244099, "tokens_seen": 1368129536 }, { "epoch": 0.41, "learning_rate": 0.0005912373615792008, "loss": 0.0702, "theoretical_loss": 3.5733829918603903, "tokens_seen": 1368391680 }, { "epoch": 0.41, "learning_rate": 0.0005911571176376183, "loss": 0.0728, "theoretical_loss": 3.573322063418821, "tokens_seen": 1368653824 }, { "epoch": 0.41, "learning_rate": 0.0005910768736960359, "loss": 0.0712, "theoretical_loss": 3.5732611499128666, "tokens_seen": 1368915968 }, { "epoch": 0.41, "learning_rate": 0.0005909966297544535, "loss": 0.07, "theoretical_loss": 3.5732002513360075, "tokens_seen": 1369178112 }, { "epoch": 0.41, "learning_rate": 0.0005909163858128712, "loss": 0.0698, "theoretical_loss": 3.5731393676817267, "tokens_seen": 1369440256 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0006691030575893819, "objective/train/docs_used": 500422, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.462990403175354, "objective/train/original_loss": 1.462990164756775, "objective/train/theoretical_loss": 3.573078498943513, "objective/train/tokens_used": 1390162400, "objective/train/value_avg": -0.01076507568359375, "objective/train/value_loss": 0.00031313643557950854, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.68896484375, "objective/train/value_reward_corr": 0.7292926677177232, "objective/train/value_std": 0.0184326171875, "objective/train/weight_avg": 1.0008124113082886, "objective/train/weighted_lm_loss": 1.4639732837677002, "objective/train/weights_max": 1.8267512321472168, "objective/train/weights_min": 0.37275585532188416, "theoretical_loss": 3.573078498943513, "tokens_seen": 1369702400 }, { "epoch": 0.42, "learning_rate": 0.0005908361418712888, "loss": 0.0711, "theoretical_loss": 3.573078498943513, "tokens_seen": 1369702400 }, { "epoch": 0.42, "learning_rate": 0.0005907558979297063, "loss": 0.0716, "theoretical_loss": 3.5730176451148568, "tokens_seen": 1369964544 }, { "epoch": 0.42, "learning_rate": 0.000590675653988124, "loss": 0.0698, "theoretical_loss": 3.572956806189256, "tokens_seen": 1370226688 }, { "epoch": 0.42, "learning_rate": 0.0005905954100465415, "loss": 0.0707, "theoretical_loss": 3.5728959821602095, "tokens_seen": 1370488832 }, { "epoch": 0.42, "learning_rate": 0.000590515166104959, "loss": 0.0699, "theoretical_loss": 3.5728351730212218, "tokens_seen": 1370750976 }, { "epoch": 0.42, "learning_rate": 0.0005904349221633767, "loss": 0.0707, "theoretical_loss": 3.5727743787658017, "tokens_seen": 1371013120 }, { "epoch": 0.42, "learning_rate": 0.0005903546782217942, "loss": 0.0747, "theoretical_loss": 3.572713599387461, "tokens_seen": 1371275264 }, { "epoch": 0.42, "learning_rate": 0.0005902744342802118, "loss": 0.0695, "theoretical_loss": 3.572652834879716, "tokens_seen": 1371537408 }, { "epoch": 0.42, "learning_rate": 0.0005901941903386295, "loss": 0.0691, "theoretical_loss": 3.5725920852360877, "tokens_seen": 1371799552 }, { "epoch": 0.42, "learning_rate": 0.0005901139463970471, "loss": 0.0688, "theoretical_loss": 3.5725313504501006, "tokens_seen": 1372061696 }, { "epoch": 0.42, "learning_rate": 0.0005900337024554646, "loss": 0.0725, "theoretical_loss": 3.5724706305152827, "tokens_seen": 1372323840 }, { "epoch": 0.42, "learning_rate": 0.0005899534585138823, "loss": 0.0667, "theoretical_loss": 3.572409925425167, "tokens_seen": 1372585984 }, { "epoch": 0.42, "learning_rate": 0.0005898732145722998, "loss": 0.0709, "theoretical_loss": 3.5723492351732906, "tokens_seen": 1372848128 }, { "epoch": 0.42, "learning_rate": 0.0005897929706307173, "loss": 0.0702, "theoretical_loss": 3.572288559753194, "tokens_seen": 1373110272 }, { "epoch": 0.42, "learning_rate": 0.000589712726689135, "loss": 0.0747, "theoretical_loss": 3.5722278991584218, "tokens_seen": 1373372416 }, { "epoch": 0.42, "learning_rate": 0.0005896324827475525, "loss": 0.071, "theoretical_loss": 3.572167253382523, "tokens_seen": 1373634560 }, { "epoch": 0.42, "learning_rate": 0.0005895522388059702, "loss": 0.0739, "theoretical_loss": 3.5721066224190503, "tokens_seen": 1373896704 }, { "epoch": 0.42, "learning_rate": 0.0005894719948643878, "loss": 0.0702, "theoretical_loss": 3.572046006261561, "tokens_seen": 1374158848 }, { "epoch": 0.42, "learning_rate": 0.0005893917509228054, "loss": 0.0677, "theoretical_loss": 3.5719854049036153, "tokens_seen": 1374420992 }, { "epoch": 0.42, "learning_rate": 0.000589311506981223, "loss": 0.0692, "theoretical_loss": 3.571924818338779, "tokens_seen": 1374683136 }, { "epoch": 0.42, "learning_rate": 0.0005892312630396405, "loss": 0.0685, "theoretical_loss": 3.5718642465606214, "tokens_seen": 1374945280 }, { "epoch": 0.42, "learning_rate": 0.0005891510190980581, "loss": 0.0684, "theoretical_loss": 3.571803689562714, "tokens_seen": 1375207424 }, { "epoch": 0.42, "learning_rate": 0.0005890707751564757, "loss": 0.0722, "theoretical_loss": 3.571743147338635, "tokens_seen": 1375469568 }, { "epoch": 0.42, "learning_rate": 0.0005889905312148933, "loss": 0.0718, "theoretical_loss": 3.5716826198819653, "tokens_seen": 1375731712 }, { "epoch": 0.42, "learning_rate": 0.0005889102872733108, "loss": 0.0732, "theoretical_loss": 3.57162210718629, "tokens_seen": 1375993856 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.001140189473517239, "objective/train/docs_used": 502650, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.329345464706421, "objective/train/original_loss": 1.329345464706421, "objective/train/theoretical_loss": 3.5715616092451983, "objective/train/tokens_used": 1396716000, "objective/train/value_avg": -0.009979248046875, "objective/train/value_loss": 0.0002272567362524569, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.2432861328125, "objective/train/value_reward_corr": 0.7523422487981606, "objective/train/value_std": 0.01526641845703125, "objective/train/weight_avg": 1.0012439489364624, "objective/train/weighted_lm_loss": 1.3301997184753418, "objective/train/weights_max": 1.2260957956314087, "objective/train/weights_min": 0.3730090260505676, "theoretical_loss": 3.5715616092451983, "tokens_seen": 1376256000 }, { "epoch": 0.42, "learning_rate": 0.0005888300433317285, "loss": 0.0693, "theoretical_loss": 3.5715616092451983, "tokens_seen": 1376256000 }, { "epoch": 0.42, "learning_rate": 0.000588749799390146, "loss": 0.0676, "theoretical_loss": 3.5715011260522824, "tokens_seen": 1376518144 }, { "epoch": 0.42, "learning_rate": 0.0005886695554485636, "loss": 0.0701, "theoretical_loss": 3.57144065760114, "tokens_seen": 1376780288 }, { "epoch": 0.42, "learning_rate": 0.0005885893115069813, "loss": 0.071, "theoretical_loss": 3.5713802038853726, "tokens_seen": 1377042432 }, { "epoch": 0.42, "learning_rate": 0.0005885090675653988, "loss": 0.0677, "theoretical_loss": 3.5713197648985844, "tokens_seen": 1377304576 }, { "epoch": 0.42, "learning_rate": 0.0005884288236238165, "loss": 0.0722, "theoretical_loss": 3.571259340634385, "tokens_seen": 1377566720 }, { "epoch": 0.42, "learning_rate": 0.000588348579682234, "loss": 0.0681, "theoretical_loss": 3.5711989310863874, "tokens_seen": 1377828864 }, { "epoch": 0.42, "learning_rate": 0.0005882683357406516, "loss": 0.0663, "theoretical_loss": 3.571138536248209, "tokens_seen": 1378091008 }, { "epoch": 0.42, "learning_rate": 0.0005881880917990692, "loss": 0.0715, "theoretical_loss": 3.57107815611347, "tokens_seen": 1378353152 }, { "epoch": 0.42, "learning_rate": 0.0005881078478574867, "loss": 0.0718, "theoretical_loss": 3.571017790675796, "tokens_seen": 1378615296 }, { "epoch": 0.42, "learning_rate": 0.0005880276039159043, "loss": 0.0712, "theoretical_loss": 3.570957439928815, "tokens_seen": 1378877440 }, { "epoch": 0.42, "learning_rate": 0.000587947359974322, "loss": 0.0708, "theoretical_loss": 3.5708971038661614, "tokens_seen": 1379139584 }, { "epoch": 0.42, "learning_rate": 0.0005878671160327396, "loss": 0.0693, "theoretical_loss": 3.5708367824814715, "tokens_seen": 1379401728 }, { "epoch": 0.42, "learning_rate": 0.0005877868720911571, "loss": 0.0706, "theoretical_loss": 3.570776475768386, "tokens_seen": 1379663872 }, { "epoch": 0.42, "learning_rate": 0.0005877066281495748, "loss": 0.0715, "theoretical_loss": 3.57071618372055, "tokens_seen": 1379926016 }, { "epoch": 0.42, "learning_rate": 0.0005876263842079923, "loss": 0.0719, "theoretical_loss": 3.570655906331612, "tokens_seen": 1380188160 }, { "epoch": 0.42, "learning_rate": 0.0005875461402664098, "loss": 0.0708, "theoretical_loss": 3.570595643595225, "tokens_seen": 1380450304 }, { "epoch": 0.42, "learning_rate": 0.0005874658963248275, "loss": 0.0709, "theoretical_loss": 3.570535395505045, "tokens_seen": 1380712448 }, { "epoch": 0.42, "learning_rate": 0.000587385652383245, "loss": 0.0682, "theoretical_loss": 3.570475162054734, "tokens_seen": 1380974592 }, { "epoch": 0.42, "learning_rate": 0.0005873054084416626, "loss": 0.0705, "theoretical_loss": 3.570414943237956, "tokens_seen": 1381236736 }, { "epoch": 0.42, "learning_rate": 0.0005872251645000803, "loss": 0.0705, "theoretical_loss": 3.570354739048379, "tokens_seen": 1381498880 }, { "epoch": 0.42, "learning_rate": 0.0005871449205584979, "loss": 0.0694, "theoretical_loss": 3.5702945494796765, "tokens_seen": 1381761024 }, { "epoch": 0.42, "learning_rate": 0.0005870646766169155, "loss": 0.0708, "theoretical_loss": 3.5702343745255236, "tokens_seen": 1382023168 }, { "epoch": 0.42, "learning_rate": 0.0005869844326753331, "loss": 0.0707, "theoretical_loss": 3.5701742141796022, "tokens_seen": 1382285312 }, { "epoch": 0.42, "learning_rate": 0.0005869041887337506, "loss": 0.0684, "theoretical_loss": 3.570114068435595, "tokens_seen": 1382547456 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.0004002986242994666, "objective/train/docs_used": 505055, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4260497093200684, "objective/train/original_loss": 1.4260497093200684, "objective/train/theoretical_loss": 3.570053937287192, "objective/train/tokens_used": 1403269600, "objective/train/value_avg": -0.01218414306640625, "objective/train/value_loss": 0.00020151071657892317, "objective/train/value_max": -9.244680404663086e-05, "objective/train/value_min": -0.576171875, "objective/train/value_reward_corr": 0.8434513792631848, "objective/train/value_std": 0.022979736328125, "objective/train/weight_avg": 1.0004942417144775, "objective/train/weighted_lm_loss": 1.4266375303268433, "objective/train/weights_max": 1.1901416778564453, "objective/train/weights_min": 0.3686957061290741, "theoretical_loss": 3.570053937287192, "tokens_seen": 1382809600 }, { "epoch": 0.42, "learning_rate": 0.0005868239447921682, "loss": 0.0716, "theoretical_loss": 3.570053937287192, "tokens_seen": 1382809600 }, { "epoch": 0.42, "learning_rate": 0.0005867437008505858, "loss": 0.07, "theoretical_loss": 3.569993820728084, "tokens_seen": 1383071744 }, { "epoch": 0.42, "learning_rate": 0.0005866634569090033, "loss": 0.0686, "theoretical_loss": 3.569933718751967, "tokens_seen": 1383333888 }, { "epoch": 0.42, "learning_rate": 0.000586583212967421, "loss": 0.0708, "theoretical_loss": 3.569873631352542, "tokens_seen": 1383596032 }, { "epoch": 0.42, "learning_rate": 0.0005865029690258386, "loss": 0.0705, "theoretical_loss": 3.5698135585235122, "tokens_seen": 1383858176 }, { "epoch": 0.42, "learning_rate": 0.0005864227250842562, "loss": 0.0684, "theoretical_loss": 3.5697535002585856, "tokens_seen": 1384120320 }, { "epoch": 0.42, "learning_rate": 0.0005863424811426738, "loss": 0.0703, "theoretical_loss": 3.569693456551474, "tokens_seen": 1384382464 }, { "epoch": 0.42, "learning_rate": 0.0005862622372010913, "loss": 0.0721, "theoretical_loss": 3.5696334273958925, "tokens_seen": 1384644608 }, { "epoch": 0.42, "learning_rate": 0.0005861819932595089, "loss": 0.0702, "theoretical_loss": 3.569573412785561, "tokens_seen": 1384906752 }, { "epoch": 0.42, "learning_rate": 0.0005861017493179265, "loss": 0.0719, "theoretical_loss": 3.569513412714203, "tokens_seen": 1385168896 }, { "epoch": 0.42, "learning_rate": 0.0005860215053763441, "loss": 0.073, "theoretical_loss": 3.569453427175546, "tokens_seen": 1385431040 }, { "epoch": 0.42, "learning_rate": 0.0005859412614347616, "loss": 0.0689, "theoretical_loss": 3.5693934561633203, "tokens_seen": 1385693184 }, { "epoch": 0.42, "learning_rate": 0.0005858610174931793, "loss": 0.0692, "theoretical_loss": 3.5693334996712625, "tokens_seen": 1385955328 }, { "epoch": 0.42, "learning_rate": 0.0005857807735515968, "loss": 0.0708, "theoretical_loss": 3.5692735576931103, "tokens_seen": 1386217472 }, { "epoch": 0.42, "learning_rate": 0.0005857005296100145, "loss": 0.0675, "theoretical_loss": 3.569213630222607, "tokens_seen": 1386479616 }, { "epoch": 0.42, "learning_rate": 0.0005856202856684321, "loss": 0.07, "theoretical_loss": 3.5691537172535, "tokens_seen": 1386741760 }, { "epoch": 0.42, "learning_rate": 0.0005855400417268496, "loss": 0.0676, "theoretical_loss": 3.569093818779539, "tokens_seen": 1387003904 }, { "epoch": 0.42, "learning_rate": 0.0005854597977852673, "loss": 0.0737, "theoretical_loss": 3.5690339347944784, "tokens_seen": 1387266048 }, { "epoch": 0.42, "learning_rate": 0.0005853795538436848, "loss": 0.0684, "theoretical_loss": 3.568974065292077, "tokens_seen": 1387528192 }, { "epoch": 0.42, "learning_rate": 0.0005852993099021024, "loss": 0.0723, "theoretical_loss": 3.5689142102660973, "tokens_seen": 1387790336 }, { "epoch": 0.42, "learning_rate": 0.00058521906596052, "loss": 0.0679, "theoretical_loss": 3.568854369710305, "tokens_seen": 1388052480 }, { "epoch": 0.42, "learning_rate": 0.0005851388220189375, "loss": 0.067, "theoretical_loss": 3.5687945436184703, "tokens_seen": 1388314624 }, { "epoch": 0.42, "learning_rate": 0.0005850585780773551, "loss": 0.0702, "theoretical_loss": 3.5687347319843665, "tokens_seen": 1388576768 }, { "epoch": 0.42, "learning_rate": 0.0005849783341357728, "loss": 0.0707, "theoretical_loss": 3.5686749348017726, "tokens_seen": 1388838912 }, { "epoch": 0.42, "learning_rate": 0.0005848980901941904, "loss": 0.0717, "theoretical_loss": 3.5686151520644684, "tokens_seen": 1389101056 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.0012033990351483226, "objective/train/docs_used": 507487, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3310184478759766, "objective/train/original_loss": 1.3310184478759766, "objective/train/theoretical_loss": 3.56855538376624, "objective/train/tokens_used": 1409823200, "objective/train/value_avg": -0.006999969482421875, "objective/train/value_loss": 9.323181438958272e-05, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.252685546875, "objective/train/value_reward_corr": 0.81285075839567, "objective/train/value_std": 0.01432037353515625, "objective/train/weight_avg": 1.0012476444244385, "objective/train/weighted_lm_loss": 1.3323184251785278, "objective/train/weights_max": 1.1576061248779297, "objective/train/weights_min": 0.3713989555835724, "theoretical_loss": 3.56855538376624, "tokens_seen": 1389363200 }, { "epoch": 0.42, "learning_rate": 0.0005848178462526079, "loss": 0.0704, "theoretical_loss": 3.56855538376624, "tokens_seen": 1389363200 }, { "epoch": 0.42, "learning_rate": 0.0005847376023110256, "loss": 0.0683, "theoretical_loss": 3.568495629900877, "tokens_seen": 1389625344 }, { "epoch": 0.42, "learning_rate": 0.0005846573583694431, "loss": 0.0729, "theoretical_loss": 3.5684358904621725, "tokens_seen": 1389887488 }, { "epoch": 0.42, "learning_rate": 0.0005845771144278606, "loss": 0.0675, "theoretical_loss": 3.5683761654439223, "tokens_seen": 1390149632 }, { "epoch": 0.42, "learning_rate": 0.0005844968704862783, "loss": 0.067, "theoretical_loss": 3.5683164548399287, "tokens_seen": 1390411776 }, { "epoch": 0.42, "learning_rate": 0.0005844166265446958, "loss": 0.0721, "theoretical_loss": 3.568256758643995, "tokens_seen": 1390673920 }, { "epoch": 0.42, "learning_rate": 0.0005843363826031135, "loss": 0.0669, "theoretical_loss": 3.5681970768499305, "tokens_seen": 1390936064 }, { "epoch": 0.42, "learning_rate": 0.000584256138661531, "loss": 0.0683, "theoretical_loss": 3.5681374094515466, "tokens_seen": 1391198208 }, { "epoch": 0.42, "learning_rate": 0.0005841758947199487, "loss": 0.0698, "theoretical_loss": 3.5680777564426602, "tokens_seen": 1391460352 }, { "epoch": 0.42, "learning_rate": 0.0005840956507783663, "loss": 0.0667, "theoretical_loss": 3.56801811781709, "tokens_seen": 1391722496 }, { "epoch": 0.42, "learning_rate": 0.0005840154068367838, "loss": 0.0687, "theoretical_loss": 3.5679584935686615, "tokens_seen": 1391984640 }, { "epoch": 0.42, "learning_rate": 0.0005839351628952014, "loss": 0.068, "theoretical_loss": 3.5678988836912007, "tokens_seen": 1392246784 }, { "epoch": 0.42, "learning_rate": 0.000583854918953619, "loss": 0.0691, "theoretical_loss": 3.567839288178539, "tokens_seen": 1392508928 }, { "epoch": 0.42, "learning_rate": 0.0005837746750120366, "loss": 0.0703, "theoretical_loss": 3.5677797070245125, "tokens_seen": 1392771072 }, { "epoch": 0.42, "learning_rate": 0.0005836944310704541, "loss": 0.0688, "theoretical_loss": 3.567720140222959, "tokens_seen": 1393033216 }, { "epoch": 0.42, "learning_rate": 0.0005836141871288718, "loss": 0.0713, "theoretical_loss": 3.567660587767722, "tokens_seen": 1393295360 }, { "epoch": 0.42, "learning_rate": 0.0005835339431872894, "loss": 0.0693, "theoretical_loss": 3.567601049652648, "tokens_seen": 1393557504 }, { "epoch": 0.42, "learning_rate": 0.000583453699245707, "loss": 0.0689, "theoretical_loss": 3.567541525871587, "tokens_seen": 1393819648 }, { "epoch": 0.42, "learning_rate": 0.0005833734553041246, "loss": 0.0691, "theoretical_loss": 3.5674820164183934, "tokens_seen": 1394081792 }, { "epoch": 0.42, "learning_rate": 0.0005832932113625421, "loss": 0.0717, "theoretical_loss": 3.567422521286925, "tokens_seen": 1394343936 }, { "epoch": 0.42, "learning_rate": 0.0005832129674209598, "loss": 0.071, "theoretical_loss": 3.5673630404710432, "tokens_seen": 1394606080 }, { "epoch": 0.42, "learning_rate": 0.0005831327234793773, "loss": 0.0704, "theoretical_loss": 3.567303573964614, "tokens_seen": 1394868224 }, { "epoch": 0.42, "learning_rate": 0.0005830524795377949, "loss": 0.0704, "theoretical_loss": 3.5672441217615063, "tokens_seen": 1395130368 }, { "epoch": 0.42, "learning_rate": 0.0005829722355962125, "loss": 0.0732, "theoretical_loss": 3.5671846838555936, "tokens_seen": 1395392512 }, { "epoch": 0.42, "learning_rate": 0.0005828919916546301, "loss": 0.0706, "theoretical_loss": 3.567125260240752, "tokens_seen": 1395654656 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.0006299561937339604, "objective/train/docs_used": 509897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3458755016326904, "objective/train/original_loss": 1.3458755016326904, "objective/train/theoretical_loss": 3.5670658509108626, "objective/train/tokens_used": 1416376800, "objective/train/value_avg": -0.006847381591796875, "objective/train/value_loss": 0.0002548126794863492, "objective/train/value_max": -9.763240814208984e-05, "objective/train/value_min": -0.53857421875, "objective/train/value_reward_corr": 0.7935790469163092, "objective/train/value_std": 0.016387939453125, "objective/train/weight_avg": 0.9994886517524719, "objective/train/weighted_lm_loss": 1.345136046409607, "objective/train/weights_max": 1.3395164012908936, "objective/train/weights_min": 0.3683066666126251, "theoretical_loss": 3.5670658509108626, "tokens_seen": 1395916800 }, { "epoch": 0.42, "learning_rate": 0.0005828117477130476, "loss": 0.0683, "theoretical_loss": 3.5670658509108626, "tokens_seen": 1395916800 }, { "epoch": 0.42, "learning_rate": 0.0005827315037714653, "loss": 0.0726, "theoretical_loss": 3.5670064558598096, "tokens_seen": 1396178944 }, { "epoch": 0.42, "learning_rate": 0.0005826512598298829, "loss": 0.0703, "theoretical_loss": 3.5669470750814813, "tokens_seen": 1396441088 }, { "epoch": 0.42, "learning_rate": 0.0005825710158883004, "loss": 0.0712, "theoretical_loss": 3.5668877085697694, "tokens_seen": 1396703232 }, { "epoch": 0.42, "learning_rate": 0.0005824907719467181, "loss": 0.073, "theoretical_loss": 3.5668283563185703, "tokens_seen": 1396965376 }, { "epoch": 0.42, "learning_rate": 0.0005824105280051356, "loss": 0.0695, "theoretical_loss": 3.566769018321782, "tokens_seen": 1397227520 }, { "epoch": 0.42, "learning_rate": 0.0005823302840635532, "loss": 0.0731, "theoretical_loss": 3.5667096945733086, "tokens_seen": 1397489664 }, { "epoch": 0.42, "learning_rate": 0.0005822500401219708, "loss": 0.0695, "theoretical_loss": 3.566650385067057, "tokens_seen": 1397751808 }, { "epoch": 0.42, "learning_rate": 0.0005821697961803883, "loss": 0.0699, "theoretical_loss": 3.5665910897969377, "tokens_seen": 1398013952 }, { "epoch": 0.42, "learning_rate": 0.0005820895522388059, "loss": 0.0724, "theoretical_loss": 3.5665318087568645, "tokens_seen": 1398276096 }, { "epoch": 0.42, "learning_rate": 0.0005820093082972236, "loss": 0.071, "theoretical_loss": 3.5664725419407564, "tokens_seen": 1398538240 }, { "epoch": 0.42, "learning_rate": 0.0005819290643556412, "loss": 0.0704, "theoretical_loss": 3.566413289342535, "tokens_seen": 1398800384 }, { "epoch": 0.42, "learning_rate": 0.0005818488204140588, "loss": 0.0702, "theoretical_loss": 3.566354050956126, "tokens_seen": 1399062528 }, { "epoch": 0.42, "learning_rate": 0.0005817685764724764, "loss": 0.0721, "theoretical_loss": 3.566294826775459, "tokens_seen": 1399324672 }, { "epoch": 0.42, "learning_rate": 0.0005816883325308939, "loss": 0.0683, "theoretical_loss": 3.566235616794466, "tokens_seen": 1399586816 }, { "epoch": 0.42, "learning_rate": 0.0005816080885893115, "loss": 0.0697, "theoretical_loss": 3.566176421007085, "tokens_seen": 1399848960 }, { "epoch": 0.42, "learning_rate": 0.0005815278446477291, "loss": 0.0691, "theoretical_loss": 3.566117239407256, "tokens_seen": 1400111104 }, { "epoch": 0.42, "learning_rate": 0.0005814476007061466, "loss": 0.0718, "theoretical_loss": 3.5660580719889237, "tokens_seen": 1400373248 }, { "epoch": 0.42, "learning_rate": 0.0005813673567645643, "loss": 0.0727, "theoretical_loss": 3.5659989187460353, "tokens_seen": 1400635392 }, { "epoch": 0.42, "learning_rate": 0.0005812871128229819, "loss": 0.0741, "theoretical_loss": 3.5659397796725427, "tokens_seen": 1400897536 }, { "epoch": 0.42, "learning_rate": 0.0005812068688813995, "loss": 0.067, "theoretical_loss": 3.565880654762402, "tokens_seen": 1401159680 }, { "epoch": 0.42, "learning_rate": 0.0005811266249398171, "loss": 0.0684, "theoretical_loss": 3.5658215440095717, "tokens_seen": 1401421824 }, { "epoch": 0.42, "learning_rate": 0.0005810463809982346, "loss": 0.0698, "theoretical_loss": 3.565762447408015, "tokens_seen": 1401683968 }, { "epoch": 0.42, "learning_rate": 0.0005809661370566522, "loss": 0.0722, "theoretical_loss": 3.5657033649516974, "tokens_seen": 1401946112 }, { "epoch": 0.42, "learning_rate": 0.0005808858931150698, "loss": 0.0693, "theoretical_loss": 3.5656442966345905, "tokens_seen": 1402208256 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.00029971322510391474, "objective/train/docs_used": 512300, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5333430767059326, "objective/train/original_loss": 1.5333430767059326, "objective/train/theoretical_loss": 3.565585242450667, "objective/train/tokens_used": 1422930400, "objective/train/value_avg": -0.00736236572265625, "objective/train/value_loss": 0.00021842159912921488, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.68017578125, "objective/train/value_reward_corr": 0.717017729068119, "objective/train/value_std": 0.01406097412109375, "objective/train/weight_avg": 1.0004018545150757, "objective/train/weighted_lm_loss": 1.5342278480529785, "objective/train/weights_max": 1.4047281742095947, "objective/train/weights_min": 0.3692037761211395, "theoretical_loss": 3.565585242450667, "tokens_seen": 1402470400 }, { "epoch": 0.43, "learning_rate": 0.0005808056491734874, "loss": 0.0744, "theoretical_loss": 3.565585242450667, "tokens_seen": 1402470400 }, { "epoch": 0.43, "learning_rate": 0.000580725405231905, "loss": 0.0715, "theoretical_loss": 3.5655262023939054, "tokens_seen": 1402732544 }, { "epoch": 0.43, "learning_rate": 0.0005806451612903226, "loss": 0.0688, "theoretical_loss": 3.5654671764582866, "tokens_seen": 1402994688 }, { "epoch": 0.43, "learning_rate": 0.0005805649173487401, "loss": 0.0703, "theoretical_loss": 3.5654081646377955, "tokens_seen": 1403256832 }, { "epoch": 0.43, "learning_rate": 0.0005804846734071579, "loss": 0.0705, "theoretical_loss": 3.5653491669264215, "tokens_seen": 1403518976 }, { "epoch": 0.43, "learning_rate": 0.0005804044294655754, "loss": 0.0709, "theoretical_loss": 3.565290183318156, "tokens_seen": 1403781120 }, { "epoch": 0.43, "learning_rate": 0.0005803241855239929, "loss": 0.0706, "theoretical_loss": 3.565231213806995, "tokens_seen": 1404043264 }, { "epoch": 0.43, "learning_rate": 0.0005802439415824106, "loss": 0.0692, "theoretical_loss": 3.5651722583869394, "tokens_seen": 1404305408 }, { "epoch": 0.43, "learning_rate": 0.0005801636976408281, "loss": 0.0723, "theoretical_loss": 3.565113317051991, "tokens_seen": 1404567552 }, { "epoch": 0.43, "learning_rate": 0.0005800834536992457, "loss": 0.07, "theoretical_loss": 3.5650543897961584, "tokens_seen": 1404829696 }, { "epoch": 0.43, "learning_rate": 0.0005800032097576633, "loss": 0.0722, "theoretical_loss": 3.5649954766134515, "tokens_seen": 1405091840 }, { "epoch": 0.43, "learning_rate": 0.0005799229658160809, "loss": 0.0725, "theoretical_loss": 3.5649365774978845, "tokens_seen": 1405353984 }, { "epoch": 0.43, "learning_rate": 0.0005798427218744984, "loss": 0.0698, "theoretical_loss": 3.5648776924434755, "tokens_seen": 1405616128 }, { "epoch": 0.43, "learning_rate": 0.0005797624779329161, "loss": 0.0725, "theoretical_loss": 3.5648188214442467, "tokens_seen": 1405878272 }, { "epoch": 0.43, "learning_rate": 0.0005796822339913337, "loss": 0.0734, "theoretical_loss": 3.5647599644942227, "tokens_seen": 1406140416 }, { "epoch": 0.43, "learning_rate": 0.0005796019900497512, "loss": 0.0713, "theoretical_loss": 3.564701121587434, "tokens_seen": 1406402560 }, { "epoch": 0.43, "learning_rate": 0.0005795217461081689, "loss": 0.067, "theoretical_loss": 3.5646422927179113, "tokens_seen": 1406664704 }, { "epoch": 0.43, "learning_rate": 0.0005794415021665864, "loss": 0.0693, "theoretical_loss": 3.564583477879692, "tokens_seen": 1406926848 }, { "epoch": 0.43, "learning_rate": 0.0005793612582250041, "loss": 0.0707, "theoretical_loss": 3.5645246770668164, "tokens_seen": 1407188992 }, { "epoch": 0.43, "learning_rate": 0.0005792810142834216, "loss": 0.0717, "theoretical_loss": 3.5644658902733273, "tokens_seen": 1407451136 }, { "epoch": 0.43, "learning_rate": 0.0005792007703418391, "loss": 0.0714, "theoretical_loss": 3.564407117493272, "tokens_seen": 1407713280 }, { "epoch": 0.43, "learning_rate": 0.0005791205264002569, "loss": 0.0696, "theoretical_loss": 3.564348358720702, "tokens_seen": 1407975424 }, { "epoch": 0.43, "learning_rate": 0.0005790402824586744, "loss": 0.0734, "theoretical_loss": 3.564289613949671, "tokens_seen": 1408237568 }, { "epoch": 0.43, "learning_rate": 0.000578960038517092, "loss": 0.0715, "theoretical_loss": 3.5642308831742384, "tokens_seen": 1408499712 }, { "epoch": 0.43, "learning_rate": 0.0005788797945755096, "loss": 0.0707, "theoretical_loss": 3.564172166388465, "tokens_seen": 1408761856 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0017313800053671002, "objective/train/docs_used": 514827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5287766456604004, "objective/train/original_loss": 1.5287766456604004, "objective/train/theoretical_loss": 3.5641134635864153, "objective/train/tokens_used": 1429484000, "objective/train/value_avg": -0.0082550048828125, "objective/train/value_loss": 0.00014103070134297013, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.66455078125, "objective/train/value_reward_corr": 0.7031854825152273, "objective/train/value_std": 0.01306915283203125, "objective/train/weight_avg": 1.0018001794815063, "objective/train/weighted_lm_loss": 1.531276822090149, "objective/train/weights_max": 1.2216413021087646, "objective/train/weights_min": 0.5578534603118896, "theoretical_loss": 3.5641134635864153, "tokens_seen": 1409024000 }, { "epoch": 0.43, "learning_rate": 0.0005787995506339272, "loss": 0.0709, "theoretical_loss": 3.5641134635864153, "tokens_seen": 1409024000 }, { "epoch": 0.43, "learning_rate": 0.0005787193066923447, "loss": 0.0692, "theoretical_loss": 3.56405477476216, "tokens_seen": 1409286144 }, { "epoch": 0.43, "learning_rate": 0.0005786390627507623, "loss": 0.0711, "theoretical_loss": 3.563996099909771, "tokens_seen": 1409548288 }, { "epoch": 0.43, "learning_rate": 0.0005785588188091799, "loss": 0.0728, "theoretical_loss": 3.5639374390233245, "tokens_seen": 1409810432 }, { "epoch": 0.43, "learning_rate": 0.0005784785748675974, "loss": 0.0705, "theoretical_loss": 3.563878792096901, "tokens_seen": 1410072576 }, { "epoch": 0.43, "learning_rate": 0.0005783983309260151, "loss": 0.0707, "theoretical_loss": 3.5638201591245826, "tokens_seen": 1410334720 }, { "epoch": 0.43, "learning_rate": 0.0005783180869844327, "loss": 0.0696, "theoretical_loss": 3.563761540100457, "tokens_seen": 1410596864 }, { "epoch": 0.43, "learning_rate": 0.0005782378430428504, "loss": 0.0723, "theoretical_loss": 3.5637029350186156, "tokens_seen": 1410859008 }, { "epoch": 0.43, "learning_rate": 0.0005781575991012679, "loss": 0.0715, "theoretical_loss": 3.563644343873152, "tokens_seen": 1411121152 }, { "epoch": 0.43, "learning_rate": 0.0005780773551596854, "loss": 0.0741, "theoretical_loss": 3.5635857666581643, "tokens_seen": 1411383296 }, { "epoch": 0.43, "learning_rate": 0.0005779971112181031, "loss": 0.0722, "theoretical_loss": 3.5635272033677534, "tokens_seen": 1411645440 }, { "epoch": 0.43, "learning_rate": 0.0005779168672765206, "loss": 0.0734, "theoretical_loss": 3.5634686539960247, "tokens_seen": 1411907584 }, { "epoch": 0.43, "learning_rate": 0.0005778366233349382, "loss": 0.0723, "theoretical_loss": 3.5634101185370874, "tokens_seen": 1412169728 }, { "epoch": 0.43, "learning_rate": 0.0005777563793933558, "loss": 0.0709, "theoretical_loss": 3.5633515969850533, "tokens_seen": 1412431872 }, { "epoch": 0.43, "learning_rate": 0.0005776761354517734, "loss": 0.0711, "theoretical_loss": 3.5632930893340378, "tokens_seen": 1412694016 }, { "epoch": 0.43, "learning_rate": 0.000577595891510191, "loss": 0.0722, "theoretical_loss": 3.5632345955781606, "tokens_seen": 1412956160 }, { "epoch": 0.43, "learning_rate": 0.0005775156475686087, "loss": 0.0697, "theoretical_loss": 3.5631761157115456, "tokens_seen": 1413218304 }, { "epoch": 0.43, "learning_rate": 0.0005774354036270262, "loss": 0.0694, "theoretical_loss": 3.5631176497283175, "tokens_seen": 1413480448 }, { "epoch": 0.43, "learning_rate": 0.0005773551596854437, "loss": 0.0722, "theoretical_loss": 3.563059197622608, "tokens_seen": 1413742592 }, { "epoch": 0.43, "learning_rate": 0.0005772749157438614, "loss": 0.0722, "theoretical_loss": 3.56300075938855, "tokens_seen": 1414004736 }, { "epoch": 0.43, "learning_rate": 0.0005771946718022789, "loss": 0.072, "theoretical_loss": 3.5629423350202813, "tokens_seen": 1414266880 }, { "epoch": 0.43, "learning_rate": 0.0005771144278606965, "loss": 0.0708, "theoretical_loss": 3.5628839245119424, "tokens_seen": 1414529024 }, { "epoch": 0.43, "learning_rate": 0.0005770341839191141, "loss": 0.0734, "theoretical_loss": 3.5628255278576777, "tokens_seen": 1414791168 }, { "epoch": 0.43, "learning_rate": 0.0005769539399775317, "loss": 0.0721, "theoretical_loss": 3.5627671450516347, "tokens_seen": 1415053312 }, { "epoch": 0.43, "learning_rate": 0.0005768736960359494, "loss": 0.0721, "theoretical_loss": 3.5627087760879657, "tokens_seen": 1415315456 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.000985055579803884, "objective/train/docs_used": 517252, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4529213905334473, "objective/train/original_loss": 1.4529212713241577, "objective/train/theoretical_loss": 3.5626504209608254, "objective/train/tokens_used": 1436037600, "objective/train/value_avg": -0.006443023681640625, "objective/train/value_loss": 0.00012476177653297782, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.61279296875, "objective/train/value_reward_corr": 0.6847537745656294, "objective/train/value_std": 0.01068115234375, "objective/train/weight_avg": 1.0010443925857544, "objective/train/weighted_lm_loss": 1.4547419548034668, "objective/train/weights_max": 1.409537672996521, "objective/train/weights_min": 0.3798121213912964, "theoretical_loss": 3.5626504209608254, "tokens_seen": 1415577600 }, { "epoch": 0.43, "learning_rate": 0.0005767934520943669, "loss": 0.0717, "theoretical_loss": 3.5626504209608254, "tokens_seen": 1415577600 }, { "epoch": 0.43, "learning_rate": 0.0005767132081527845, "loss": 0.0676, "theoretical_loss": 3.5625920796643724, "tokens_seen": 1415839744 }, { "epoch": 0.43, "learning_rate": 0.0005766329642112021, "loss": 0.074, "theoretical_loss": 3.5625337521927687, "tokens_seen": 1416101888 }, { "epoch": 0.43, "learning_rate": 0.0005765527202696197, "loss": 0.0724, "theoretical_loss": 3.5624754385401802, "tokens_seen": 1416364032 }, { "epoch": 0.43, "learning_rate": 0.0005764724763280372, "loss": 0.0704, "theoretical_loss": 3.562417138700776, "tokens_seen": 1416626176 }, { "epoch": 0.43, "learning_rate": 0.0005763922323864549, "loss": 0.0727, "theoretical_loss": 3.5623588526687295, "tokens_seen": 1416888320 }, { "epoch": 0.43, "learning_rate": 0.0005763119884448724, "loss": 0.0682, "theoretical_loss": 3.562300580438216, "tokens_seen": 1417150464 }, { "epoch": 0.43, "learning_rate": 0.0005762317445032899, "loss": 0.073, "theoretical_loss": 3.562242322003416, "tokens_seen": 1417412608 }, { "epoch": 0.43, "learning_rate": 0.0005761515005617077, "loss": 0.0713, "theoretical_loss": 3.5621840773585127, "tokens_seen": 1417674752 }, { "epoch": 0.43, "learning_rate": 0.0005760712566201252, "loss": 0.0694, "theoretical_loss": 3.5621258464976924, "tokens_seen": 1417936896 }, { "epoch": 0.43, "learning_rate": 0.0005759910126785428, "loss": 0.0689, "theoretical_loss": 3.5620676294151465, "tokens_seen": 1418199040 }, { "epoch": 0.43, "learning_rate": 0.0005759107687369604, "loss": 0.0696, "theoretical_loss": 3.562009426105069, "tokens_seen": 1418461184 }, { "epoch": 0.43, "learning_rate": 0.000575830524795378, "loss": 0.0714, "theoretical_loss": 3.561951236561656, "tokens_seen": 1418723328 }, { "epoch": 0.43, "learning_rate": 0.0005757502808537955, "loss": 0.0727, "theoretical_loss": 3.56189306077911, "tokens_seen": 1418985472 }, { "epoch": 0.43, "learning_rate": 0.0005756700369122131, "loss": 0.072, "theoretical_loss": 3.561834898751635, "tokens_seen": 1419247616 }, { "epoch": 0.43, "learning_rate": 0.0005755897929706307, "loss": 0.0709, "theoretical_loss": 3.561776750473439, "tokens_seen": 1419509760 }, { "epoch": 0.43, "learning_rate": 0.0005755095490290483, "loss": 0.0729, "theoretical_loss": 3.561718615938733, "tokens_seen": 1419771904 }, { "epoch": 0.43, "learning_rate": 0.000575429305087466, "loss": 0.0705, "theoretical_loss": 3.5616604951417328, "tokens_seen": 1420034048 }, { "epoch": 0.43, "learning_rate": 0.0005753490611458835, "loss": 0.0707, "theoretical_loss": 3.561602388076656, "tokens_seen": 1420296192 }, { "epoch": 0.43, "learning_rate": 0.0005752688172043012, "loss": 0.0697, "theoretical_loss": 3.5615442947377254, "tokens_seen": 1420558336 }, { "epoch": 0.43, "learning_rate": 0.0005751885732627187, "loss": 0.0722, "theoretical_loss": 3.5614862151191664, "tokens_seen": 1420820480 }, { "epoch": 0.43, "learning_rate": 0.0005751083293211362, "loss": 0.0719, "theoretical_loss": 3.561428149215208, "tokens_seen": 1421082624 }, { "epoch": 0.43, "learning_rate": 0.0005750280853795539, "loss": 0.0695, "theoretical_loss": 3.561370097020083, "tokens_seen": 1421344768 }, { "epoch": 0.43, "learning_rate": 0.0005749478414379714, "loss": 0.0712, "theoretical_loss": 3.561312058528026, "tokens_seen": 1421606912 }, { "epoch": 0.43, "learning_rate": 0.000574867597496389, "loss": 0.0708, "theoretical_loss": 3.5612540337332783, "tokens_seen": 1421869056 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0007025928935036063, "objective/train/docs_used": 519513, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4979246854782104, "objective/train/original_loss": 1.497924566268921, "objective/train/theoretical_loss": 3.5611960226300816, "objective/train/tokens_used": 1442591200, "objective/train/value_avg": -0.007648468017578125, "objective/train/value_loss": 0.00016468969988636672, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.35791015625, "objective/train/value_reward_corr": 0.7218915626636218, "objective/train/value_std": 0.01317596435546875, "objective/train/weight_avg": 1.0007803440093994, "objective/train/weighted_lm_loss": 1.4997540712356567, "objective/train/weights_max": 1.2718398571014404, "objective/train/weights_min": 0.3820907473564148, "theoretical_loss": 3.5611960226300816, "tokens_seen": 1422131200 }, { "epoch": 0.43, "learning_rate": 0.0005747873535548066, "loss": 0.0722, "theoretical_loss": 3.5611960226300816, "tokens_seen": 1422131200 }, { "epoch": 0.43, "learning_rate": 0.0005747071096132242, "loss": 0.0712, "theoretical_loss": 3.561138025212683, "tokens_seen": 1422393344 }, { "epoch": 0.43, "learning_rate": 0.0005746268656716417, "loss": 0.07, "theoretical_loss": 3.561080041475332, "tokens_seen": 1422655488 }, { "epoch": 0.43, "learning_rate": 0.0005745466217300594, "loss": 0.0693, "theoretical_loss": 3.5610220714122827, "tokens_seen": 1422917632 }, { "epoch": 0.43, "learning_rate": 0.000574466377788477, "loss": 0.0698, "theoretical_loss": 3.560964115017791, "tokens_seen": 1423179776 }, { "epoch": 0.43, "learning_rate": 0.0005743861338468946, "loss": 0.07, "theoretical_loss": 3.560906172286118, "tokens_seen": 1423441920 }, { "epoch": 0.43, "learning_rate": 0.0005743058899053122, "loss": 0.0703, "theoretical_loss": 3.5608482432115265, "tokens_seen": 1423704064 }, { "epoch": 0.43, "learning_rate": 0.0005742256459637297, "loss": 0.0699, "theoretical_loss": 3.5607903277882853, "tokens_seen": 1423966208 }, { "epoch": 0.43, "learning_rate": 0.0005741454020221474, "loss": 0.0712, "theoretical_loss": 3.560732426010664, "tokens_seen": 1424228352 }, { "epoch": 0.43, "learning_rate": 0.0005740651580805649, "loss": 0.0726, "theoretical_loss": 3.560674537872937, "tokens_seen": 1424490496 }, { "epoch": 0.43, "learning_rate": 0.0005739849141389825, "loss": 0.0693, "theoretical_loss": 3.560616663369382, "tokens_seen": 1424752640 }, { "epoch": 0.43, "learning_rate": 0.0005739046701974002, "loss": 0.0704, "theoretical_loss": 3.5605588024942803, "tokens_seen": 1425014784 }, { "epoch": 0.43, "learning_rate": 0.0005738244262558177, "loss": 0.0706, "theoretical_loss": 3.560500955241916, "tokens_seen": 1425276928 }, { "epoch": 0.43, "learning_rate": 0.0005737441823142353, "loss": 0.0714, "theoretical_loss": 3.5604431216065775, "tokens_seen": 1425539072 }, { "epoch": 0.43, "learning_rate": 0.0005736639383726529, "loss": 0.0704, "theoretical_loss": 3.5603853015825564, "tokens_seen": 1425801216 }, { "epoch": 0.43, "learning_rate": 0.0005735836944310705, "loss": 0.0693, "theoretical_loss": 3.560327495164147, "tokens_seen": 1426063360 }, { "epoch": 0.43, "learning_rate": 0.000573503450489488, "loss": 0.0681, "theoretical_loss": 3.5602697023456473, "tokens_seen": 1426325504 }, { "epoch": 0.43, "learning_rate": 0.0005734232065479057, "loss": 0.0706, "theoretical_loss": 3.5602119231213605, "tokens_seen": 1426587648 }, { "epoch": 0.43, "learning_rate": 0.0005733429626063232, "loss": 0.0703, "theoretical_loss": 3.5601541574855906, "tokens_seen": 1426849792 }, { "epoch": 0.43, "learning_rate": 0.0005732627186647407, "loss": 0.071, "theoretical_loss": 3.5600964054326463, "tokens_seen": 1427111936 }, { "epoch": 0.43, "learning_rate": 0.0005731824747231584, "loss": 0.0721, "theoretical_loss": 3.5600386669568405, "tokens_seen": 1427374080 }, { "epoch": 0.43, "learning_rate": 0.000573102230781576, "loss": 0.0753, "theoretical_loss": 3.559980942052488, "tokens_seen": 1427636224 }, { "epoch": 0.43, "learning_rate": 0.0005730219868399937, "loss": 0.0698, "theoretical_loss": 3.559923230713907, "tokens_seen": 1427898368 }, { "epoch": 0.43, "learning_rate": 0.0005729417428984112, "loss": 0.0702, "theoretical_loss": 3.5598655329354214, "tokens_seen": 1428160512 }, { "epoch": 0.43, "learning_rate": 0.0005728614989568288, "loss": 0.0721, "theoretical_loss": 3.5598078487113556, "tokens_seen": 1428422656 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0015544474590569735, "objective/train/docs_used": 521864, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4286561012268066, "objective/train/original_loss": 1.4286558628082275, "objective/train/theoretical_loss": 3.55975017803604, "objective/train/tokens_used": 1449144800, "objective/train/value_avg": -0.00952911376953125, "objective/train/value_loss": 0.0003103648195974529, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.6337890625, "objective/train/value_reward_corr": 0.6740575950147936, "objective/train/value_std": 0.0167236328125, "objective/train/weight_avg": 1.0016885995864868, "objective/train/weighted_lm_loss": 1.430799126625061, "objective/train/weights_max": 1.1823221445083618, "objective/train/weights_min": 0.3692566156387329, "theoretical_loss": 3.55975017803604, "tokens_seen": 1428684800 }, { "epoch": 0.43, "learning_rate": 0.0005727812550152464, "loss": 0.0732, "theoretical_loss": 3.55975017803604, "tokens_seen": 1428684800 }, { "epoch": 0.43, "learning_rate": 0.0005727010110736639, "loss": 0.0752, "theoretical_loss": 3.5596925209038055, "tokens_seen": 1428946944 }, { "epoch": 0.43, "learning_rate": 0.0005726207671320815, "loss": 0.072, "theoretical_loss": 3.55963487730899, "tokens_seen": 1429209088 }, { "epoch": 0.43, "learning_rate": 0.0005725405231904991, "loss": 0.0698, "theoretical_loss": 3.5595772472459313, "tokens_seen": 1429471232 }, { "epoch": 0.43, "learning_rate": 0.0005724602792489167, "loss": 0.0707, "theoretical_loss": 3.559519630708973, "tokens_seen": 1429733376 }, { "epoch": 0.43, "learning_rate": 0.0005723800353073343, "loss": 0.0711, "theoretical_loss": 3.5594620276924607, "tokens_seen": 1429995520 }, { "epoch": 0.43, "learning_rate": 0.000572299791365752, "loss": 0.0703, "theoretical_loss": 3.559404438190745, "tokens_seen": 1430257664 }, { "epoch": 0.43, "learning_rate": 0.0005722195474241695, "loss": 0.0706, "theoretical_loss": 3.559346862198178, "tokens_seen": 1430519808 }, { "epoch": 0.43, "learning_rate": 0.000572139303482587, "loss": 0.0717, "theoretical_loss": 3.559289299709116, "tokens_seen": 1430781952 }, { "epoch": 0.43, "learning_rate": 0.0005720590595410047, "loss": 0.0709, "theoretical_loss": 3.5592317507179194, "tokens_seen": 1431044096 }, { "epoch": 0.43, "learning_rate": 0.0005719788155994222, "loss": 0.072, "theoretical_loss": 3.5591742152189507, "tokens_seen": 1431306240 }, { "epoch": 0.43, "learning_rate": 0.0005718985716578399, "loss": 0.0694, "theoretical_loss": 3.559116693206577, "tokens_seen": 1431568384 }, { "epoch": 0.43, "learning_rate": 0.0005718183277162574, "loss": 0.0735, "theoretical_loss": 3.5590591846751685, "tokens_seen": 1431830528 }, { "epoch": 0.43, "learning_rate": 0.000571738083774675, "loss": 0.07, "theoretical_loss": 3.5590016896190977, "tokens_seen": 1432092672 }, { "epoch": 0.43, "learning_rate": 0.0005716578398330927, "loss": 0.07, "theoretical_loss": 3.5589442080327416, "tokens_seen": 1432354816 }, { "epoch": 0.43, "learning_rate": 0.0005715775958915102, "loss": 0.071, "theoretical_loss": 3.5588867399104798, "tokens_seen": 1432616960 }, { "epoch": 0.43, "learning_rate": 0.0005714973519499278, "loss": 0.0718, "theoretical_loss": 3.5588292852466967, "tokens_seen": 1432879104 }, { "epoch": 0.43, "learning_rate": 0.0005714171080083454, "loss": 0.0671, "theoretical_loss": 3.558771844035779, "tokens_seen": 1433141248 }, { "epoch": 0.43, "learning_rate": 0.000571336864066763, "loss": 0.0681, "theoretical_loss": 3.5587144162721156, "tokens_seen": 1433403392 }, { "epoch": 0.43, "learning_rate": 0.0005712566201251805, "loss": 0.0687, "theoretical_loss": 3.5586570019501016, "tokens_seen": 1433665536 }, { "epoch": 0.43, "learning_rate": 0.0005711763761835982, "loss": 0.0737, "theoretical_loss": 3.558599601064133, "tokens_seen": 1433927680 }, { "epoch": 0.43, "learning_rate": 0.0005710961322420157, "loss": 0.072, "theoretical_loss": 3.5585422136086104, "tokens_seen": 1434189824 }, { "epoch": 0.43, "learning_rate": 0.0005710158883004333, "loss": 0.0719, "theoretical_loss": 3.558484839577937, "tokens_seen": 1434451968 }, { "epoch": 0.43, "learning_rate": 0.000570935644358851, "loss": 0.0711, "theoretical_loss": 3.55842747896652, "tokens_seen": 1434714112 }, { "epoch": 0.43, "learning_rate": 0.0005708554004172685, "loss": 0.0678, "theoretical_loss": 3.55837013176877, "tokens_seen": 1434976256 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.001079781330190599, "objective/train/docs_used": 524207, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4617466926574707, "objective/train/original_loss": 1.4617464542388916, "objective/train/theoretical_loss": 3.5583127979791005, "objective/train/tokens_used": 1455698400, "objective/train/value_avg": -0.0064697265625, "objective/train/value_loss": 0.00015081166930031031, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.29443359375, "objective/train/value_reward_corr": 0.6869328360522499, "objective/train/value_std": 0.012359619140625, "objective/train/weight_avg": 1.0011464357376099, "objective/train/weighted_lm_loss": 1.463802695274353, "objective/train/weights_max": 1.2564263343811035, "objective/train/weights_min": 0.3687688410282135, "theoretical_loss": 3.5583127979791005, "tokens_seen": 1435238400 }, { "epoch": 0.43, "learning_rate": 0.0005707751564756861, "loss": 0.0698, "theoretical_loss": 3.5583127979791005, "tokens_seen": 1435238400 }, { "epoch": 0.44, "learning_rate": 0.0005706949125341037, "loss": 0.0712, "theoretical_loss": 3.558255477591928, "tokens_seen": 1435500544 }, { "epoch": 0.44, "learning_rate": 0.0005706146685925213, "loss": 0.0694, "theoretical_loss": 3.558198170601674, "tokens_seen": 1435762688 }, { "epoch": 0.44, "learning_rate": 0.0005705344246509389, "loss": 0.0695, "theoretical_loss": 3.558140877002761, "tokens_seen": 1436024832 }, { "epoch": 0.44, "learning_rate": 0.0005704541807093565, "loss": 0.0726, "theoretical_loss": 3.5580835967896167, "tokens_seen": 1436286976 }, { "epoch": 0.44, "learning_rate": 0.000570373936767774, "loss": 0.0696, "theoretical_loss": 3.5580263299566712, "tokens_seen": 1436549120 }, { "epoch": 0.44, "learning_rate": 0.0005702936928261916, "loss": 0.0726, "theoretical_loss": 3.5579690764983587, "tokens_seen": 1436811264 }, { "epoch": 0.44, "learning_rate": 0.0005702134488846092, "loss": 0.0669, "theoretical_loss": 3.557911836409115, "tokens_seen": 1437073408 }, { "epoch": 0.44, "learning_rate": 0.0005701332049430268, "loss": 0.0695, "theoretical_loss": 3.557854609683382, "tokens_seen": 1437335552 }, { "epoch": 0.44, "learning_rate": 0.0005700529610014445, "loss": 0.0694, "theoretical_loss": 3.5577973963156024, "tokens_seen": 1437597696 }, { "epoch": 0.44, "learning_rate": 0.000569972717059862, "loss": 0.0692, "theoretical_loss": 3.557740196300224, "tokens_seen": 1437859840 }, { "epoch": 0.44, "learning_rate": 0.0005698924731182796, "loss": 0.0726, "theoretical_loss": 3.5576830096316963, "tokens_seen": 1438121984 }, { "epoch": 0.44, "learning_rate": 0.0005698122291766972, "loss": 0.0714, "theoretical_loss": 3.5576258363044735, "tokens_seen": 1438384128 }, { "epoch": 0.44, "learning_rate": 0.0005697319852351147, "loss": 0.073, "theoretical_loss": 3.5575686763130117, "tokens_seen": 1438646272 }, { "epoch": 0.44, "learning_rate": 0.0005696517412935323, "loss": 0.0717, "theoretical_loss": 3.5575115296517725, "tokens_seen": 1438908416 }, { "epoch": 0.44, "learning_rate": 0.0005695714973519499, "loss": 0.0707, "theoretical_loss": 3.5574543963152188, "tokens_seen": 1439170560 }, { "epoch": 0.44, "learning_rate": 0.0005694912534103675, "loss": 0.0688, "theoretical_loss": 3.5573972762978174, "tokens_seen": 1439432704 }, { "epoch": 0.44, "learning_rate": 0.000569411009468785, "loss": 0.0742, "theoretical_loss": 3.5573401695940383, "tokens_seen": 1439694848 }, { "epoch": 0.44, "learning_rate": 0.0005693307655272028, "loss": 0.0704, "theoretical_loss": 3.557283076198356, "tokens_seen": 1439956992 }, { "epoch": 0.44, "learning_rate": 0.0005692505215856203, "loss": 0.0725, "theoretical_loss": 3.557225996105246, "tokens_seen": 1440219136 }, { "epoch": 0.44, "learning_rate": 0.0005691702776440379, "loss": 0.0735, "theoretical_loss": 3.5571689293091895, "tokens_seen": 1440481280 }, { "epoch": 0.44, "learning_rate": 0.0005690900337024555, "loss": 0.0693, "theoretical_loss": 3.5571118758046696, "tokens_seen": 1440743424 }, { "epoch": 0.44, "learning_rate": 0.000569009789760873, "loss": 0.0682, "theoretical_loss": 3.5570548355861726, "tokens_seen": 1441005568 }, { "epoch": 0.44, "learning_rate": 0.0005689295458192907, "loss": 0.0733, "theoretical_loss": 3.5569978086481884, "tokens_seen": 1441267712 }, { "epoch": 0.44, "learning_rate": 0.0005688493018777082, "loss": 0.0682, "theoretical_loss": 3.556940794985211, "tokens_seen": 1441529856 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.001043870928697288, "objective/train/docs_used": 526671, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3674421310424805, "objective/train/original_loss": 1.3674421310424805, "objective/train/theoretical_loss": 3.5568837945917364, "objective/train/tokens_used": 1462252000, "objective/train/value_avg": -0.00707244873046875, "objective/train/value_loss": 0.00022587105922866613, "objective/train/value_max": -6.556510925292969e-05, "objective/train/value_min": -0.299560546875, "objective/train/value_reward_corr": 0.6144987543879608, "objective/train/value_std": 0.0108795166015625, "objective/train/weight_avg": 1.001141905784607, "objective/train/weighted_lm_loss": 1.3684946298599243, "objective/train/weights_max": 1.2918591499328613, "objective/train/weights_min": 0.37235790491104126, "theoretical_loss": 3.5568837945917364, "tokens_seen": 1441792000 }, { "epoch": 0.44, "learning_rate": 0.0005687690579361258, "loss": 0.0703, "theoretical_loss": 3.5568837945917364, "tokens_seen": 1441792000 }, { "epoch": 0.44, "learning_rate": 0.0005686888139945435, "loss": 0.0734, "theoretical_loss": 3.556826807462264, "tokens_seen": 1442054144 }, { "epoch": 0.44, "learning_rate": 0.000568608570052961, "loss": 0.0711, "theoretical_loss": 3.5567698335912983, "tokens_seen": 1442316288 }, { "epoch": 0.44, "learning_rate": 0.0005685283261113786, "loss": 0.0725, "theoretical_loss": 3.5567128729733444, "tokens_seen": 1442578432 }, { "epoch": 0.44, "learning_rate": 0.0005684480821697962, "loss": 0.0701, "theoretical_loss": 3.5566559256029118, "tokens_seen": 1442840576 }, { "epoch": 0.44, "learning_rate": 0.0005683678382282138, "loss": 0.0688, "theoretical_loss": 3.556598991474515, "tokens_seen": 1443102720 }, { "epoch": 0.44, "learning_rate": 0.0005682875942866313, "loss": 0.071, "theoretical_loss": 3.556542070582669, "tokens_seen": 1443364864 }, { "epoch": 0.44, "learning_rate": 0.000568207350345049, "loss": 0.0706, "theoretical_loss": 3.5564851629218928, "tokens_seen": 1443627008 }, { "epoch": 0.44, "learning_rate": 0.0005681271064034665, "loss": 0.0688, "theoretical_loss": 3.55642826848671, "tokens_seen": 1443889152 }, { "epoch": 0.44, "learning_rate": 0.000568046862461884, "loss": 0.0681, "theoretical_loss": 3.5563713872716467, "tokens_seen": 1444151296 }, { "epoch": 0.44, "learning_rate": 0.0005679666185203018, "loss": 0.0719, "theoretical_loss": 3.5563145192712318, "tokens_seen": 1444413440 }, { "epoch": 0.44, "learning_rate": 0.0005678863745787193, "loss": 0.0722, "theoretical_loss": 3.5562576644799977, "tokens_seen": 1444675584 }, { "epoch": 0.44, "learning_rate": 0.000567806130637137, "loss": 0.0719, "theoretical_loss": 3.5562008228924804, "tokens_seen": 1444937728 }, { "epoch": 0.44, "learning_rate": 0.0005677258866955545, "loss": 0.07, "theoretical_loss": 3.556143994503219, "tokens_seen": 1445199872 }, { "epoch": 0.44, "learning_rate": 0.0005676456427539721, "loss": 0.0672, "theoretical_loss": 3.5560871793067554, "tokens_seen": 1445462016 }, { "epoch": 0.44, "learning_rate": 0.0005675653988123897, "loss": 0.0695, "theoretical_loss": 3.556030377297635, "tokens_seen": 1445724160 }, { "epoch": 0.44, "learning_rate": 0.0005674851548708073, "loss": 0.0715, "theoretical_loss": 3.5559735884704073, "tokens_seen": 1445986304 }, { "epoch": 0.44, "learning_rate": 0.0005674049109292248, "loss": 0.0703, "theoretical_loss": 3.5559168128196235, "tokens_seen": 1446248448 }, { "epoch": 0.44, "learning_rate": 0.0005673246669876424, "loss": 0.0698, "theoretical_loss": 3.555860050339839, "tokens_seen": 1446510592 }, { "epoch": 0.44, "learning_rate": 0.00056724442304606, "loss": 0.0705, "theoretical_loss": 3.555803301025613, "tokens_seen": 1446772736 }, { "epoch": 0.44, "learning_rate": 0.0005671641791044776, "loss": 0.0694, "theoretical_loss": 3.555746564871506, "tokens_seen": 1447034880 }, { "epoch": 0.44, "learning_rate": 0.0005670839351628953, "loss": 0.068, "theoretical_loss": 3.5556898418720837, "tokens_seen": 1447297024 }, { "epoch": 0.44, "learning_rate": 0.0005670036912213128, "loss": 0.0674, "theoretical_loss": 3.555633132021914, "tokens_seen": 1447559168 }, { "epoch": 0.44, "learning_rate": 0.0005669234472797304, "loss": 0.07, "theoretical_loss": 3.5555764353155688, "tokens_seen": 1447821312 }, { "epoch": 0.44, "learning_rate": 0.000566843203338148, "loss": 0.07, "theoretical_loss": 3.555519751747622, "tokens_seen": 1448083456 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0011815315810963511, "objective/train/docs_used": 529166, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4328693151474, "objective/train/original_loss": 1.4328691959381104, "objective/train/theoretical_loss": 3.5554630813126513, "objective/train/tokens_used": 1468805600, "objective/train/value_avg": -0.00795745849609375, "objective/train/value_loss": 0.0003213270101696253, "objective/train/value_max": -9.685754776000977e-05, "objective/train/value_min": -0.9267578125, "objective/train/value_reward_corr": 0.7253755597865478, "objective/train/value_std": 0.0184173583984375, "objective/train/weight_avg": 1.0013219118118286, "objective/train/weighted_lm_loss": 1.4348268508911133, "objective/train/weights_max": 1.3883624076843262, "objective/train/weights_min": 0.3690347969532013, "theoretical_loss": 3.5554630813126513, "tokens_seen": 1448345600 }, { "epoch": 0.44, "learning_rate": 0.0005667629593965655, "loss": 0.0709, "theoretical_loss": 3.5554630813126513, "tokens_seen": 1448345600 }, { "epoch": 0.44, "learning_rate": 0.0005666827154549832, "loss": 0.0718, "theoretical_loss": 3.5554064240052385, "tokens_seen": 1448607744 }, { "epoch": 0.44, "learning_rate": 0.0005666024715134007, "loss": 0.0695, "theoretical_loss": 3.5553497798199674, "tokens_seen": 1448869888 }, { "epoch": 0.44, "learning_rate": 0.0005665222275718183, "loss": 0.0723, "theoretical_loss": 3.555293148751426, "tokens_seen": 1449132032 }, { "epoch": 0.44, "learning_rate": 0.000566441983630236, "loss": 0.0721, "theoretical_loss": 3.555236530794204, "tokens_seen": 1449394176 }, { "epoch": 0.44, "learning_rate": 0.0005663617396886536, "loss": 0.07, "theoretical_loss": 3.5551799259428964, "tokens_seen": 1449656320 }, { "epoch": 0.44, "learning_rate": 0.0005662814957470711, "loss": 0.0696, "theoretical_loss": 3.5551233341920994, "tokens_seen": 1449918464 }, { "epoch": 0.44, "learning_rate": 0.0005662012518054887, "loss": 0.0717, "theoretical_loss": 3.555066755536414, "tokens_seen": 1450180608 }, { "epoch": 0.44, "learning_rate": 0.0005661210078639063, "loss": 0.0736, "theoretical_loss": 3.555010189970443, "tokens_seen": 1450442752 }, { "epoch": 0.44, "learning_rate": 0.0005660407639223238, "loss": 0.0681, "theoretical_loss": 3.5549536374887936, "tokens_seen": 1450704896 }, { "epoch": 0.44, "learning_rate": 0.0005659605199807415, "loss": 0.0708, "theoretical_loss": 3.5548970980860757, "tokens_seen": 1450967040 }, { "epoch": 0.44, "learning_rate": 0.000565880276039159, "loss": 0.0697, "theoretical_loss": 3.5548405717569023, "tokens_seen": 1451229184 }, { "epoch": 0.44, "learning_rate": 0.0005658000320975766, "loss": 0.0719, "theoretical_loss": 3.5547840584958896, "tokens_seen": 1451491328 }, { "epoch": 0.44, "learning_rate": 0.0005657197881559943, "loss": 0.0725, "theoretical_loss": 3.554727558297657, "tokens_seen": 1451753472 }, { "epoch": 0.44, "learning_rate": 0.0005656395442144118, "loss": 0.0701, "theoretical_loss": 3.554671071156828, "tokens_seen": 1452015616 }, { "epoch": 0.44, "learning_rate": 0.0005655593002728294, "loss": 0.0707, "theoretical_loss": 3.554614597068027, "tokens_seen": 1452277760 }, { "epoch": 0.44, "learning_rate": 0.000565479056331247, "loss": 0.0716, "theoretical_loss": 3.554558136025884, "tokens_seen": 1452539904 }, { "epoch": 0.44, "learning_rate": 0.0005653988123896646, "loss": 0.072, "theoretical_loss": 3.554501688025031, "tokens_seen": 1452802048 }, { "epoch": 0.44, "learning_rate": 0.0005653185684480822, "loss": 0.0703, "theoretical_loss": 3.554445253060103, "tokens_seen": 1453064192 }, { "epoch": 0.44, "learning_rate": 0.0005652383245064998, "loss": 0.07, "theoretical_loss": 3.5543888311257397, "tokens_seen": 1453326336 }, { "epoch": 0.44, "learning_rate": 0.0005651580805649173, "loss": 0.0716, "theoretical_loss": 3.5543324222165813, "tokens_seen": 1453588480 }, { "epoch": 0.44, "learning_rate": 0.0005650778366233349, "loss": 0.0693, "theoretical_loss": 3.554276026327274, "tokens_seen": 1453850624 }, { "epoch": 0.44, "learning_rate": 0.0005649975926817526, "loss": 0.0715, "theoretical_loss": 3.5542196434524653, "tokens_seen": 1454112768 }, { "epoch": 0.44, "learning_rate": 0.0005649173487401701, "loss": 0.0721, "theoretical_loss": 3.554163273586806, "tokens_seen": 1454374912 }, { "epoch": 0.44, "learning_rate": 0.0005648371047985878, "loss": 0.0696, "theoretical_loss": 3.554106916724951, "tokens_seen": 1454637056 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0007077401387505233, "objective/train/docs_used": 531642, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3581182956695557, "objective/train/original_loss": 1.3581182956695557, "objective/train/theoretical_loss": 3.5540505728615583, "objective/train/tokens_used": 1475359200, "objective/train/value_avg": -0.006244659423828125, "objective/train/value_loss": 0.0002848842123057693, "objective/train/value_max": -7.599592208862305e-05, "objective/train/value_min": -0.51806640625, "objective/train/value_reward_corr": 0.5166717049484271, "objective/train/value_std": 0.01073455810546875, "objective/train/weight_avg": 1.0008255243301392, "objective/train/weighted_lm_loss": 1.3589487075805664, "objective/train/weights_max": 1.625488519668579, "objective/train/weights_min": 0.22773942351341248, "theoretical_loss": 3.5540505728615583, "tokens_seen": 1454899200 }, { "epoch": 0.44, "learning_rate": 0.0005647568608570053, "loss": 0.0699, "theoretical_loss": 3.5540505728615583, "tokens_seen": 1454899200 }, { "epoch": 0.44, "learning_rate": 0.0005646766169154229, "loss": 0.0704, "theoretical_loss": 3.5539942419912878, "tokens_seen": 1455161344 }, { "epoch": 0.44, "learning_rate": 0.0005645963729738405, "loss": 0.0703, "theoretical_loss": 3.553937924108804, "tokens_seen": 1455423488 }, { "epoch": 0.44, "learning_rate": 0.0005645161290322581, "loss": 0.0687, "theoretical_loss": 3.5538816192087728, "tokens_seen": 1455685632 }, { "epoch": 0.44, "learning_rate": 0.0005644358850906756, "loss": 0.0693, "theoretical_loss": 3.5538253272858658, "tokens_seen": 1455947776 }, { "epoch": 0.44, "learning_rate": 0.0005643556411490932, "loss": 0.07, "theoretical_loss": 3.5537690483347557, "tokens_seen": 1456209920 }, { "epoch": 0.44, "learning_rate": 0.0005642753972075108, "loss": 0.0681, "theoretical_loss": 3.5537127823501184, "tokens_seen": 1456472064 }, { "epoch": 0.44, "learning_rate": 0.0005641951532659285, "loss": 0.0694, "theoretical_loss": 3.5536565293266342, "tokens_seen": 1456734208 }, { "epoch": 0.44, "learning_rate": 0.0005641149093243461, "loss": 0.0754, "theoretical_loss": 3.553600289258986, "tokens_seen": 1456996352 }, { "epoch": 0.44, "learning_rate": 0.0005640346653827636, "loss": 0.0712, "theoretical_loss": 3.553544062141859, "tokens_seen": 1457258496 }, { "epoch": 0.44, "learning_rate": 0.0005639544214411813, "loss": 0.0704, "theoretical_loss": 3.5534878479699423, "tokens_seen": 1457520640 }, { "epoch": 0.44, "learning_rate": 0.0005638741774995988, "loss": 0.073, "theoretical_loss": 3.5534316467379288, "tokens_seen": 1457782784 }, { "epoch": 0.44, "learning_rate": 0.0005637939335580163, "loss": 0.0727, "theoretical_loss": 3.5533754584405126, "tokens_seen": 1458044928 }, { "epoch": 0.44, "learning_rate": 0.000563713689616434, "loss": 0.0697, "theoretical_loss": 3.553319283072393, "tokens_seen": 1458307072 }, { "epoch": 0.44, "learning_rate": 0.0005636334456748515, "loss": 0.0725, "theoretical_loss": 3.553263120628271, "tokens_seen": 1458569216 }, { "epoch": 0.44, "learning_rate": 0.0005635532017332691, "loss": 0.0706, "theoretical_loss": 3.553206971102852, "tokens_seen": 1458831360 }, { "epoch": 0.44, "learning_rate": 0.0005634729577916868, "loss": 0.0699, "theoretical_loss": 3.5531508344908436, "tokens_seen": 1459093504 }, { "epoch": 0.44, "learning_rate": 0.0005633927138501044, "loss": 0.0711, "theoretical_loss": 3.5530947107869557, "tokens_seen": 1459355648 }, { "epoch": 0.44, "learning_rate": 0.0005633124699085219, "loss": 0.0722, "theoretical_loss": 3.5530385999859035, "tokens_seen": 1459617792 }, { "epoch": 0.44, "learning_rate": 0.0005632322259669395, "loss": 0.0709, "theoretical_loss": 3.5529825020824033, "tokens_seen": 1459879936 }, { "epoch": 0.44, "learning_rate": 0.0005631519820253571, "loss": 0.0713, "theoretical_loss": 3.5529264170711756, "tokens_seen": 1460142080 }, { "epoch": 0.44, "learning_rate": 0.0005630717380837746, "loss": 0.0715, "theoretical_loss": 3.552870344946944, "tokens_seen": 1460404224 }, { "epoch": 0.44, "learning_rate": 0.0005629914941421923, "loss": 0.0713, "theoretical_loss": 3.5528142857044345, "tokens_seen": 1460666368 }, { "epoch": 0.44, "learning_rate": 0.0005629112502006098, "loss": 0.0707, "theoretical_loss": 3.5527582393383765, "tokens_seen": 1460928512 }, { "epoch": 0.44, "learning_rate": 0.0005628310062590275, "loss": 0.0727, "theoretical_loss": 3.5527022058435036, "tokens_seen": 1461190656 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0008472163463011384, "objective/train/docs_used": 534190, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4311131238937378, "objective/train/original_loss": 1.4311130046844482, "objective/train/theoretical_loss": 3.552646185214551, "objective/train/tokens_used": 1481912800, "objective/train/value_avg": -0.00859832763671875, "objective/train/value_loss": 0.00034199925721623003, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.50927734375, "objective/train/value_reward_corr": 0.6411651572047227, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 1.00099515914917, "objective/train/weighted_lm_loss": 1.4326586723327637, "objective/train/weights_max": 1.275532603263855, "objective/train/weights_min": 0.24629011750221252, "theoretical_loss": 3.552646185214551, "tokens_seen": 1461452800 }, { "epoch": 0.44, "learning_rate": 0.000562750762317445, "loss": 0.0702, "theoretical_loss": 3.552646185214551, "tokens_seen": 1461452800 }, { "epoch": 0.44, "learning_rate": 0.0005626705183758626, "loss": 0.07, "theoretical_loss": 3.552590177446257, "tokens_seen": 1461714944 }, { "epoch": 0.44, "learning_rate": 0.0005625902744342803, "loss": 0.0701, "theoretical_loss": 3.5525341825333645, "tokens_seen": 1461977088 }, { "epoch": 0.44, "learning_rate": 0.0005625100304926978, "loss": 0.0727, "theoretical_loss": 3.552478200470618, "tokens_seen": 1462239232 }, { "epoch": 0.44, "learning_rate": 0.0005624297865511154, "loss": 0.072, "theoretical_loss": 3.552422231252766, "tokens_seen": 1462501376 }, { "epoch": 0.44, "learning_rate": 0.000562349542609533, "loss": 0.0699, "theoretical_loss": 3.552366274874559, "tokens_seen": 1462763520 }, { "epoch": 0.44, "learning_rate": 0.0005622692986679506, "loss": 0.0707, "theoretical_loss": 3.5523103313307516, "tokens_seen": 1463025664 }, { "epoch": 0.44, "learning_rate": 0.0005621890547263681, "loss": 0.0729, "theoretical_loss": 3.5522544006161016, "tokens_seen": 1463287808 }, { "epoch": 0.44, "learning_rate": 0.0005621088107847857, "loss": 0.0703, "theoretical_loss": 3.5521984827253688, "tokens_seen": 1463549952 }, { "epoch": 0.44, "learning_rate": 0.0005620285668432034, "loss": 0.0686, "theoretical_loss": 3.5521425776533175, "tokens_seen": 1463812096 }, { "epoch": 0.44, "learning_rate": 0.0005619483229016209, "loss": 0.0696, "theoretical_loss": 3.5520866853947135, "tokens_seen": 1464074240 }, { "epoch": 0.44, "learning_rate": 0.0005618680789600386, "loss": 0.0688, "theoretical_loss": 3.5520308059443275, "tokens_seen": 1464336384 }, { "epoch": 0.44, "learning_rate": 0.0005617878350184561, "loss": 0.0723, "theoretical_loss": 3.5519749392969313, "tokens_seen": 1464598528 }, { "epoch": 0.44, "learning_rate": 0.0005617075910768738, "loss": 0.0714, "theoretical_loss": 3.5519190854473006, "tokens_seen": 1464860672 }, { "epoch": 0.44, "learning_rate": 0.0005616273471352913, "loss": 0.0707, "theoretical_loss": 3.5518632443902156, "tokens_seen": 1465122816 }, { "epoch": 0.44, "learning_rate": 0.0005615471031937089, "loss": 0.0703, "theoretical_loss": 3.5518074161204565, "tokens_seen": 1465384960 }, { "epoch": 0.44, "learning_rate": 0.0005614668592521265, "loss": 0.0731, "theoretical_loss": 3.5517516006328096, "tokens_seen": 1465647104 }, { "epoch": 0.44, "learning_rate": 0.000561386615310544, "loss": 0.0709, "theoretical_loss": 3.5516957979220627, "tokens_seen": 1465909248 }, { "epoch": 0.44, "learning_rate": 0.0005613063713689616, "loss": 0.071, "theoretical_loss": 3.551640007983007, "tokens_seen": 1466171392 }, { "epoch": 0.44, "learning_rate": 0.0005612261274273793, "loss": 0.0683, "theoretical_loss": 3.551584230810436, "tokens_seen": 1466433536 }, { "epoch": 0.44, "learning_rate": 0.0005611458834857969, "loss": 0.0708, "theoretical_loss": 3.551528466399148, "tokens_seen": 1466695680 }, { "epoch": 0.44, "learning_rate": 0.0005610656395442144, "loss": 0.0711, "theoretical_loss": 3.551472714743942, "tokens_seen": 1466957824 }, { "epoch": 0.44, "learning_rate": 0.0005609853956026321, "loss": 0.069, "theoretical_loss": 3.551416975839623, "tokens_seen": 1467219968 }, { "epoch": 0.44, "learning_rate": 0.0005609051516610496, "loss": 0.0699, "theoretical_loss": 3.551361249680996, "tokens_seen": 1467482112 }, { "epoch": 0.44, "learning_rate": 0.0005608249077194671, "loss": 0.069, "theoretical_loss": 3.5513055362628707, "tokens_seen": 1467744256 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0009279769728891551, "objective/train/docs_used": 535989, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4145874977111816, "objective/train/original_loss": 1.4145876169204712, "objective/train/theoretical_loss": 3.5512498355800597, "objective/train/tokens_used": 1488466400, "objective/train/value_avg": -0.00853729248046875, "objective/train/value_loss": 0.0003833299851976335, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.701171875, "objective/train/value_reward_corr": 0.6353014795865455, "objective/train/value_std": 0.017333984375, "objective/train/weight_avg": 1.0010976791381836, "objective/train/weighted_lm_loss": 1.4164438247680664, "objective/train/weights_max": 1.698567271232605, "objective/train/weights_min": 0.36841312050819397, "theoretical_loss": 3.5512498355800597, "tokens_seen": 1468006400 }, { "epoch": 0.44, "learning_rate": 0.0005607446637778848, "loss": 0.0726, "theoretical_loss": 3.5512498355800597, "tokens_seen": 1468006400 }, { "epoch": 0.44, "learning_rate": 0.0005606644198363023, "loss": 0.0696, "theoretical_loss": 3.5511941476273785, "tokens_seen": 1468268544 }, { "epoch": 0.45, "learning_rate": 0.0005605841758947199, "loss": 0.0688, "theoretical_loss": 3.551138472399646, "tokens_seen": 1468530688 }, { "epoch": 0.45, "learning_rate": 0.0005605039319531376, "loss": 0.0703, "theoretical_loss": 3.5510828098916836, "tokens_seen": 1468792832 }, { "epoch": 0.45, "learning_rate": 0.0005604236880115552, "loss": 0.0705, "theoretical_loss": 3.5510271600983154, "tokens_seen": 1469054976 }, { "epoch": 0.45, "learning_rate": 0.0005603434440699728, "loss": 0.0702, "theoretical_loss": 3.5509715230143692, "tokens_seen": 1469317120 }, { "epoch": 0.45, "learning_rate": 0.0005602632001283903, "loss": 0.0702, "theoretical_loss": 3.550915898634676, "tokens_seen": 1469579264 }, { "epoch": 0.45, "learning_rate": 0.0005601829561868079, "loss": 0.0678, "theoretical_loss": 3.550860286954069, "tokens_seen": 1469841408 }, { "epoch": 0.45, "learning_rate": 0.0005601027122452255, "loss": 0.068, "theoretical_loss": 3.5508046879673856, "tokens_seen": 1470103552 }, { "epoch": 0.45, "learning_rate": 0.0005600224683036431, "loss": 0.0721, "theoretical_loss": 3.550749101669465, "tokens_seen": 1470365696 }, { "epoch": 0.45, "learning_rate": 0.0005599422243620606, "loss": 0.0678, "theoretical_loss": 3.5506935280551497, "tokens_seen": 1470627840 }, { "epoch": 0.45, "learning_rate": 0.0005598619804204783, "loss": 0.0711, "theoretical_loss": 3.5506379671192865, "tokens_seen": 1470889984 }, { "epoch": 0.45, "learning_rate": 0.0005597817364788959, "loss": 0.0711, "theoretical_loss": 3.550582418856723, "tokens_seen": 1471152128 }, { "epoch": 0.45, "learning_rate": 0.0005597014925373134, "loss": 0.069, "theoretical_loss": 3.550526883262312, "tokens_seen": 1471414272 }, { "epoch": 0.45, "learning_rate": 0.0005596212485957311, "loss": 0.0711, "theoretical_loss": 3.550471360330907, "tokens_seen": 1471676416 }, { "epoch": 0.45, "learning_rate": 0.0005595410046541486, "loss": 0.069, "theoretical_loss": 3.550415850057367, "tokens_seen": 1471938560 }, { "epoch": 0.45, "learning_rate": 0.0005594607607125662, "loss": 0.0695, "theoretical_loss": 3.5503603524365523, "tokens_seen": 1472200704 }, { "epoch": 0.45, "learning_rate": 0.0005593805167709838, "loss": 0.0735, "theoretical_loss": 3.5503048674633266, "tokens_seen": 1472462848 }, { "epoch": 0.45, "learning_rate": 0.0005593002728294014, "loss": 0.0694, "theoretical_loss": 3.5502493951325564, "tokens_seen": 1472724992 }, { "epoch": 0.45, "learning_rate": 0.0005592200288878189, "loss": 0.0739, "theoretical_loss": 3.550193935439112, "tokens_seen": 1472987136 }, { "epoch": 0.45, "learning_rate": 0.0005591397849462365, "loss": 0.0697, "theoretical_loss": 3.5501384883778666, "tokens_seen": 1473249280 }, { "epoch": 0.45, "learning_rate": 0.0005590595410046541, "loss": 0.0729, "theoretical_loss": 3.5500830539436956, "tokens_seen": 1473511424 }, { "epoch": 0.45, "learning_rate": 0.0005589792970630718, "loss": 0.0692, "theoretical_loss": 3.550027632131477, "tokens_seen": 1473773568 }, { "epoch": 0.45, "learning_rate": 0.0005588990531214894, "loss": 0.0708, "theoretical_loss": 3.549972222936094, "tokens_seen": 1474035712 }, { "epoch": 0.45, "learning_rate": 0.0005588188091799069, "loss": 0.0696, "theoretical_loss": 3.5499168263524297, "tokens_seen": 1474297856 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.000858014915138483, "objective/train/docs_used": 538469, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.343380331993103, "objective/train/original_loss": 1.343380331993103, "objective/train/theoretical_loss": 3.549861442375373, "objective/train/tokens_used": 1495020000, "objective/train/value_avg": -0.0080413818359375, "objective/train/value_loss": 0.00021646420645993203, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.485107421875, "objective/train/value_reward_corr": 0.6710323577309563, "objective/train/value_std": 0.015411376953125, "objective/train/weight_avg": 1.0009558200836182, "objective/train/weighted_lm_loss": 1.3450497388839722, "objective/train/weights_max": 1.3448151350021362, "objective/train/weights_min": 0.36848655343055725, "theoretical_loss": 3.549861442375373, "tokens_seen": 1474560000 }, { "epoch": 0.45, "learning_rate": 0.0005587385652383246, "loss": 0.0717, "theoretical_loss": 3.549861442375373, "tokens_seen": 1474560000 }, { "epoch": 0.45, "learning_rate": 0.0005586583212967421, "loss": 0.0681, "theoretical_loss": 3.5498060709998143, "tokens_seen": 1474822144 }, { "epoch": 0.45, "learning_rate": 0.0005585780773551596, "loss": 0.071, "theoretical_loss": 3.5497507122206473, "tokens_seen": 1475084288 }, { "epoch": 0.45, "learning_rate": 0.0005584978334135773, "loss": 0.0705, "theoretical_loss": 3.5496953660327684, "tokens_seen": 1475346432 }, { "epoch": 0.45, "learning_rate": 0.0005584175894719948, "loss": 0.0705, "theoretical_loss": 3.5496400324310775, "tokens_seen": 1475608576 }, { "epoch": 0.45, "learning_rate": 0.0005583373455304124, "loss": 0.0668, "theoretical_loss": 3.5495847114104766, "tokens_seen": 1475870720 }, { "epoch": 0.45, "learning_rate": 0.0005582571015888301, "loss": 0.0721, "theoretical_loss": 3.549529402965873, "tokens_seen": 1476132864 }, { "epoch": 0.45, "learning_rate": 0.0005581768576472477, "loss": 0.0699, "theoretical_loss": 3.549474107092173, "tokens_seen": 1476395008 }, { "epoch": 0.45, "learning_rate": 0.0005580966137056652, "loss": 0.0734, "theoretical_loss": 3.5494188237842894, "tokens_seen": 1476657152 }, { "epoch": 0.45, "learning_rate": 0.0005580163697640829, "loss": 0.0716, "theoretical_loss": 3.5493635530371366, "tokens_seen": 1476919296 }, { "epoch": 0.45, "learning_rate": 0.0005579361258225004, "loss": 0.0694, "theoretical_loss": 3.5493082948456314, "tokens_seen": 1477181440 }, { "epoch": 0.45, "learning_rate": 0.0005578558818809179, "loss": 0.0722, "theoretical_loss": 3.549253049204695, "tokens_seen": 1477443584 }, { "epoch": 0.45, "learning_rate": 0.0005577756379393356, "loss": 0.0679, "theoretical_loss": 3.549197816109251, "tokens_seen": 1477705728 }, { "epoch": 0.45, "learning_rate": 0.0005576953939977531, "loss": 0.0693, "theoretical_loss": 3.549142595554224, "tokens_seen": 1477967872 }, { "epoch": 0.45, "learning_rate": 0.0005576151500561709, "loss": 0.0721, "theoretical_loss": 3.5490873875345446, "tokens_seen": 1478230016 }, { "epoch": 0.45, "learning_rate": 0.0005575349061145884, "loss": 0.0691, "theoretical_loss": 3.5490321920451446, "tokens_seen": 1478492160 }, { "epoch": 0.45, "learning_rate": 0.000557454662173006, "loss": 0.0703, "theoretical_loss": 3.54897700908096, "tokens_seen": 1478754304 }, { "epoch": 0.45, "learning_rate": 0.0005573744182314236, "loss": 0.0725, "theoretical_loss": 3.548921838636927, "tokens_seen": 1479016448 }, { "epoch": 0.45, "learning_rate": 0.0005572941742898411, "loss": 0.0726, "theoretical_loss": 3.5488666807079885, "tokens_seen": 1479278592 }, { "epoch": 0.45, "learning_rate": 0.0005572139303482587, "loss": 0.0703, "theoretical_loss": 3.5488115352890874, "tokens_seen": 1479540736 }, { "epoch": 0.45, "learning_rate": 0.0005571336864066763, "loss": 0.071, "theoretical_loss": 3.5487564023751714, "tokens_seen": 1479802880 }, { "epoch": 0.45, "learning_rate": 0.0005570534424650939, "loss": 0.0715, "theoretical_loss": 3.5487012819611894, "tokens_seen": 1480065024 }, { "epoch": 0.45, "learning_rate": 0.0005569731985235114, "loss": 0.0716, "theoretical_loss": 3.548646174042095, "tokens_seen": 1480327168 }, { "epoch": 0.45, "learning_rate": 0.0005568929545819291, "loss": 0.0727, "theoretical_loss": 3.5485910786128434, "tokens_seen": 1480589312 }, { "epoch": 0.45, "learning_rate": 0.0005568127106403467, "loss": 0.0721, "theoretical_loss": 3.5485359956683933, "tokens_seen": 1480851456 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0011171478545293212, "objective/train/docs_used": 540822, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4887363910675049, "objective/train/original_loss": 1.4887363910675049, "objective/train/theoretical_loss": 3.548480925203706, "objective/train/tokens_used": 1501573600, "objective/train/value_avg": -0.01031494140625, "objective/train/value_loss": 0.0003310543834231794, "objective/train/value_max": -8.749961853027344e-05, "objective/train/value_min": -0.446533203125, "objective/train/value_reward_corr": 0.6941375660042437, "objective/train/value_std": 0.017333984375, "objective/train/weight_avg": 1.0012686252593994, "objective/train/weighted_lm_loss": 1.490714430809021, "objective/train/weights_max": 1.4081617593765259, "objective/train/weights_min": 0.3702827990055084, "theoretical_loss": 3.548480925203706, "tokens_seen": 1481113600 }, { "epoch": 0.45, "learning_rate": 0.0005567324666987642, "loss": 0.0724, "theoretical_loss": 3.548480925203706, "tokens_seen": 1481113600 }, { "epoch": 0.45, "learning_rate": 0.0005566522227571819, "loss": 0.0711, "theoretical_loss": 3.548425867213747, "tokens_seen": 1481375744 }, { "epoch": 0.45, "learning_rate": 0.0005565719788155994, "loss": 0.0707, "theoretical_loss": 3.5483708216934833, "tokens_seen": 1481637888 }, { "epoch": 0.45, "learning_rate": 0.0005564917348740171, "loss": 0.0705, "theoretical_loss": 3.5483157886378844, "tokens_seen": 1481900032 }, { "epoch": 0.45, "learning_rate": 0.0005564114909324346, "loss": 0.0725, "theoretical_loss": 3.5482607680419243, "tokens_seen": 1482162176 }, { "epoch": 0.45, "learning_rate": 0.0005563312469908522, "loss": 0.0706, "theoretical_loss": 3.548205759900579, "tokens_seen": 1482424320 }, { "epoch": 0.45, "learning_rate": 0.0005562510030492698, "loss": 0.0706, "theoretical_loss": 3.548150764208828, "tokens_seen": 1482686464 }, { "epoch": 0.45, "learning_rate": 0.0005561707591076873, "loss": 0.0699, "theoretical_loss": 3.5480957809616527, "tokens_seen": 1482948608 }, { "epoch": 0.45, "learning_rate": 0.000556090515166105, "loss": 0.0725, "theoretical_loss": 3.548040810154038, "tokens_seen": 1483210752 }, { "epoch": 0.45, "learning_rate": 0.0005560102712245226, "loss": 0.0741, "theoretical_loss": 3.5479858517809717, "tokens_seen": 1483472896 }, { "epoch": 0.45, "learning_rate": 0.0005559300272829402, "loss": 0.0722, "theoretical_loss": 3.547930905837445, "tokens_seen": 1483735040 }, { "epoch": 0.45, "learning_rate": 0.0005558497833413577, "loss": 0.0748, "theoretical_loss": 3.547875972318451, "tokens_seen": 1483997184 }, { "epoch": 0.45, "learning_rate": 0.0005557695393997754, "loss": 0.0691, "theoretical_loss": 3.547821051218987, "tokens_seen": 1484259328 }, { "epoch": 0.45, "learning_rate": 0.0005556892954581929, "loss": 0.071, "theoretical_loss": 3.5477661425340514, "tokens_seen": 1484521472 }, { "epoch": 0.45, "learning_rate": 0.0005556090515166104, "loss": 0.0719, "theoretical_loss": 3.547711246258647, "tokens_seen": 1484783616 }, { "epoch": 0.45, "learning_rate": 0.0005555288075750281, "loss": 0.0731, "theoretical_loss": 3.547656362387779, "tokens_seen": 1485045760 }, { "epoch": 0.45, "learning_rate": 0.0005554485636334456, "loss": 0.0733, "theoretical_loss": 3.5476014909164553, "tokens_seen": 1485307904 }, { "epoch": 0.45, "learning_rate": 0.0005553683196918632, "loss": 0.0697, "theoretical_loss": 3.547546631839687, "tokens_seen": 1485570048 }, { "epoch": 0.45, "learning_rate": 0.0005552880757502809, "loss": 0.0705, "theoretical_loss": 3.547491785152488, "tokens_seen": 1485832192 }, { "epoch": 0.45, "learning_rate": 0.0005552078318086985, "loss": 0.0716, "theoretical_loss": 3.5474369508498755, "tokens_seen": 1486094336 }, { "epoch": 0.45, "learning_rate": 0.0005551275878671161, "loss": 0.0717, "theoretical_loss": 3.547382128926868, "tokens_seen": 1486356480 }, { "epoch": 0.45, "learning_rate": 0.0005550473439255337, "loss": 0.0698, "theoretical_loss": 3.5473273193784896, "tokens_seen": 1486618624 }, { "epoch": 0.45, "learning_rate": 0.0005549670999839512, "loss": 0.0696, "theoretical_loss": 3.547272522199764, "tokens_seen": 1486880768 }, { "epoch": 0.45, "learning_rate": 0.0005548868560423688, "loss": 0.072, "theoretical_loss": 3.5472177373857208, "tokens_seen": 1487142912 }, { "epoch": 0.45, "learning_rate": 0.0005548066121007864, "loss": 0.0682, "theoretical_loss": 3.5471629649313905, "tokens_seen": 1487405056 }, { "epoch": 0.45, "objective/train/advantage_avg": -3.483461114228703e-05, "objective/train/docs_used": 543335, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3849729299545288, "objective/train/original_loss": 1.3849728107452393, "objective/train/theoretical_loss": 3.547108204831807, "objective/train/tokens_used": 1508127200, "objective/train/value_avg": -0.007526397705078125, "objective/train/value_loss": 0.000498010718729347, "objective/train/value_max": -7.253885269165039e-05, "objective/train/value_min": -0.91748046875, "objective/train/value_reward_corr": 0.6365641012412713, "objective/train/value_std": 0.0164794921875, "objective/train/weight_avg": 1.0001769065856934, "objective/train/weighted_lm_loss": 1.3848627805709839, "objective/train/weights_max": 2.075793504714966, "objective/train/weights_min": 0.23170025646686554, "theoretical_loss": 3.547108204831807, "tokens_seen": 1487667200 }, { "epoch": 0.45, "learning_rate": 0.0005547263681592039, "loss": 0.0689, "theoretical_loss": 3.547108204831807, "tokens_seen": 1487667200 }, { "epoch": 0.45, "learning_rate": 0.0005546461242176217, "loss": 0.0726, "theoretical_loss": 3.547053457082008, "tokens_seen": 1487929344 }, { "epoch": 0.45, "learning_rate": 0.0005545658802760392, "loss": 0.0718, "theoretical_loss": 3.5469987216770322, "tokens_seen": 1488191488 }, { "epoch": 0.45, "learning_rate": 0.0005544856363344568, "loss": 0.0724, "theoretical_loss": 3.5469439986119227, "tokens_seen": 1488453632 }, { "epoch": 0.45, "learning_rate": 0.0005544053923928744, "loss": 0.07, "theoretical_loss": 3.5468892878817253, "tokens_seen": 1488715776 }, { "epoch": 0.45, "learning_rate": 0.0005543251484512919, "loss": 0.0714, "theoretical_loss": 3.546834589481488, "tokens_seen": 1488977920 }, { "epoch": 0.45, "learning_rate": 0.0005542449045097095, "loss": 0.07, "theoretical_loss": 3.5467799034062617, "tokens_seen": 1489240064 }, { "epoch": 0.45, "learning_rate": 0.0005541646605681271, "loss": 0.0688, "theoretical_loss": 3.546725229651101, "tokens_seen": 1489502208 }, { "epoch": 0.45, "learning_rate": 0.0005540844166265447, "loss": 0.071, "theoretical_loss": 3.5466705682110633, "tokens_seen": 1489764352 }, { "epoch": 0.45, "learning_rate": 0.0005540041726849623, "loss": 0.0715, "theoretical_loss": 3.546615919081207, "tokens_seen": 1490026496 }, { "epoch": 0.45, "learning_rate": 0.00055392392874338, "loss": 0.0679, "theoretical_loss": 3.546561282256596, "tokens_seen": 1490288640 }, { "epoch": 0.45, "learning_rate": 0.0005538436848017975, "loss": 0.0677, "theoretical_loss": 3.546506657732295, "tokens_seen": 1490550784 }, { "epoch": 0.45, "learning_rate": 0.0005537634408602151, "loss": 0.0712, "theoretical_loss": 3.546452045503372, "tokens_seen": 1490812928 }, { "epoch": 0.45, "learning_rate": 0.0005536831969186327, "loss": 0.0699, "theoretical_loss": 3.5463974455648994, "tokens_seen": 1491075072 }, { "epoch": 0.45, "learning_rate": 0.0005536029529770502, "loss": 0.0696, "theoretical_loss": 3.5463428579119505, "tokens_seen": 1491337216 }, { "epoch": 0.45, "learning_rate": 0.0005535227090354679, "loss": 0.0693, "theoretical_loss": 3.546288282539602, "tokens_seen": 1491599360 }, { "epoch": 0.45, "learning_rate": 0.0005534424650938854, "loss": 0.0701, "theoretical_loss": 3.5462337194429336, "tokens_seen": 1491861504 }, { "epoch": 0.45, "learning_rate": 0.000553362221152303, "loss": 0.0722, "theoretical_loss": 3.546179168617028, "tokens_seen": 1492123648 }, { "epoch": 0.45, "learning_rate": 0.0005532819772107206, "loss": 0.0689, "theoretical_loss": 3.54612463005697, "tokens_seen": 1492385792 }, { "epoch": 0.45, "learning_rate": 0.0005532017332691381, "loss": 0.0731, "theoretical_loss": 3.546070103757849, "tokens_seen": 1492647936 }, { "epoch": 0.45, "learning_rate": 0.0005531214893275557, "loss": 0.0681, "theoretical_loss": 3.546015589714755, "tokens_seen": 1492910080 }, { "epoch": 0.45, "learning_rate": 0.0005530412453859734, "loss": 0.0715, "theoretical_loss": 3.545961087922782, "tokens_seen": 1493172224 }, { "epoch": 0.45, "learning_rate": 0.000552961001444391, "loss": 0.0696, "theoretical_loss": 3.5459065983770266, "tokens_seen": 1493434368 }, { "epoch": 0.45, "learning_rate": 0.0005528807575028085, "loss": 0.0669, "theoretical_loss": 3.5458521210725893, "tokens_seen": 1493696512 }, { "epoch": 0.45, "learning_rate": 0.0005528005135612262, "loss": 0.069, "theoretical_loss": 3.545797656004571, "tokens_seen": 1493958656 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.002028031274676323, "objective/train/docs_used": 545793, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.288960337638855, "objective/train/original_loss": 1.2889604568481445, "objective/train/theoretical_loss": 3.545743203168077, "objective/train/tokens_used": 1514680800, "objective/train/value_avg": -0.0094146728515625, "objective/train/value_loss": 0.00047856621677055955, "objective/train/value_max": -6.973743438720703e-05, "objective/train/value_min": -0.8828125, "objective/train/value_reward_corr": 0.7416871098120547, "objective/train/value_std": 0.0251922607421875, "objective/train/weight_avg": 1.0022516250610352, "objective/train/weighted_lm_loss": 1.2908267974853516, "objective/train/weights_max": 2.2083611488342285, "objective/train/weights_min": 0.36811721324920654, "theoretical_loss": 3.545743203168077, "tokens_seen": 1494220800 }, { "epoch": 0.45, "learning_rate": 0.0005527202696196437, "loss": 0.0683, "theoretical_loss": 3.545743203168077, "tokens_seen": 1494220800 }, { "epoch": 0.45, "learning_rate": 0.0005526400256780613, "loss": 0.0692, "theoretical_loss": 3.545688762558216, "tokens_seen": 1494482944 }, { "epoch": 0.45, "learning_rate": 0.0005525597817364789, "loss": 0.0704, "theoretical_loss": 3.5456343341700984, "tokens_seen": 1494745088 }, { "epoch": 0.45, "learning_rate": 0.0005524795377948964, "loss": 0.07, "theoretical_loss": 3.545579917998838, "tokens_seen": 1495007232 }, { "epoch": 0.45, "learning_rate": 0.0005523992938533142, "loss": 0.0689, "theoretical_loss": 3.5455255140395505, "tokens_seen": 1495269376 }, { "epoch": 0.45, "learning_rate": 0.0005523190499117317, "loss": 0.071, "theoretical_loss": 3.5454711222873554, "tokens_seen": 1495531520 }, { "epoch": 0.45, "learning_rate": 0.0005522388059701493, "loss": 0.0712, "theoretical_loss": 3.545416742737375, "tokens_seen": 1495793664 }, { "epoch": 0.45, "learning_rate": 0.0005521585620285669, "loss": 0.0714, "theoretical_loss": 3.5453623753847343, "tokens_seen": 1496055808 }, { "epoch": 0.45, "learning_rate": 0.0005520783180869845, "loss": 0.0688, "theoretical_loss": 3.54530802022456, "tokens_seen": 1496317952 }, { "epoch": 0.45, "learning_rate": 0.000551998074145402, "loss": 0.0744, "theoretical_loss": 3.545253677251983, "tokens_seen": 1496580096 }, { "epoch": 0.45, "learning_rate": 0.0005519178302038196, "loss": 0.0698, "theoretical_loss": 3.5451993464621365, "tokens_seen": 1496842240 }, { "epoch": 0.45, "learning_rate": 0.0005518375862622372, "loss": 0.0688, "theoretical_loss": 3.5451450278501566, "tokens_seen": 1497104384 }, { "epoch": 0.45, "learning_rate": 0.0005517573423206547, "loss": 0.0736, "theoretical_loss": 3.545090721411182, "tokens_seen": 1497366528 }, { "epoch": 0.45, "learning_rate": 0.0005516770983790724, "loss": 0.0708, "theoretical_loss": 3.545036427140354, "tokens_seen": 1497628672 }, { "epoch": 0.45, "learning_rate": 0.00055159685443749, "loss": 0.0727, "theoretical_loss": 3.544982145032817, "tokens_seen": 1497890816 }, { "epoch": 0.45, "learning_rate": 0.0005515166104959077, "loss": 0.0694, "theoretical_loss": 3.5449278750837188, "tokens_seen": 1498152960 }, { "epoch": 0.45, "learning_rate": 0.0005514363665543252, "loss": 0.0734, "theoretical_loss": 3.5448736172882085, "tokens_seen": 1498415104 }, { "epoch": 0.45, "learning_rate": 0.0005513561226127427, "loss": 0.069, "theoretical_loss": 3.5448193716414393, "tokens_seen": 1498677248 }, { "epoch": 0.45, "learning_rate": 0.0005512758786711604, "loss": 0.071, "theoretical_loss": 3.5447651381385668, "tokens_seen": 1498939392 }, { "epoch": 0.45, "learning_rate": 0.0005511956347295779, "loss": 0.0708, "theoretical_loss": 3.544710916774749, "tokens_seen": 1499201536 }, { "epoch": 0.45, "learning_rate": 0.0005511153907879955, "loss": 0.0701, "theoretical_loss": 3.5446567075451463, "tokens_seen": 1499463680 }, { "epoch": 0.45, "learning_rate": 0.0005510351468464131, "loss": 0.0713, "theoretical_loss": 3.544602510444924, "tokens_seen": 1499725824 }, { "epoch": 0.45, "learning_rate": 0.0005509549029048307, "loss": 0.0701, "theoretical_loss": 3.544548325469247, "tokens_seen": 1499987968 }, { "epoch": 0.45, "learning_rate": 0.0005508746589632483, "loss": 0.0736, "theoretical_loss": 3.5444941526132863, "tokens_seen": 1500250112 }, { "epoch": 0.45, "learning_rate": 0.0005507944150216659, "loss": 0.0725, "theoretical_loss": 3.5444399918722134, "tokens_seen": 1500512256 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.00016552148736082017, "objective/train/docs_used": 548178, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3277549743652344, "objective/train/original_loss": 1.327755093574524, "objective/train/theoretical_loss": 3.5443858432412028, "objective/train/tokens_used": 1521234400, "objective/train/value_avg": -0.0071868896484375, "objective/train/value_loss": 0.00020315258007030934, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.6999380374007601, "objective/train/value_std": 0.01276397705078125, "objective/train/weight_avg": 1.0002577304840088, "objective/train/weighted_lm_loss": 1.3282614946365356, "objective/train/weights_max": 1.1451897621154785, "objective/train/weights_min": 0.23797065019607544, "theoretical_loss": 3.5443858432412028, "tokens_seen": 1500774400 }, { "epoch": 0.45, "learning_rate": 0.0005507141710800835, "loss": 0.0685, "theoretical_loss": 3.5443858432412028, "tokens_seen": 1500774400 }, { "epoch": 0.45, "learning_rate": 0.000550633927138501, "loss": 0.0691, "theoretical_loss": 3.5443317067154325, "tokens_seen": 1501036544 }, { "epoch": 0.45, "learning_rate": 0.0005505536831969187, "loss": 0.0717, "theoretical_loss": 3.544277582290083, "tokens_seen": 1501298688 }, { "epoch": 0.46, "learning_rate": 0.0005504734392553362, "loss": 0.0713, "theoretical_loss": 3.544223469960337, "tokens_seen": 1501560832 }, { "epoch": 0.46, "learning_rate": 0.0005503931953137538, "loss": 0.0723, "theoretical_loss": 3.5441693697213816, "tokens_seen": 1501822976 }, { "epoch": 0.46, "learning_rate": 0.0005503129513721714, "loss": 0.0712, "theoretical_loss": 3.5441152815684043, "tokens_seen": 1502085120 }, { "epoch": 0.46, "learning_rate": 0.0005502327074305889, "loss": 0.0697, "theoretical_loss": 3.5440612054965968, "tokens_seen": 1502347264 }, { "epoch": 0.46, "learning_rate": 0.0005501524634890067, "loss": 0.0722, "theoretical_loss": 3.544007141501154, "tokens_seen": 1502609408 }, { "epoch": 0.46, "learning_rate": 0.0005500722195474242, "loss": 0.0697, "theoretical_loss": 3.543953089577272, "tokens_seen": 1502871552 }, { "epoch": 0.46, "learning_rate": 0.0005499919756058418, "loss": 0.0717, "theoretical_loss": 3.5438990497201512, "tokens_seen": 1503133696 }, { "epoch": 0.46, "learning_rate": 0.0005499117316642594, "loss": 0.0692, "theoretical_loss": 3.5438450219249935, "tokens_seen": 1503395840 }, { "epoch": 0.46, "learning_rate": 0.000549831487722677, "loss": 0.0688, "theoretical_loss": 3.5437910061870044, "tokens_seen": 1503657984 }, { "epoch": 0.46, "learning_rate": 0.0005497512437810945, "loss": 0.071, "theoretical_loss": 3.543737002501392, "tokens_seen": 1503920128 }, { "epoch": 0.46, "learning_rate": 0.0005496709998395121, "loss": 0.0702, "theoretical_loss": 3.5436830108633663, "tokens_seen": 1504182272 }, { "epoch": 0.46, "learning_rate": 0.0005495907558979297, "loss": 0.0679, "theoretical_loss": 3.5436290312681415, "tokens_seen": 1504444416 }, { "epoch": 0.46, "learning_rate": 0.0005495105119563472, "loss": 0.0685, "theoretical_loss": 3.543575063710933, "tokens_seen": 1504706560 }, { "epoch": 0.46, "learning_rate": 0.000549430268014765, "loss": 0.0702, "theoretical_loss": 3.543521108186961, "tokens_seen": 1504968704 }, { "epoch": 0.46, "learning_rate": 0.0005493500240731825, "loss": 0.0731, "theoretical_loss": 3.543467164691445, "tokens_seen": 1505230848 }, { "epoch": 0.46, "learning_rate": 0.0005492697801316001, "loss": 0.0678, "theoretical_loss": 3.5434132332196113, "tokens_seen": 1505492992 }, { "epoch": 0.46, "learning_rate": 0.0005491895361900177, "loss": 0.0698, "theoretical_loss": 3.543359313766686, "tokens_seen": 1505755136 }, { "epoch": 0.46, "learning_rate": 0.0005491092922484352, "loss": 0.0703, "theoretical_loss": 3.543305406327899, "tokens_seen": 1506017280 }, { "epoch": 0.46, "learning_rate": 0.0005490290483068528, "loss": 0.0682, "theoretical_loss": 3.5432515108984832, "tokens_seen": 1506279424 }, { "epoch": 0.46, "learning_rate": 0.0005489488043652704, "loss": 0.0672, "theoretical_loss": 3.543197627473673, "tokens_seen": 1506541568 }, { "epoch": 0.46, "learning_rate": 0.000548868560423688, "loss": 0.0713, "theoretical_loss": 3.543143756048708, "tokens_seen": 1506803712 }, { "epoch": 0.46, "learning_rate": 0.0005487883164821056, "loss": 0.0692, "theoretical_loss": 3.5430898966188265, "tokens_seen": 1507065856 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.00014458467194344848, "objective/train/docs_used": 550589, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3810093402862549, "objective/train/original_loss": 1.3810093402862549, "objective/train/theoretical_loss": 3.543036049179274, "objective/train/tokens_used": 1527788000, "objective/train/value_avg": -0.0096435546875, "objective/train/value_loss": 0.00022767337213736027, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.396728515625, "objective/train/value_reward_corr": 0.7315385161777496, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.0002518892288208, "objective/train/weighted_lm_loss": 1.3807989358901978, "objective/train/weights_max": 1.3733340501785278, "objective/train/weights_min": 0.3778582811355591, "theoretical_loss": 3.543036049179274, "tokens_seen": 1507328000 }, { "epoch": 0.46, "learning_rate": 0.0005487080725405232, "loss": 0.0707, "theoretical_loss": 3.543036049179274, "tokens_seen": 1507328000 }, { "epoch": 0.46, "learning_rate": 0.0005486278285989408, "loss": 0.073, "theoretical_loss": 3.5429822137252955, "tokens_seen": 1507590144 }, { "epoch": 0.46, "learning_rate": 0.0005485475846573585, "loss": 0.0696, "theoretical_loss": 3.54292839025214, "tokens_seen": 1507852288 }, { "epoch": 0.46, "learning_rate": 0.000548467340715776, "loss": 0.0697, "theoretical_loss": 3.5428745787550593, "tokens_seen": 1508114432 }, { "epoch": 0.46, "learning_rate": 0.0005483870967741935, "loss": 0.0715, "theoretical_loss": 3.5428207792293076, "tokens_seen": 1508376576 }, { "epoch": 0.46, "learning_rate": 0.0005483068528326112, "loss": 0.0698, "theoretical_loss": 3.542766991670142, "tokens_seen": 1508638720 }, { "epoch": 0.46, "learning_rate": 0.0005482266088910287, "loss": 0.0706, "theoretical_loss": 3.542713216072821, "tokens_seen": 1508900864 }, { "epoch": 0.46, "learning_rate": 0.0005481463649494463, "loss": 0.0721, "theoretical_loss": 3.542659452432608, "tokens_seen": 1509163008 }, { "epoch": 0.46, "learning_rate": 0.0005480661210078639, "loss": 0.0705, "theoretical_loss": 3.542605700744768, "tokens_seen": 1509425152 }, { "epoch": 0.46, "learning_rate": 0.0005479858770662815, "loss": 0.0694, "theoretical_loss": 3.542551961004568, "tokens_seen": 1509687296 }, { "epoch": 0.46, "learning_rate": 0.000547905633124699, "loss": 0.0716, "theoretical_loss": 3.5424982332072794, "tokens_seen": 1509949440 }, { "epoch": 0.46, "learning_rate": 0.0005478253891831167, "loss": 0.0721, "theoretical_loss": 3.5424445173481756, "tokens_seen": 1510211584 }, { "epoch": 0.46, "learning_rate": 0.0005477451452415343, "loss": 0.0711, "theoretical_loss": 3.5423908134225304, "tokens_seen": 1510473728 }, { "epoch": 0.46, "learning_rate": 0.0005476649012999519, "loss": 0.0708, "theoretical_loss": 3.5423371214256245, "tokens_seen": 1510735872 }, { "epoch": 0.46, "learning_rate": 0.0005475846573583695, "loss": 0.0708, "theoretical_loss": 3.5422834413527378, "tokens_seen": 1510998016 }, { "epoch": 0.46, "learning_rate": 0.000547504413416787, "loss": 0.0716, "theoretical_loss": 3.5422297731991548, "tokens_seen": 1511260160 }, { "epoch": 0.46, "learning_rate": 0.0005474241694752047, "loss": 0.0686, "theoretical_loss": 3.542176116960162, "tokens_seen": 1511522304 }, { "epoch": 0.46, "learning_rate": 0.0005473439255336222, "loss": 0.0718, "theoretical_loss": 3.542122472631048, "tokens_seen": 1511784448 }, { "epoch": 0.46, "learning_rate": 0.0005472636815920397, "loss": 0.072, "theoretical_loss": 3.542068840207105, "tokens_seen": 1512046592 }, { "epoch": 0.46, "learning_rate": 0.0005471834376504575, "loss": 0.0696, "theoretical_loss": 3.5420152196836288, "tokens_seen": 1512308736 }, { "epoch": 0.46, "learning_rate": 0.000547103193708875, "loss": 0.0683, "theoretical_loss": 3.541961611055915, "tokens_seen": 1512570880 }, { "epoch": 0.46, "learning_rate": 0.0005470229497672926, "loss": 0.0701, "theoretical_loss": 3.5419080143192643, "tokens_seen": 1512833024 }, { "epoch": 0.46, "learning_rate": 0.0005469427058257102, "loss": 0.0704, "theoretical_loss": 3.541854429468979, "tokens_seen": 1513095168 }, { "epoch": 0.46, "learning_rate": 0.0005468624618841278, "loss": 0.0708, "theoretical_loss": 3.5418008565003647, "tokens_seen": 1513357312 }, { "epoch": 0.46, "learning_rate": 0.0005467822179425453, "loss": 0.0718, "theoretical_loss": 3.541747295408729, "tokens_seen": 1513619456 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0009963659103959799, "objective/train/docs_used": 552897, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3523280620574951, "objective/train/original_loss": 1.3523280620574951, "objective/train/theoretical_loss": 3.541693746189383, "objective/train/tokens_used": 1534341600, "objective/train/value_avg": -0.0100250244140625, "objective/train/value_loss": 0.00032382109202444553, "objective/train/value_max": -6.252527236938477e-05, "objective/train/value_min": -0.81396484375, "objective/train/value_reward_corr": 0.6972783877515103, "objective/train/value_std": 0.01727294921875, "objective/train/weight_avg": 1.0011416673660278, "objective/train/weighted_lm_loss": 1.3530480861663818, "objective/train/weights_max": 1.5500072240829468, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.541693746189383, "tokens_seen": 1513881600 }, { "epoch": 0.46, "learning_rate": 0.0005467019740009629, "loss": 0.0701, "theoretical_loss": 3.541693746189383, "tokens_seen": 1513881600 }, { "epoch": 0.46, "learning_rate": 0.0005466217300593805, "loss": 0.0687, "theoretical_loss": 3.5416402088376397, "tokens_seen": 1514143744 }, { "epoch": 0.46, "learning_rate": 0.000546541486117798, "loss": 0.0701, "theoretical_loss": 3.5415866833488154, "tokens_seen": 1514405888 }, { "epoch": 0.46, "learning_rate": 0.0005464612421762158, "loss": 0.0709, "theoretical_loss": 3.541533169718228, "tokens_seen": 1514668032 }, { "epoch": 0.46, "learning_rate": 0.0005463809982346333, "loss": 0.0701, "theoretical_loss": 3.541479667941199, "tokens_seen": 1514930176 }, { "epoch": 0.46, "learning_rate": 0.000546300754293051, "loss": 0.0706, "theoretical_loss": 3.5414261780130527, "tokens_seen": 1515192320 }, { "epoch": 0.46, "learning_rate": 0.0005462205103514685, "loss": 0.0698, "theoretical_loss": 3.5413726999291155, "tokens_seen": 1515454464 }, { "epoch": 0.46, "learning_rate": 0.000546140266409886, "loss": 0.0724, "theoretical_loss": 3.5413192336847166, "tokens_seen": 1515716608 }, { "epoch": 0.46, "learning_rate": 0.0005460600224683037, "loss": 0.0686, "theoretical_loss": 3.5412657792751876, "tokens_seen": 1515978752 }, { "epoch": 0.46, "learning_rate": 0.0005459797785267212, "loss": 0.0698, "theoretical_loss": 3.541212336695863, "tokens_seen": 1516240896 }, { "epoch": 0.46, "learning_rate": 0.0005458995345851388, "loss": 0.0702, "theoretical_loss": 3.54115890594208, "tokens_seen": 1516503040 }, { "epoch": 0.46, "learning_rate": 0.0005458192906435564, "loss": 0.0677, "theoretical_loss": 3.5411054870091787, "tokens_seen": 1516765184 }, { "epoch": 0.46, "learning_rate": 0.000545739046701974, "loss": 0.0701, "theoretical_loss": 3.541052079892502, "tokens_seen": 1517027328 }, { "epoch": 0.46, "learning_rate": 0.0005456588027603916, "loss": 0.0709, "theoretical_loss": 3.540998684587394, "tokens_seen": 1517289472 }, { "epoch": 0.46, "learning_rate": 0.0005455785588188093, "loss": 0.071, "theoretical_loss": 3.5409453010892022, "tokens_seen": 1517551616 }, { "epoch": 0.46, "learning_rate": 0.0005454983148772268, "loss": 0.0696, "theoretical_loss": 3.540891929393278, "tokens_seen": 1517813760 }, { "epoch": 0.46, "learning_rate": 0.0005454180709356443, "loss": 0.0704, "theoretical_loss": 3.540838569494974, "tokens_seen": 1518075904 }, { "epoch": 0.46, "learning_rate": 0.000545337826994062, "loss": 0.0676, "theoretical_loss": 3.540785221389646, "tokens_seen": 1518338048 }, { "epoch": 0.46, "learning_rate": 0.0005452575830524795, "loss": 0.072, "theoretical_loss": 3.5407318850726517, "tokens_seen": 1518600192 }, { "epoch": 0.46, "learning_rate": 0.0005451773391108972, "loss": 0.0713, "theoretical_loss": 3.5406785605393525, "tokens_seen": 1518862336 }, { "epoch": 0.46, "learning_rate": 0.0005450970951693147, "loss": 0.0713, "theoretical_loss": 3.540625247785111, "tokens_seen": 1519124480 }, { "epoch": 0.46, "learning_rate": 0.0005450168512277323, "loss": 0.0701, "theoretical_loss": 3.5405719468052945, "tokens_seen": 1519386624 }, { "epoch": 0.46, "learning_rate": 0.00054493660728615, "loss": 0.0682, "theoretical_loss": 3.5405186575952716, "tokens_seen": 1519648768 }, { "epoch": 0.46, "learning_rate": 0.0005448563633445675, "loss": 0.0697, "theoretical_loss": 3.5404653801504127, "tokens_seen": 1519910912 }, { "epoch": 0.46, "learning_rate": 0.0005447761194029851, "loss": 0.0713, "theoretical_loss": 3.540412114466093, "tokens_seen": 1520173056 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.000899738457519561, "objective/train/docs_used": 555119, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4808071851730347, "objective/train/original_loss": 1.4808071851730347, "objective/train/theoretical_loss": 3.5403588605376886, "objective/train/tokens_used": 1540895200, "objective/train/value_avg": -0.00998687744140625, "objective/train/value_loss": 0.0002470078179612756, "objective/train/value_max": -8.153915405273438e-05, "objective/train/value_min": -0.38720703125, "objective/train/value_reward_corr": 0.7553053169241293, "objective/train/value_std": 0.01812744140625, "objective/train/weight_avg": 1.0010114908218384, "objective/train/weighted_lm_loss": 1.4823392629623413, "objective/train/weights_max": 1.1878193616867065, "objective/train/weights_min": 0.3683270514011383, "theoretical_loss": 3.5403588605376886, "tokens_seen": 1520435200 }, { "epoch": 0.46, "learning_rate": 0.0005446958754614027, "loss": 0.0679, "theoretical_loss": 3.5403588605376886, "tokens_seen": 1520435200 }, { "epoch": 0.46, "learning_rate": 0.0005446156315198203, "loss": 0.0681, "theoretical_loss": 3.540305618360578, "tokens_seen": 1520697344 }, { "epoch": 0.46, "learning_rate": 0.0005445353875782378, "loss": 0.0732, "theoretical_loss": 3.540252387930144, "tokens_seen": 1520959488 }, { "epoch": 0.46, "learning_rate": 0.0005444551436366555, "loss": 0.0688, "theoretical_loss": 3.540199169241771, "tokens_seen": 1521221632 }, { "epoch": 0.46, "learning_rate": 0.000544374899695073, "loss": 0.0711, "theoretical_loss": 3.540145962290845, "tokens_seen": 1521483776 }, { "epoch": 0.46, "learning_rate": 0.0005442946557534905, "loss": 0.0694, "theoretical_loss": 3.5400927670727573, "tokens_seen": 1521745920 }, { "epoch": 0.46, "learning_rate": 0.0005442144118119083, "loss": 0.0708, "theoretical_loss": 3.5400395835828986, "tokens_seen": 1522008064 }, { "epoch": 0.46, "learning_rate": 0.0005441341678703258, "loss": 0.0726, "theoretical_loss": 3.539986411816665, "tokens_seen": 1522270208 }, { "epoch": 0.46, "learning_rate": 0.0005440539239287434, "loss": 0.0723, "theoretical_loss": 3.5399332517694533, "tokens_seen": 1522532352 }, { "epoch": 0.46, "learning_rate": 0.000543973679987161, "loss": 0.0685, "theoretical_loss": 3.539880103436664, "tokens_seen": 1522794496 }, { "epoch": 0.46, "learning_rate": 0.0005438934360455786, "loss": 0.0696, "theoretical_loss": 3.5398269668136986, "tokens_seen": 1523056640 }, { "epoch": 0.46, "learning_rate": 0.0005438131921039962, "loss": 0.0677, "theoretical_loss": 3.539773841895964, "tokens_seen": 1523318784 }, { "epoch": 0.46, "learning_rate": 0.0005437329481624137, "loss": 0.0707, "theoretical_loss": 3.5397207286788666, "tokens_seen": 1523580928 }, { "epoch": 0.46, "learning_rate": 0.0005436527042208313, "loss": 0.0736, "theoretical_loss": 3.539667627157818, "tokens_seen": 1523843072 }, { "epoch": 0.46, "learning_rate": 0.0005435724602792489, "loss": 0.0704, "theoretical_loss": 3.53961453732823, "tokens_seen": 1524105216 }, { "epoch": 0.46, "learning_rate": 0.0005434922163376666, "loss": 0.0697, "theoretical_loss": 3.53956145918552, "tokens_seen": 1524367360 }, { "epoch": 0.46, "learning_rate": 0.0005434119723960841, "loss": 0.0698, "theoretical_loss": 3.5395083927251045, "tokens_seen": 1524629504 }, { "epoch": 0.46, "learning_rate": 0.0005433317284545018, "loss": 0.0737, "theoretical_loss": 3.539455337942405, "tokens_seen": 1524891648 }, { "epoch": 0.46, "learning_rate": 0.0005432514845129193, "loss": 0.068, "theoretical_loss": 3.5394022948328447, "tokens_seen": 1525153792 }, { "epoch": 0.46, "learning_rate": 0.0005431712405713368, "loss": 0.069, "theoretical_loss": 3.5393492633918497, "tokens_seen": 1525415936 }, { "epoch": 0.46, "learning_rate": 0.0005430909966297545, "loss": 0.0703, "theoretical_loss": 3.5392962436148485, "tokens_seen": 1525678080 }, { "epoch": 0.46, "learning_rate": 0.000543010752688172, "loss": 0.0682, "theoretical_loss": 3.5392432354972723, "tokens_seen": 1525940224 }, { "epoch": 0.46, "learning_rate": 0.0005429305087465896, "loss": 0.0719, "theoretical_loss": 3.5391902390345544, "tokens_seen": 1526202368 }, { "epoch": 0.46, "learning_rate": 0.0005428502648050072, "loss": 0.0695, "theoretical_loss": 3.5391372542221315, "tokens_seen": 1526464512 }, { "epoch": 0.46, "learning_rate": 0.0005427700208634248, "loss": 0.0692, "theoretical_loss": 3.539084281055443, "tokens_seen": 1526726656 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.001032950938679278, "objective/train/docs_used": 557485, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3883706331253052, "objective/train/original_loss": 1.3883706331253052, "objective/train/theoretical_loss": 3.5390313195299283, "objective/train/tokens_used": 1547448800, "objective/train/value_avg": -0.00554656982421875, "objective/train/value_loss": 0.00018550669483374804, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.630859375, "objective/train/value_reward_corr": 0.6172755507616114, "objective/train/value_std": 0.00994110107421875, "objective/train/weight_avg": 1.0011130571365356, "objective/train/weighted_lm_loss": 1.39009690284729, "objective/train/weights_max": 1.3466330766677856, "objective/train/weights_min": 0.3682669997215271, "theoretical_loss": 3.5390313195299283, "tokens_seen": 1526988800 }, { "epoch": 0.46, "learning_rate": 0.0005426897769218424, "loss": 0.0712, "theoretical_loss": 3.5390313195299283, "tokens_seen": 1526988800 }, { "epoch": 0.46, "learning_rate": 0.0005426095329802601, "loss": 0.067, "theoretical_loss": 3.538978369641033, "tokens_seen": 1527250944 }, { "epoch": 0.46, "learning_rate": 0.0005425292890386776, "loss": 0.0697, "theoretical_loss": 3.538925431384203, "tokens_seen": 1527513088 }, { "epoch": 0.46, "learning_rate": 0.0005424490450970952, "loss": 0.0702, "theoretical_loss": 3.538872504754888, "tokens_seen": 1527775232 }, { "epoch": 0.46, "learning_rate": 0.0005423688011555128, "loss": 0.0697, "theoretical_loss": 3.538819589748539, "tokens_seen": 1528037376 }, { "epoch": 0.46, "learning_rate": 0.0005422885572139303, "loss": 0.0688, "theoretical_loss": 3.5387666863606104, "tokens_seen": 1528299520 }, { "epoch": 0.46, "learning_rate": 0.000542208313272348, "loss": 0.0718, "theoretical_loss": 3.5387137945865588, "tokens_seen": 1528561664 }, { "epoch": 0.46, "learning_rate": 0.0005421280693307655, "loss": 0.0701, "theoretical_loss": 3.538660914421844, "tokens_seen": 1528823808 }, { "epoch": 0.46, "learning_rate": 0.0005420478253891831, "loss": 0.0718, "theoretical_loss": 3.5386080458619276, "tokens_seen": 1529085952 }, { "epoch": 0.46, "learning_rate": 0.0005419675814476008, "loss": 0.0709, "theoretical_loss": 3.538555188902274, "tokens_seen": 1529348096 }, { "epoch": 0.46, "learning_rate": 0.0005418873375060183, "loss": 0.0701, "theoretical_loss": 3.53850234353835, "tokens_seen": 1529610240 }, { "epoch": 0.46, "learning_rate": 0.0005418070935644359, "loss": 0.0691, "theoretical_loss": 3.5384495097656252, "tokens_seen": 1529872384 }, { "epoch": 0.46, "learning_rate": 0.0005417268496228535, "loss": 0.0723, "theoretical_loss": 3.5383966875795716, "tokens_seen": 1530134528 }, { "epoch": 0.46, "learning_rate": 0.0005416466056812711, "loss": 0.0719, "theoretical_loss": 3.538343876975664, "tokens_seen": 1530396672 }, { "epoch": 0.46, "learning_rate": 0.0005415663617396886, "loss": 0.0714, "theoretical_loss": 3.5382910779493795, "tokens_seen": 1530658816 }, { "epoch": 0.46, "learning_rate": 0.0005414861177981063, "loss": 0.07, "theoretical_loss": 3.538238290496198, "tokens_seen": 1530920960 }, { "epoch": 0.46, "learning_rate": 0.0005414058738565238, "loss": 0.0688, "theoretical_loss": 3.5381855146116017, "tokens_seen": 1531183104 }, { "epoch": 0.46, "learning_rate": 0.0005413256299149413, "loss": 0.0717, "theoretical_loss": 3.5381327502910747, "tokens_seen": 1531445248 }, { "epoch": 0.46, "learning_rate": 0.000541245385973359, "loss": 0.0708, "theoretical_loss": 3.538079997530105, "tokens_seen": 1531707392 }, { "epoch": 0.46, "learning_rate": 0.0005411651420317766, "loss": 0.072, "theoretical_loss": 3.538027256324182, "tokens_seen": 1531969536 }, { "epoch": 0.46, "learning_rate": 0.0005410848980901943, "loss": 0.0696, "theoretical_loss": 3.5379745266687985, "tokens_seen": 1532231680 }, { "epoch": 0.46, "learning_rate": 0.0005410046541486118, "loss": 0.0676, "theoretical_loss": 3.537921808559449, "tokens_seen": 1532493824 }, { "epoch": 0.46, "learning_rate": 0.0005409244102070294, "loss": 0.07, "theoretical_loss": 3.537869101991631, "tokens_seen": 1532755968 }, { "epoch": 0.46, "learning_rate": 0.000540844166265447, "loss": 0.0731, "theoretical_loss": 3.537816406960845, "tokens_seen": 1533018112 }, { "epoch": 0.46, "learning_rate": 0.0005407639223238645, "loss": 0.0715, "theoretical_loss": 3.537763723462593, "tokens_seen": 1533280256 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0005670369719155133, "objective/train/docs_used": 559697, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.5663200616836548, "objective/train/original_loss": 1.5663201808929443, "objective/train/theoretical_loss": 3.537711051492379, "objective/train/tokens_used": 1554002400, "objective/train/value_avg": -0.00865936279296875, "objective/train/value_loss": 0.000221301379497163, "objective/train/value_max": -4.267692565917969e-05, "objective/train/value_min": -0.316650390625, "objective/train/value_reward_corr": 0.6577696228437152, "objective/train/value_std": 0.0131683349609375, "objective/train/weight_avg": 1.0006672143936157, "objective/train/weighted_lm_loss": 1.5662798881530762, "objective/train/weights_max": 1.3565322160720825, "objective/train/weights_min": 0.37095150351524353, "theoretical_loss": 3.537711051492379, "tokens_seen": 1533542400 }, { "epoch": 0.46, "learning_rate": 0.0005406836783822821, "loss": 0.0736, "theoretical_loss": 3.537711051492379, "tokens_seen": 1533542400 }, { "epoch": 0.46, "learning_rate": 0.0005406034344406997, "loss": 0.0708, "theoretical_loss": 3.5376583910457127, "tokens_seen": 1533804544 }, { "epoch": 0.46, "learning_rate": 0.0005405231904991174, "loss": 0.0711, "theoretical_loss": 3.537605742118102, "tokens_seen": 1534066688 }, { "epoch": 0.46, "learning_rate": 0.0005404429465575349, "loss": 0.0721, "theoretical_loss": 3.537553104705061, "tokens_seen": 1534328832 }, { "epoch": 0.47, "learning_rate": 0.0005403627026159526, "loss": 0.0747, "theoretical_loss": 3.5375004788021043, "tokens_seen": 1534590976 }, { "epoch": 0.47, "learning_rate": 0.0005402824586743701, "loss": 0.0725, "theoretical_loss": 3.537447864404749, "tokens_seen": 1534853120 }, { "epoch": 0.47, "learning_rate": 0.0005402022147327876, "loss": 0.0714, "theoretical_loss": 3.5373952615085154, "tokens_seen": 1535115264 }, { "epoch": 0.47, "learning_rate": 0.0005401219707912053, "loss": 0.0695, "theoretical_loss": 3.5373426701089263, "tokens_seen": 1535377408 }, { "epoch": 0.47, "learning_rate": 0.0005400417268496228, "loss": 0.0728, "theoretical_loss": 3.5372900902015063, "tokens_seen": 1535639552 }, { "epoch": 0.47, "learning_rate": 0.0005399614829080405, "loss": 0.0734, "theoretical_loss": 3.537237521781784, "tokens_seen": 1535901696 }, { "epoch": 0.47, "learning_rate": 0.000539881238966458, "loss": 0.072, "theoretical_loss": 3.537184964845289, "tokens_seen": 1536163840 }, { "epoch": 0.47, "learning_rate": 0.0005398009950248756, "loss": 0.0756, "theoretical_loss": 3.5371324193875533, "tokens_seen": 1536425984 }, { "epoch": 0.47, "learning_rate": 0.0005397207510832933, "loss": 0.0702, "theoretical_loss": 3.537079885404113, "tokens_seen": 1536688128 }, { "epoch": 0.47, "learning_rate": 0.0005396405071417108, "loss": 0.0732, "theoretical_loss": 3.5370273628905045, "tokens_seen": 1536950272 }, { "epoch": 0.47, "learning_rate": 0.0005395602632001284, "loss": 0.0706, "theoretical_loss": 3.5369748518422695, "tokens_seen": 1537212416 }, { "epoch": 0.47, "learning_rate": 0.000539480019258546, "loss": 0.0733, "theoretical_loss": 3.5369223522549493, "tokens_seen": 1537474560 }, { "epoch": 0.47, "learning_rate": 0.0005393997753169636, "loss": 0.0715, "theoretical_loss": 3.53686986412409, "tokens_seen": 1537736704 }, { "epoch": 0.47, "learning_rate": 0.0005393195313753811, "loss": 0.0711, "theoretical_loss": 3.5368173874452378, "tokens_seen": 1537998848 }, { "epoch": 0.47, "learning_rate": 0.0005392392874337988, "loss": 0.0716, "theoretical_loss": 3.536764922213944, "tokens_seen": 1538260992 }, { "epoch": 0.47, "learning_rate": 0.0005391590434922163, "loss": 0.071, "theoretical_loss": 3.536712468425761, "tokens_seen": 1538523136 }, { "epoch": 0.47, "learning_rate": 0.0005390787995506339, "loss": 0.0705, "theoretical_loss": 3.5366600260762433, "tokens_seen": 1538785280 }, { "epoch": 0.47, "learning_rate": 0.0005389985556090516, "loss": 0.069, "theoretical_loss": 3.5366075951609486, "tokens_seen": 1539047424 }, { "epoch": 0.47, "learning_rate": 0.0005389183116674691, "loss": 0.0712, "theoretical_loss": 3.5365551756754376, "tokens_seen": 1539309568 }, { "epoch": 0.47, "learning_rate": 0.0005388380677258867, "loss": 0.0711, "theoretical_loss": 3.5365027676152714, "tokens_seen": 1539571712 }, { "epoch": 0.47, "learning_rate": 0.0005387578237843043, "loss": 0.0726, "theoretical_loss": 3.5364503709760164, "tokens_seen": 1539833856 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0006242617382667959, "objective/train/docs_used": 562258, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4021567106246948, "objective/train/original_loss": 1.4021568298339844, "objective/train/theoretical_loss": 3.536397985753239, "objective/train/tokens_used": 1560556000, "objective/train/value_avg": -0.007061004638671875, "objective/train/value_loss": 0.00021928877686150372, "objective/train/value_max": -5.561113357543945e-05, "objective/train/value_min": -0.7822265625, "objective/train/value_reward_corr": 0.7640836101638206, "objective/train/value_std": 0.0187225341796875, "objective/train/weight_avg": 1.0007275342941284, "objective/train/weighted_lm_loss": 1.4030952453613281, "objective/train/weights_max": 1.5728368759155273, "objective/train/weights_min": 0.5366626381874084, "theoretical_loss": 3.536397985753239, "tokens_seen": 1540096000 }, { "epoch": 0.47, "learning_rate": 0.0005386775798427219, "loss": 0.0733, "theoretical_loss": 3.536397985753239, "tokens_seen": 1540096000 }, { "epoch": 0.47, "learning_rate": 0.0005385973359011395, "loss": 0.0762, "theoretical_loss": 3.53634561194251, "tokens_seen": 1540358144 }, { "epoch": 0.47, "learning_rate": 0.0005385170919595571, "loss": 0.0726, "theoretical_loss": 3.5362932495394013, "tokens_seen": 1540620288 }, { "epoch": 0.47, "learning_rate": 0.0005384368480179746, "loss": 0.0696, "theoretical_loss": 3.5362408985394875, "tokens_seen": 1540882432 }, { "epoch": 0.47, "learning_rate": 0.0005383566040763922, "loss": 0.0713, "theoretical_loss": 3.5361885589383464, "tokens_seen": 1541144576 }, { "epoch": 0.47, "learning_rate": 0.0005382763601348099, "loss": 0.0721, "theoretical_loss": 3.5361362307315574, "tokens_seen": 1541406720 }, { "epoch": 0.47, "learning_rate": 0.0005381961161932274, "loss": 0.0726, "theoretical_loss": 3.5360839139147036, "tokens_seen": 1541668864 }, { "epoch": 0.47, "learning_rate": 0.0005381158722516451, "loss": 0.0716, "theoretical_loss": 3.5360316084833685, "tokens_seen": 1541931008 }, { "epoch": 0.47, "learning_rate": 0.0005380356283100626, "loss": 0.0704, "theoretical_loss": 3.53597931443314, "tokens_seen": 1542193152 }, { "epoch": 0.47, "learning_rate": 0.0005379553843684802, "loss": 0.0723, "theoretical_loss": 3.535927031759608, "tokens_seen": 1542455296 }, { "epoch": 0.47, "learning_rate": 0.0005378751404268978, "loss": 0.072, "theoretical_loss": 3.5358747604583636, "tokens_seen": 1542717440 }, { "epoch": 0.47, "learning_rate": 0.0005377948964853153, "loss": 0.0706, "theoretical_loss": 3.5358225005250024, "tokens_seen": 1542979584 }, { "epoch": 0.47, "learning_rate": 0.0005377146525437329, "loss": 0.0735, "theoretical_loss": 3.535770251955121, "tokens_seen": 1543241728 }, { "epoch": 0.47, "learning_rate": 0.0005376344086021505, "loss": 0.07, "theoretical_loss": 3.5357180147443197, "tokens_seen": 1543503872 }, { "epoch": 0.47, "learning_rate": 0.0005375541646605681, "loss": 0.073, "theoretical_loss": 3.5356657888881986, "tokens_seen": 1543766016 }, { "epoch": 0.47, "learning_rate": 0.0005374739207189858, "loss": 0.0704, "theoretical_loss": 3.5356135743823636, "tokens_seen": 1544028160 }, { "epoch": 0.47, "learning_rate": 0.0005373936767774034, "loss": 0.0723, "theoretical_loss": 3.5355613712224203, "tokens_seen": 1544290304 }, { "epoch": 0.47, "learning_rate": 0.0005373134328358209, "loss": 0.0744, "theoretical_loss": 3.53550917940398, "tokens_seen": 1544552448 }, { "epoch": 0.47, "learning_rate": 0.0005372331888942385, "loss": 0.0691, "theoretical_loss": 3.535456998922652, "tokens_seen": 1544814592 }, { "epoch": 0.47, "learning_rate": 0.0005371529449526561, "loss": 0.0711, "theoretical_loss": 3.535404829774052, "tokens_seen": 1545076736 }, { "epoch": 0.47, "learning_rate": 0.0005370727010110736, "loss": 0.0699, "theoretical_loss": 3.535352671953796, "tokens_seen": 1545338880 }, { "epoch": 0.47, "learning_rate": 0.0005369924570694913, "loss": 0.0724, "theoretical_loss": 3.5353005254575027, "tokens_seen": 1545601024 }, { "epoch": 0.47, "learning_rate": 0.0005369122131279088, "loss": 0.0769, "theoretical_loss": 3.5352483902807945, "tokens_seen": 1545863168 }, { "epoch": 0.47, "learning_rate": 0.0005368319691863264, "loss": 0.0718, "theoretical_loss": 3.535196266419295, "tokens_seen": 1546125312 }, { "epoch": 0.47, "learning_rate": 0.0005367517252447441, "loss": 0.0721, "theoretical_loss": 3.5351441538686306, "tokens_seen": 1546387456 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0003114771971013397, "objective/train/docs_used": 564726, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4200135469436646, "objective/train/original_loss": 1.420013427734375, "objective/train/theoretical_loss": 3.535092052624429, "objective/train/tokens_used": 1567109600, "objective/train/value_avg": -0.00567626953125, "objective/train/value_loss": 0.0001583931443747133, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.67724609375, "objective/train/value_reward_corr": 0.6668066682699528, "objective/train/value_std": 0.0106658935546875, "objective/train/weight_avg": 1.0003831386566162, "objective/train/weighted_lm_loss": 1.4202409982681274, "objective/train/weights_max": 1.356201171875, "objective/train/weights_min": 0.3696674406528473, "theoretical_loss": 3.535092052624429, "tokens_seen": 1546649600 }, { "epoch": 0.47, "learning_rate": 0.0005366714813031616, "loss": 0.0688, "theoretical_loss": 3.535092052624429, "tokens_seen": 1546649600 }, { "epoch": 0.47, "learning_rate": 0.0005365912373615792, "loss": 0.0735, "theoretical_loss": 3.5350399626823226, "tokens_seen": 1546911744 }, { "epoch": 0.47, "learning_rate": 0.0005365109934199968, "loss": 0.0729, "theoretical_loss": 3.534987884037945, "tokens_seen": 1547173888 }, { "epoch": 0.47, "learning_rate": 0.0005364307494784144, "loss": 0.072, "theoretical_loss": 3.5349358166869314, "tokens_seen": 1547436032 }, { "epoch": 0.47, "learning_rate": 0.0005363505055368319, "loss": 0.0719, "theoretical_loss": 3.534883760624921, "tokens_seen": 1547698176 }, { "epoch": 0.47, "learning_rate": 0.0005362702615952496, "loss": 0.0765, "theoretical_loss": 3.534831715847555, "tokens_seen": 1547960320 }, { "epoch": 0.47, "learning_rate": 0.0005361900176536671, "loss": 0.0733, "theoretical_loss": 3.534779682350475, "tokens_seen": 1548222464 }, { "epoch": 0.47, "learning_rate": 0.0005361097737120849, "loss": 0.0748, "theoretical_loss": 3.534727660129329, "tokens_seen": 1548484608 }, { "epoch": 0.47, "learning_rate": 0.0005360295297705024, "loss": 0.071, "theoretical_loss": 3.534675649179764, "tokens_seen": 1548746752 }, { "epoch": 0.47, "learning_rate": 0.0005359492858289199, "loss": 0.0735, "theoretical_loss": 3.53462364949743, "tokens_seen": 1549008896 }, { "epoch": 0.47, "learning_rate": 0.0005358690418873376, "loss": 0.0706, "theoretical_loss": 3.5345716610779814, "tokens_seen": 1549271040 }, { "epoch": 0.47, "learning_rate": 0.0005357887979457551, "loss": 0.0721, "theoretical_loss": 3.5345196839170723, "tokens_seen": 1549533184 }, { "epoch": 0.47, "learning_rate": 0.0005357085540041727, "loss": 0.0737, "theoretical_loss": 3.534467718010361, "tokens_seen": 1549795328 }, { "epoch": 0.47, "learning_rate": 0.0005356283100625903, "loss": 0.0716, "theoretical_loss": 3.5344157633535085, "tokens_seen": 1550057472 }, { "epoch": 0.47, "learning_rate": 0.0005355480661210079, "loss": 0.0723, "theoretical_loss": 3.5343638199421763, "tokens_seen": 1550319616 }, { "epoch": 0.47, "learning_rate": 0.0005354678221794254, "loss": 0.0732, "theoretical_loss": 3.5343118877720294, "tokens_seen": 1550581760 }, { "epoch": 0.47, "learning_rate": 0.000535387578237843, "loss": 0.0717, "theoretical_loss": 3.534259966838736, "tokens_seen": 1550843904 }, { "epoch": 0.47, "learning_rate": 0.0005353073342962607, "loss": 0.0696, "theoretical_loss": 3.534208057137966, "tokens_seen": 1551106048 }, { "epoch": 0.47, "learning_rate": 0.0005352270903546782, "loss": 0.0727, "theoretical_loss": 3.5341561586653905, "tokens_seen": 1551368192 }, { "epoch": 0.47, "learning_rate": 0.0005351468464130959, "loss": 0.0693, "theoretical_loss": 3.5341042714166853, "tokens_seen": 1551630336 }, { "epoch": 0.47, "learning_rate": 0.0005350666024715134, "loss": 0.0702, "theoretical_loss": 3.5340523953875267, "tokens_seen": 1551892480 }, { "epoch": 0.47, "learning_rate": 0.0005349863585299311, "loss": 0.0706, "theoretical_loss": 3.5340005305735946, "tokens_seen": 1552154624 }, { "epoch": 0.47, "learning_rate": 0.0005349061145883486, "loss": 0.0686, "theoretical_loss": 3.533948676970571, "tokens_seen": 1552416768 }, { "epoch": 0.47, "learning_rate": 0.0005348258706467661, "loss": 0.072, "theoretical_loss": 3.533896834574139, "tokens_seen": 1552678912 }, { "epoch": 0.47, "learning_rate": 0.0005347456267051838, "loss": 0.0722, "theoretical_loss": 3.5338450033799864, "tokens_seen": 1552941056 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.0007033547153696418, "objective/train/docs_used": 567156, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3911396265029907, "objective/train/original_loss": 1.3911396265029907, "objective/train/theoretical_loss": 3.533793183383802, "objective/train/tokens_used": 1573663200, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.00024034272064454854, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.38916015625, "objective/train/value_reward_corr": 0.6741884057137439, "objective/train/value_std": 0.013458251953125, "objective/train/weight_avg": 0.9994078874588013, "objective/train/weighted_lm_loss": 1.3903698921203613, "objective/train/weights_max": 1.2013026475906372, "objective/train/weights_min": 0.37362140417099, "theoretical_loss": 3.533793183383802, "tokens_seen": 1553203200 }, { "epoch": 0.47, "learning_rate": 0.0005346653827636013, "loss": 0.0693, "theoretical_loss": 3.533793183383802, "tokens_seen": 1553203200 }, { "epoch": 0.47, "learning_rate": 0.000534585138822019, "loss": 0.0685, "theoretical_loss": 3.5337413745812767, "tokens_seen": 1553465344 }, { "epoch": 0.47, "learning_rate": 0.0005345048948804366, "loss": 0.0718, "theoretical_loss": 3.5336895769681043, "tokens_seen": 1553727488 }, { "epoch": 0.47, "learning_rate": 0.0005344246509388542, "loss": 0.0697, "theoretical_loss": 3.533637790539981, "tokens_seen": 1553989632 }, { "epoch": 0.47, "learning_rate": 0.0005343444069972717, "loss": 0.0671, "theoretical_loss": 3.533586015292606, "tokens_seen": 1554251776 }, { "epoch": 0.47, "learning_rate": 0.0005342641630556893, "loss": 0.0708, "theoretical_loss": 3.5335342512216794, "tokens_seen": 1554513920 }, { "epoch": 0.47, "learning_rate": 0.0005341839191141069, "loss": 0.073, "theoretical_loss": 3.5334824983229045, "tokens_seen": 1554776064 }, { "epoch": 0.47, "learning_rate": 0.0005341036751725244, "loss": 0.072, "theoretical_loss": 3.5334307565919874, "tokens_seen": 1555038208 }, { "epoch": 0.47, "learning_rate": 0.0005340234312309421, "loss": 0.0695, "theoretical_loss": 3.533379026024636, "tokens_seen": 1555300352 }, { "epoch": 0.47, "learning_rate": 0.0005339431872893596, "loss": 0.0719, "theoretical_loss": 3.53332730661656, "tokens_seen": 1555562496 }, { "epoch": 0.47, "learning_rate": 0.0005338629433477772, "loss": 0.0716, "theoretical_loss": 3.533275598363473, "tokens_seen": 1555824640 }, { "epoch": 0.47, "learning_rate": 0.0005337826994061949, "loss": 0.0676, "theoretical_loss": 3.53322390126109, "tokens_seen": 1556086784 }, { "epoch": 0.47, "learning_rate": 0.0005337024554646124, "loss": 0.0731, "theoretical_loss": 3.533172215305129, "tokens_seen": 1556348928 }, { "epoch": 0.47, "learning_rate": 0.0005336222115230301, "loss": 0.0681, "theoretical_loss": 3.533120540491309, "tokens_seen": 1556611072 }, { "epoch": 0.47, "learning_rate": 0.0005335419675814476, "loss": 0.0728, "theoretical_loss": 3.533068876815352, "tokens_seen": 1556873216 }, { "epoch": 0.47, "learning_rate": 0.0005334617236398652, "loss": 0.0687, "theoretical_loss": 3.5330172242729834, "tokens_seen": 1557135360 }, { "epoch": 0.47, "learning_rate": 0.0005333814796982828, "loss": 0.072, "theoretical_loss": 3.53296558285993, "tokens_seen": 1557397504 }, { "epoch": 0.47, "learning_rate": 0.0005333012357567004, "loss": 0.067, "theoretical_loss": 3.532913952571921, "tokens_seen": 1557659648 }, { "epoch": 0.47, "learning_rate": 0.0005332209918151179, "loss": 0.0699, "theoretical_loss": 3.5328623334046885, "tokens_seen": 1557921792 }, { "epoch": 0.47, "learning_rate": 0.0005331407478735357, "loss": 0.0697, "theoretical_loss": 3.532810725353966, "tokens_seen": 1558183936 }, { "epoch": 0.47, "learning_rate": 0.0005330605039319532, "loss": 0.0721, "theoretical_loss": 3.5327591284154893, "tokens_seen": 1558446080 }, { "epoch": 0.47, "learning_rate": 0.0005329802599903707, "loss": 0.0692, "theoretical_loss": 3.5327075425849985, "tokens_seen": 1558708224 }, { "epoch": 0.47, "learning_rate": 0.0005329000160487884, "loss": 0.0704, "theoretical_loss": 3.5326559678582337, "tokens_seen": 1558970368 }, { "epoch": 0.47, "learning_rate": 0.0005328197721072059, "loss": 0.0701, "theoretical_loss": 3.532604404230939, "tokens_seen": 1559232512 }, { "epoch": 0.47, "learning_rate": 0.0005327395281656235, "loss": 0.0688, "theoretical_loss": 3.53255285169886, "tokens_seen": 1559494656 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0007141765672713518, "objective/train/docs_used": 569616, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4324969053268433, "objective/train/original_loss": 1.4324970245361328, "objective/train/theoretical_loss": 3.5325013102577443, "objective/train/tokens_used": 1580216800, "objective/train/value_avg": -0.00835418701171875, "objective/train/value_loss": 0.0002993447124026716, "objective/train/value_max": -0.00010073184967041016, "objective/train/value_min": -0.72412109375, "objective/train/value_reward_corr": 0.7243982875759303, "objective/train/value_std": 0.018646240234375, "objective/train/weight_avg": 1.0008517503738403, "objective/train/weighted_lm_loss": 1.433107614517212, "objective/train/weights_max": 1.4205234050750732, "objective/train/weights_min": 0.36909955739974976, "theoretical_loss": 3.5325013102577443, "tokens_seen": 1559756800 }, { "epoch": 0.47, "learning_rate": 0.0005326592842240411, "loss": 0.0721, "theoretical_loss": 3.5325013102577443, "tokens_seen": 1559756800 }, { "epoch": 0.47, "learning_rate": 0.0005325790402824587, "loss": 0.0729, "theoretical_loss": 3.532449779903343, "tokens_seen": 1560018944 }, { "epoch": 0.47, "learning_rate": 0.0005324987963408762, "loss": 0.0693, "theoretical_loss": 3.5323982606314086, "tokens_seen": 1560281088 }, { "epoch": 0.47, "learning_rate": 0.0005324185523992938, "loss": 0.0722, "theoretical_loss": 3.5323467524376966, "tokens_seen": 1560543232 }, { "epoch": 0.47, "learning_rate": 0.0005323383084577115, "loss": 0.0715, "theoretical_loss": 3.532295255317964, "tokens_seen": 1560805376 }, { "epoch": 0.47, "learning_rate": 0.0005322580645161291, "loss": 0.0673, "theoretical_loss": 3.532243769267971, "tokens_seen": 1561067520 }, { "epoch": 0.47, "learning_rate": 0.0005321778205745467, "loss": 0.0701, "theoretical_loss": 3.5321922942834796, "tokens_seen": 1561329664 }, { "epoch": 0.47, "learning_rate": 0.0005320975766329642, "loss": 0.0738, "theoretical_loss": 3.5321408303602544, "tokens_seen": 1561591808 }, { "epoch": 0.47, "learning_rate": 0.0005320173326913819, "loss": 0.0701, "theoretical_loss": 3.532089377494062, "tokens_seen": 1561853952 }, { "epoch": 0.47, "learning_rate": 0.0005319370887497994, "loss": 0.0753, "theoretical_loss": 3.532037935680672, "tokens_seen": 1562116096 }, { "epoch": 0.47, "learning_rate": 0.0005318568448082169, "loss": 0.0714, "theoretical_loss": 3.5319865049158556, "tokens_seen": 1562378240 }, { "epoch": 0.47, "learning_rate": 0.0005317766008666346, "loss": 0.0686, "theoretical_loss": 3.5319350851953866, "tokens_seen": 1562640384 }, { "epoch": 0.47, "learning_rate": 0.0005316963569250521, "loss": 0.0703, "theoretical_loss": 3.531883676515041, "tokens_seen": 1562902528 }, { "epoch": 0.47, "learning_rate": 0.0005316161129834697, "loss": 0.073, "theoretical_loss": 3.5318322788705974, "tokens_seen": 1563164672 }, { "epoch": 0.47, "learning_rate": 0.0005315358690418874, "loss": 0.068, "theoretical_loss": 3.531780892257837, "tokens_seen": 1563426816 }, { "epoch": 0.47, "learning_rate": 0.000531455625100305, "loss": 0.0691, "theoretical_loss": 3.5317295166725424, "tokens_seen": 1563688960 }, { "epoch": 0.47, "learning_rate": 0.0005313753811587225, "loss": 0.0688, "theoretical_loss": 3.5316781521104996, "tokens_seen": 1563951104 }, { "epoch": 0.47, "learning_rate": 0.0005312951372171401, "loss": 0.0687, "theoretical_loss": 3.5316267985674954, "tokens_seen": 1564213248 }, { "epoch": 0.47, "learning_rate": 0.0005312148932755577, "loss": 0.0695, "theoretical_loss": 3.5315754560393207, "tokens_seen": 1564475392 }, { "epoch": 0.47, "learning_rate": 0.0005311346493339753, "loss": 0.0688, "theoretical_loss": 3.5315241245217672, "tokens_seen": 1564737536 }, { "epoch": 0.47, "learning_rate": 0.0005310544053923929, "loss": 0.0697, "theoretical_loss": 3.5314728040106305, "tokens_seen": 1564999680 }, { "epoch": 0.47, "learning_rate": 0.0005309741614508104, "loss": 0.073, "theoretical_loss": 3.531421494501707, "tokens_seen": 1565261824 }, { "epoch": 0.47, "learning_rate": 0.0005308939175092282, "loss": 0.0713, "theoretical_loss": 3.5313701959907955, "tokens_seen": 1565523968 }, { "epoch": 0.47, "learning_rate": 0.0005308136735676457, "loss": 0.0715, "theoretical_loss": 3.5313189084736987, "tokens_seen": 1565786112 }, { "epoch": 0.47, "learning_rate": 0.0005307334296260632, "loss": 0.0715, "theoretical_loss": 3.5312676319462195, "tokens_seen": 1566048256 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.00031399555155076087, "objective/train/docs_used": 572070, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3572434186935425, "objective/train/original_loss": 1.3572434186935425, "objective/train/theoretical_loss": 3.531216366404165, "objective/train/tokens_used": 1586770400, "objective/train/value_avg": -0.009185791015625, "objective/train/value_loss": 0.00043531539267860353, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.97998046875, "objective/train/value_reward_corr": 0.7681454448311413, "objective/train/value_std": 0.0237579345703125, "objective/train/weight_avg": 0.999882698059082, "objective/train/weighted_lm_loss": 1.3566851615905762, "objective/train/weights_max": 2.3744029998779297, "objective/train/weights_min": 0.3780485987663269, "theoretical_loss": 3.531216366404165, "tokens_seen": 1566310400 }, { "epoch": 0.47, "learning_rate": 0.0005306531856844809, "loss": 0.0684, "theoretical_loss": 3.531216366404165, "tokens_seen": 1566310400 }, { "epoch": 0.47, "learning_rate": 0.0005305729417428984, "loss": 0.0682, "theoretical_loss": 3.531165111843343, "tokens_seen": 1566572544 }, { "epoch": 0.47, "learning_rate": 0.000530492697801316, "loss": 0.0709, "theoretical_loss": 3.531113868259565, "tokens_seen": 1566834688 }, { "epoch": 0.47, "learning_rate": 0.0005304124538597336, "loss": 0.0684, "theoretical_loss": 3.5310626356486434, "tokens_seen": 1567096832 }, { "epoch": 0.47, "learning_rate": 0.0005303322099181512, "loss": 0.0689, "theoretical_loss": 3.5310114140063944, "tokens_seen": 1567358976 }, { "epoch": 0.48, "learning_rate": 0.0005302519659765687, "loss": 0.0703, "theoretical_loss": 3.530960203328635, "tokens_seen": 1567621120 }, { "epoch": 0.48, "learning_rate": 0.0005301717220349863, "loss": 0.0704, "theoretical_loss": 3.5309090036111854, "tokens_seen": 1567883264 }, { "epoch": 0.48, "learning_rate": 0.000530091478093404, "loss": 0.0704, "theoretical_loss": 3.5308578148498686, "tokens_seen": 1568145408 }, { "epoch": 0.48, "learning_rate": 0.0005300112341518215, "loss": 0.0705, "theoretical_loss": 3.5308066370405076, "tokens_seen": 1568407552 }, { "epoch": 0.48, "learning_rate": 0.0005299309902102392, "loss": 0.0695, "theoretical_loss": 3.530755470178931, "tokens_seen": 1568669696 }, { "epoch": 0.48, "learning_rate": 0.0005298507462686567, "loss": 0.0697, "theoretical_loss": 3.5307043142609666, "tokens_seen": 1568931840 }, { "epoch": 0.48, "learning_rate": 0.0005297705023270744, "loss": 0.0696, "theoretical_loss": 3.530653169282447, "tokens_seen": 1569193984 }, { "epoch": 0.48, "learning_rate": 0.0005296902583854919, "loss": 0.0689, "theoretical_loss": 3.5306020352392053, "tokens_seen": 1569456128 }, { "epoch": 0.48, "learning_rate": 0.0005296100144439095, "loss": 0.0699, "theoretical_loss": 3.5305509121270777, "tokens_seen": 1569718272 }, { "epoch": 0.48, "learning_rate": 0.0005295297705023271, "loss": 0.0664, "theoretical_loss": 3.5304997999419028, "tokens_seen": 1569980416 }, { "epoch": 0.48, "learning_rate": 0.0005294495265607446, "loss": 0.0693, "theoretical_loss": 3.5304486986795203, "tokens_seen": 1570242560 }, { "epoch": 0.48, "learning_rate": 0.0005293692826191623, "loss": 0.0701, "theoretical_loss": 3.5303976083357735, "tokens_seen": 1570504704 }, { "epoch": 0.48, "learning_rate": 0.0005292890386775799, "loss": 0.0648, "theoretical_loss": 3.5303465289065077, "tokens_seen": 1570766848 }, { "epoch": 0.48, "learning_rate": 0.0005292087947359975, "loss": 0.0674, "theoretical_loss": 3.5302954603875696, "tokens_seen": 1571028992 }, { "epoch": 0.48, "learning_rate": 0.000529128550794415, "loss": 0.0695, "theoretical_loss": 3.5302444027748106, "tokens_seen": 1571291136 }, { "epoch": 0.48, "learning_rate": 0.0005290483068528327, "loss": 0.0702, "theoretical_loss": 3.530193356064081, "tokens_seen": 1571553280 }, { "epoch": 0.48, "learning_rate": 0.0005289680629112502, "loss": 0.0689, "theoretical_loss": 3.530142320251235, "tokens_seen": 1571815424 }, { "epoch": 0.48, "learning_rate": 0.0005288878189696677, "loss": 0.0688, "theoretical_loss": 3.5300912953321304, "tokens_seen": 1572077568 }, { "epoch": 0.48, "learning_rate": 0.0005288075750280854, "loss": 0.0685, "theoretical_loss": 3.530040281302625, "tokens_seen": 1572339712 }, { "epoch": 0.48, "learning_rate": 0.0005287273310865029, "loss": 0.0736, "theoretical_loss": 3.5299892781585793, "tokens_seen": 1572601856 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0002667908265721053, "objective/train/docs_used": 574423, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4923175573349, "objective/train/original_loss": 1.492317795753479, "objective/train/theoretical_loss": 3.529938285895858, "objective/train/tokens_used": 1593324000, "objective/train/value_avg": -0.0081787109375, "objective/train/value_loss": 0.0002718984615057707, "objective/train/value_max": -8.481740951538086e-05, "objective/train/value_min": -0.552734375, "objective/train/value_reward_corr": 0.7223534480290491, "objective/train/value_std": 0.0150909423828125, "objective/train/weight_avg": 1.0003913640975952, "objective/train/weighted_lm_loss": 1.4929695129394531, "objective/train/weights_max": 1.278699278831482, "objective/train/weights_min": 0.3693629503250122, "theoretical_loss": 3.529938285895858, "tokens_seen": 1572864000 }, { "epoch": 0.48, "learning_rate": 0.0005286470871449207, "loss": 0.0723, "theoretical_loss": 3.529938285895858, "tokens_seen": 1572864000 }, { "epoch": 0.48, "learning_rate": 0.0005285668432033382, "loss": 0.0671, "theoretical_loss": 3.529887304510326, "tokens_seen": 1573126144 }, { "epoch": 0.48, "learning_rate": 0.0005284865992617558, "loss": 0.0665, "theoretical_loss": 3.5298363339978502, "tokens_seen": 1573388288 }, { "epoch": 0.48, "learning_rate": 0.0005284063553201734, "loss": 0.0673, "theoretical_loss": 3.529785374354302, "tokens_seen": 1573650432 }, { "epoch": 0.48, "learning_rate": 0.0005283261113785909, "loss": 0.0687, "theoretical_loss": 3.529734425575553, "tokens_seen": 1573912576 }, { "epoch": 0.48, "learning_rate": 0.0005282458674370085, "loss": 0.0688, "theoretical_loss": 3.5296834876574783, "tokens_seen": 1574174720 }, { "epoch": 0.48, "learning_rate": 0.0005281656234954261, "loss": 0.0702, "theoretical_loss": 3.529632560595954, "tokens_seen": 1574436864 }, { "epoch": 0.48, "learning_rate": 0.0005280853795538437, "loss": 0.0676, "theoretical_loss": 3.5295816443868593, "tokens_seen": 1574699008 }, { "epoch": 0.48, "learning_rate": 0.0005280051356122612, "loss": 0.0643, "theoretical_loss": 3.529530739026076, "tokens_seen": 1574961152 }, { "epoch": 0.48, "learning_rate": 0.000527924891670679, "loss": 0.0678, "theoretical_loss": 3.5294798445094875, "tokens_seen": 1575223296 }, { "epoch": 0.48, "learning_rate": 0.0005278446477290965, "loss": 0.0702, "theoretical_loss": 3.529428960832979, "tokens_seen": 1575485440 }, { "epoch": 0.48, "learning_rate": 0.000527764403787514, "loss": 0.0679, "theoretical_loss": 3.5293780879924395, "tokens_seen": 1575747584 }, { "epoch": 0.48, "learning_rate": 0.0005276841598459317, "loss": 0.0664, "theoretical_loss": 3.529327225983759, "tokens_seen": 1576009728 }, { "epoch": 0.48, "learning_rate": 0.0005276039159043492, "loss": 0.0683, "theoretical_loss": 3.5292763748028295, "tokens_seen": 1576271872 }, { "epoch": 0.48, "learning_rate": 0.0005275236719627668, "loss": 0.0684, "theoretical_loss": 3.5292255344455468, "tokens_seen": 1576534016 }, { "epoch": 0.48, "learning_rate": 0.0005274434280211844, "loss": 0.0677, "theoretical_loss": 3.529174704907807, "tokens_seen": 1576796160 }, { "epoch": 0.48, "learning_rate": 0.000527363184079602, "loss": 0.069, "theoretical_loss": 3.529123886185509, "tokens_seen": 1577058304 }, { "epoch": 0.48, "learning_rate": 0.0005272829401380196, "loss": 0.0671, "theoretical_loss": 3.529073078274556, "tokens_seen": 1577320448 }, { "epoch": 0.48, "learning_rate": 0.0005272026961964371, "loss": 0.0679, "theoretical_loss": 3.5290222811708505, "tokens_seen": 1577582592 }, { "epoch": 0.48, "learning_rate": 0.0005271224522548548, "loss": 0.0703, "theoretical_loss": 3.528971494870299, "tokens_seen": 1577844736 }, { "epoch": 0.48, "learning_rate": 0.0005270422083132724, "loss": 0.0663, "theoretical_loss": 3.5289207193688092, "tokens_seen": 1578106880 }, { "epoch": 0.48, "learning_rate": 0.00052696196437169, "loss": 0.0671, "theoretical_loss": 3.5288699546622913, "tokens_seen": 1578369024 }, { "epoch": 0.48, "learning_rate": 0.0005268817204301075, "loss": 0.07, "theoretical_loss": 3.528819200746659, "tokens_seen": 1578631168 }, { "epoch": 0.48, "learning_rate": 0.0005268014764885252, "loss": 0.0709, "theoretical_loss": 3.5287684576178258, "tokens_seen": 1578893312 }, { "epoch": 0.48, "learning_rate": 0.0005267212325469427, "loss": 0.0711, "theoretical_loss": 3.5287177252717097, "tokens_seen": 1579155456 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0015256733167916536, "objective/train/docs_used": 576827, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.413283348083496, "objective/train/original_loss": 1.413283348083496, "objective/train/theoretical_loss": 3.5286670037042303, "objective/train/tokens_used": 1599877600, "objective/train/value_avg": -0.007694244384765625, "objective/train/value_loss": 0.00026877259369939566, "objective/train/value_max": -8.481740951538086e-05, "objective/train/value_min": -0.9794921875, "objective/train/value_reward_corr": 0.7313708202021553, "objective/train/value_std": 0.0199737548828125, "objective/train/weight_avg": 1.0016505718231201, "objective/train/weighted_lm_loss": 1.415053367614746, "objective/train/weights_max": 2.1128692626953125, "objective/train/weights_min": 0.3688335716724396, "theoretical_loss": 3.5286670037042303, "tokens_seen": 1579417600 }, { "epoch": 0.48, "learning_rate": 0.0005266409886053603, "loss": 0.07, "theoretical_loss": 3.5286670037042303, "tokens_seen": 1579417600 }, { "epoch": 0.48, "learning_rate": 0.0005265607446637779, "loss": 0.0683, "theoretical_loss": 3.528616292911309, "tokens_seen": 1579679744 }, { "epoch": 0.48, "learning_rate": 0.0005264805007221954, "loss": 0.07, "theoretical_loss": 3.5285655928888686, "tokens_seen": 1579941888 }, { "epoch": 0.48, "learning_rate": 0.000526400256780613, "loss": 0.0688, "theoretical_loss": 3.5285149036328356, "tokens_seen": 1580204032 }, { "epoch": 0.48, "learning_rate": 0.0005263200128390307, "loss": 0.0696, "theoretical_loss": 3.528464225139139, "tokens_seen": 1580466176 }, { "epoch": 0.48, "learning_rate": 0.0005262397688974483, "loss": 0.0731, "theoretical_loss": 3.5284135574037085, "tokens_seen": 1580728320 }, { "epoch": 0.48, "learning_rate": 0.0005261595249558658, "loss": 0.0699, "theoretical_loss": 3.5283629004224766, "tokens_seen": 1580990464 }, { "epoch": 0.48, "learning_rate": 0.0005260792810142835, "loss": 0.0709, "theoretical_loss": 3.5283122541913787, "tokens_seen": 1581252608 }, { "epoch": 0.48, "learning_rate": 0.000525999037072701, "loss": 0.0687, "theoretical_loss": 3.5282616187063516, "tokens_seen": 1581514752 }, { "epoch": 0.48, "learning_rate": 0.0005259187931311186, "loss": 0.0686, "theoretical_loss": 3.528210993963334, "tokens_seen": 1581776896 }, { "epoch": 0.48, "learning_rate": 0.0005258385491895362, "loss": 0.0714, "theoretical_loss": 3.528160379958268, "tokens_seen": 1582039040 }, { "epoch": 0.48, "learning_rate": 0.0005257583052479537, "loss": 0.0707, "theoretical_loss": 3.528109776687097, "tokens_seen": 1582301184 }, { "epoch": 0.48, "learning_rate": 0.0005256780613063715, "loss": 0.0721, "theoretical_loss": 3.528059184145767, "tokens_seen": 1582563328 }, { "epoch": 0.48, "learning_rate": 0.000525597817364789, "loss": 0.0704, "theoretical_loss": 3.5280086023302264, "tokens_seen": 1582825472 }, { "epoch": 0.48, "learning_rate": 0.0005255175734232066, "loss": 0.0687, "theoretical_loss": 3.527958031236425, "tokens_seen": 1583087616 }, { "epoch": 0.48, "learning_rate": 0.0005254373294816242, "loss": 0.0693, "theoretical_loss": 3.527907470860315, "tokens_seen": 1583349760 }, { "epoch": 0.48, "learning_rate": 0.0005253570855400417, "loss": 0.0709, "theoretical_loss": 3.527856921197852, "tokens_seen": 1583611904 }, { "epoch": 0.48, "learning_rate": 0.0005252768415984593, "loss": 0.0709, "theoretical_loss": 3.5278063822449925, "tokens_seen": 1583874048 }, { "epoch": 0.48, "learning_rate": 0.0005251965976568769, "loss": 0.0699, "theoretical_loss": 3.5277558539976956, "tokens_seen": 1584136192 }, { "epoch": 0.48, "learning_rate": 0.0005251163537152945, "loss": 0.0702, "theoretical_loss": 3.5277053364519215, "tokens_seen": 1584398336 }, { "epoch": 0.48, "learning_rate": 0.000525036109773712, "loss": 0.0707, "theoretical_loss": 3.5276548296036356, "tokens_seen": 1584660480 }, { "epoch": 0.48, "learning_rate": 0.0005249558658321298, "loss": 0.0684, "theoretical_loss": 3.527604333448802, "tokens_seen": 1584922624 }, { "epoch": 0.48, "learning_rate": 0.0005248756218905473, "loss": 0.0687, "theoretical_loss": 3.527553847983389, "tokens_seen": 1585184768 }, { "epoch": 0.48, "learning_rate": 0.0005247953779489648, "loss": 0.07, "theoretical_loss": 3.5275033732033667, "tokens_seen": 1585446912 }, { "epoch": 0.48, "learning_rate": 0.0005247151340073825, "loss": 0.067, "theoretical_loss": 3.5274529091047073, "tokens_seen": 1585709056 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.00030591359245590866, "objective/train/docs_used": 579278, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2970331907272339, "objective/train/original_loss": 1.2970331907272339, "objective/train/theoretical_loss": 3.527402455683385, "objective/train/tokens_used": 1606431200, "objective/train/value_avg": -0.00768280029296875, "objective/train/value_loss": 0.00016412846161983907, "objective/train/value_max": -5.608797073364258e-05, "objective/train/value_min": -0.393310546875, "objective/train/value_reward_corr": 0.7478747737276108, "objective/train/value_std": 0.013519287109375, "objective/train/weight_avg": 1.000381588935852, "objective/train/weighted_lm_loss": 1.2976466417312622, "objective/train/weights_max": 1.1971355676651, "objective/train/weights_min": 0.36882299184799194, "theoretical_loss": 3.527402455683385, "tokens_seen": 1585971200 }, { "epoch": 0.48, "learning_rate": 0.0005246348900658, "loss": 0.0679, "theoretical_loss": 3.527402455683385, "tokens_seen": 1585971200 }, { "epoch": 0.48, "learning_rate": 0.0005245546461242177, "loss": 0.0697, "theoretical_loss": 3.527352012935377, "tokens_seen": 1586233344 }, { "epoch": 0.48, "learning_rate": 0.0005244744021826352, "loss": 0.0696, "theoretical_loss": 3.527301580856661, "tokens_seen": 1586495488 }, { "epoch": 0.48, "learning_rate": 0.0005243941582410528, "loss": 0.0702, "theoretical_loss": 3.527251159443219, "tokens_seen": 1586757632 }, { "epoch": 0.48, "learning_rate": 0.0005243139142994704, "loss": 0.0722, "theoretical_loss": 3.5272007486910333, "tokens_seen": 1587019776 }, { "epoch": 0.48, "learning_rate": 0.0005242336703578879, "loss": 0.0701, "theoretical_loss": 3.5271503485960896, "tokens_seen": 1587281920 }, { "epoch": 0.48, "learning_rate": 0.0005241534264163056, "loss": 0.0694, "theoretical_loss": 3.5270999591543752, "tokens_seen": 1587544064 }, { "epoch": 0.48, "learning_rate": 0.0005240731824747232, "loss": 0.0685, "theoretical_loss": 3.5270495803618793, "tokens_seen": 1587806208 }, { "epoch": 0.48, "learning_rate": 0.0005239929385331408, "loss": 0.0656, "theoretical_loss": 3.5269992122145952, "tokens_seen": 1588068352 }, { "epoch": 0.48, "learning_rate": 0.0005239126945915583, "loss": 0.0678, "theoretical_loss": 3.526948854708515, "tokens_seen": 1588330496 }, { "epoch": 0.48, "learning_rate": 0.000523832450649976, "loss": 0.0688, "theoretical_loss": 3.526898507839636, "tokens_seen": 1588592640 }, { "epoch": 0.48, "learning_rate": 0.0005237522067083935, "loss": 0.0674, "theoretical_loss": 3.526848171603956, "tokens_seen": 1588854784 }, { "epoch": 0.48, "learning_rate": 0.0005236719627668111, "loss": 0.0706, "theoretical_loss": 3.526797845997476, "tokens_seen": 1589116928 }, { "epoch": 0.48, "learning_rate": 0.0005235917188252287, "loss": 0.0669, "theoretical_loss": 3.5267475310161984, "tokens_seen": 1589379072 }, { "epoch": 0.48, "learning_rate": 0.0005235114748836462, "loss": 0.0713, "theoretical_loss": 3.5266972266561276, "tokens_seen": 1589641216 }, { "epoch": 0.48, "learning_rate": 0.000523431230942064, "loss": 0.0673, "theoretical_loss": 3.526646932913271, "tokens_seen": 1589903360 }, { "epoch": 0.48, "learning_rate": 0.0005233509870004815, "loss": 0.0668, "theoretical_loss": 3.5265966497836376, "tokens_seen": 1590165504 }, { "epoch": 0.48, "learning_rate": 0.0005232707430588991, "loss": 0.0726, "theoretical_loss": 3.5265463772632386, "tokens_seen": 1590427648 }, { "epoch": 0.48, "learning_rate": 0.0005231904991173167, "loss": 0.0681, "theoretical_loss": 3.5264961153480874, "tokens_seen": 1590689792 }, { "epoch": 0.48, "learning_rate": 0.0005231102551757343, "loss": 0.0706, "theoretical_loss": 3.5264458640342, "tokens_seen": 1590951936 }, { "epoch": 0.48, "learning_rate": 0.0005230300112341518, "loss": 0.0701, "theoretical_loss": 3.5263956233175935, "tokens_seen": 1591214080 }, { "epoch": 0.48, "learning_rate": 0.0005229497672925694, "loss": 0.067, "theoretical_loss": 3.5263453931942883, "tokens_seen": 1591476224 }, { "epoch": 0.48, "learning_rate": 0.000522869523350987, "loss": 0.0674, "theoretical_loss": 3.5262951736603063, "tokens_seen": 1591738368 }, { "epoch": 0.48, "learning_rate": 0.0005227892794094045, "loss": 0.069, "theoretical_loss": 3.5262449647116716, "tokens_seen": 1592000512 }, { "epoch": 0.48, "learning_rate": 0.0005227090354678223, "loss": 0.0665, "theoretical_loss": 3.5261947663444104, "tokens_seen": 1592262656 }, { "epoch": 0.48, "objective/train/advantage_avg": -0.0003691379679366946, "objective/train/docs_used": 581545, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4430354833602905, "objective/train/original_loss": 1.44303560256958, "objective/train/theoretical_loss": 3.5261445785545513, "objective/train/tokens_used": 1612984800, "objective/train/value_avg": -0.006046295166015625, "objective/train/value_loss": 0.000189928337931633, "objective/train/value_max": -3.534555435180664e-05, "objective/train/value_min": -0.60888671875, "objective/train/value_reward_corr": 0.6734293856838923, "objective/train/value_std": 0.011077880859375, "objective/train/weight_avg": 0.9997175931930542, "objective/train/weighted_lm_loss": 1.442589521408081, "objective/train/weights_max": 1.4511600732803345, "objective/train/weights_min": 0.3682490885257721, "theoretical_loss": 3.5261445785545513, "tokens_seen": 1592524800 }, { "epoch": 0.48, "learning_rate": 0.0005226287915262398, "loss": 0.0679, "theoretical_loss": 3.5261445785545513, "tokens_seen": 1592524800 }, { "epoch": 0.48, "learning_rate": 0.0005225485475846574, "loss": 0.069, "theoretical_loss": 3.526094401338125, "tokens_seen": 1592786944 }, { "epoch": 0.48, "learning_rate": 0.000522468303643075, "loss": 0.0695, "theoretical_loss": 3.526044234691165, "tokens_seen": 1593049088 }, { "epoch": 0.48, "learning_rate": 0.0005223880597014925, "loss": 0.0681, "theoretical_loss": 3.525994078609705, "tokens_seen": 1593311232 }, { "epoch": 0.48, "learning_rate": 0.0005223078157599101, "loss": 0.0694, "theoretical_loss": 3.525943933089782, "tokens_seen": 1593573376 }, { "epoch": 0.48, "learning_rate": 0.0005222275718183277, "loss": 0.0716, "theoretical_loss": 3.5258937981274365, "tokens_seen": 1593835520 }, { "epoch": 0.48, "learning_rate": 0.0005221473278767453, "loss": 0.0689, "theoretical_loss": 3.5258436737187084, "tokens_seen": 1594097664 }, { "epoch": 0.48, "learning_rate": 0.0005220670839351629, "loss": 0.0673, "theoretical_loss": 3.5257935598596424, "tokens_seen": 1594359808 }, { "epoch": 0.48, "learning_rate": 0.0005219868399935806, "loss": 0.0676, "theoretical_loss": 3.5257434565462833, "tokens_seen": 1594621952 }, { "epoch": 0.48, "learning_rate": 0.0005219065960519981, "loss": 0.0695, "theoretical_loss": 3.5256933637746792, "tokens_seen": 1594884096 }, { "epoch": 0.48, "learning_rate": 0.0005218263521104157, "loss": 0.0652, "theoretical_loss": 3.5256432815408796, "tokens_seen": 1595146240 }, { "epoch": 0.48, "learning_rate": 0.0005217461081688333, "loss": 0.0661, "theoretical_loss": 3.5255932098409364, "tokens_seen": 1595408384 }, { "epoch": 0.48, "learning_rate": 0.0005216658642272508, "loss": 0.068, "theoretical_loss": 3.5255431486709043, "tokens_seen": 1595670528 }, { "epoch": 0.48, "learning_rate": 0.0005215856202856685, "loss": 0.0651, "theoretical_loss": 3.525493098026839, "tokens_seen": 1595932672 }, { "epoch": 0.48, "learning_rate": 0.000521505376344086, "loss": 0.0693, "theoretical_loss": 3.5254430579047993, "tokens_seen": 1596194816 }, { "epoch": 0.48, "learning_rate": 0.0005214251324025036, "loss": 0.0658, "theoretical_loss": 3.5253930283008454, "tokens_seen": 1596456960 }, { "epoch": 0.48, "learning_rate": 0.0005213448884609212, "loss": 0.069, "theoretical_loss": 3.5253430092110403, "tokens_seen": 1596719104 }, { "epoch": 0.48, "learning_rate": 0.0005212646445193387, "loss": 0.067, "theoretical_loss": 3.5252930006314482, "tokens_seen": 1596981248 }, { "epoch": 0.48, "learning_rate": 0.0005211844005777564, "loss": 0.0678, "theoretical_loss": 3.5252430025581356, "tokens_seen": 1597243392 }, { "epoch": 0.48, "learning_rate": 0.000521104156636174, "loss": 0.0679, "theoretical_loss": 3.5251930149871726, "tokens_seen": 1597505536 }, { "epoch": 0.48, "learning_rate": 0.0005210239126945916, "loss": 0.0675, "theoretical_loss": 3.5251430379146296, "tokens_seen": 1597767680 }, { "epoch": 0.48, "learning_rate": 0.0005209436687530092, "loss": 0.0674, "theoretical_loss": 3.5250930713365802, "tokens_seen": 1598029824 }, { "epoch": 0.48, "learning_rate": 0.0005208634248114268, "loss": 0.0656, "theoretical_loss": 3.525043115249099, "tokens_seen": 1598291968 }, { "epoch": 0.48, "learning_rate": 0.0005207831808698443, "loss": 0.0699, "theoretical_loss": 3.5249931696482637, "tokens_seen": 1598554112 }, { "epoch": 0.48, "learning_rate": 0.0005207029369282619, "loss": 0.0698, "theoretical_loss": 3.524943234530154, "tokens_seen": 1598816256 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0008163555758073926, "objective/train/docs_used": 583998, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3744968175888062, "objective/train/original_loss": 1.3744966983795166, "objective/train/theoretical_loss": 3.5248933098908517, "objective/train/tokens_used": 1619538400, "objective/train/value_avg": -0.00650787353515625, "objective/train/value_loss": 0.000182785777724348, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.49658203125, "objective/train/value_reward_corr": 0.6320282058663177, "objective/train/value_std": 0.0113067626953125, "objective/train/weight_avg": 1.0009000301361084, "objective/train/weighted_lm_loss": 1.3754725456237793, "objective/train/weights_max": 1.3192977905273438, "objective/train/weights_min": 0.3684886395931244, "theoretical_loss": 3.5248933098908517, "tokens_seen": 1599078400 }, { "epoch": 0.48, "learning_rate": 0.0005206226929866795, "loss": 0.0687, "theoretical_loss": 3.5248933098908517, "tokens_seen": 1599078400 }, { "epoch": 0.48, "learning_rate": 0.000520542449045097, "loss": 0.0682, "theoretical_loss": 3.5248433957264402, "tokens_seen": 1599340544 }, { "epoch": 0.48, "learning_rate": 0.0005204622051035148, "loss": 0.0703, "theoretical_loss": 3.5247934920330053, "tokens_seen": 1599602688 }, { "epoch": 0.48, "learning_rate": 0.0005203819611619323, "loss": 0.0702, "theoretical_loss": 3.5247435988066353, "tokens_seen": 1599864832 }, { "epoch": 0.48, "learning_rate": 0.0005203017172203499, "loss": 0.0694, "theoretical_loss": 3.52469371604342, "tokens_seen": 1600126976 }, { "epoch": 0.48, "learning_rate": 0.0005202214732787675, "loss": 0.0692, "theoretical_loss": 3.524643843739452, "tokens_seen": 1600389120 }, { "epoch": 0.49, "learning_rate": 0.0005201412293371851, "loss": 0.0727, "theoretical_loss": 3.524593981890825, "tokens_seen": 1600651264 }, { "epoch": 0.49, "learning_rate": 0.0005200609853956026, "loss": 0.069, "theoretical_loss": 3.524544130493635, "tokens_seen": 1600913408 }, { "epoch": 0.49, "learning_rate": 0.0005199807414540202, "loss": 0.0686, "theoretical_loss": 3.5244942895439815, "tokens_seen": 1601175552 }, { "epoch": 0.49, "learning_rate": 0.0005199004975124378, "loss": 0.069, "theoretical_loss": 3.524444459037965, "tokens_seen": 1601437696 }, { "epoch": 0.49, "learning_rate": 0.0005198202535708553, "loss": 0.0686, "theoretical_loss": 3.5243946389716867, "tokens_seen": 1601699840 }, { "epoch": 0.49, "learning_rate": 0.000519740009629273, "loss": 0.0704, "theoretical_loss": 3.5243448293412527, "tokens_seen": 1601961984 }, { "epoch": 0.49, "learning_rate": 0.0005196597656876906, "loss": 0.0705, "theoretical_loss": 3.5242950301427696, "tokens_seen": 1602224128 }, { "epoch": 0.49, "learning_rate": 0.0005195795217461083, "loss": 0.0675, "theoretical_loss": 3.5242452413723457, "tokens_seen": 1602486272 }, { "epoch": 0.49, "learning_rate": 0.0005194992778045258, "loss": 0.069, "theoretical_loss": 3.5241954630260923, "tokens_seen": 1602748416 }, { "epoch": 0.49, "learning_rate": 0.0005194190338629433, "loss": 0.0653, "theoretical_loss": 3.524145695100123, "tokens_seen": 1603010560 }, { "epoch": 0.49, "learning_rate": 0.000519338789921361, "loss": 0.0685, "theoretical_loss": 3.5240959375905527, "tokens_seen": 1603272704 }, { "epoch": 0.49, "learning_rate": 0.0005192585459797785, "loss": 0.0689, "theoretical_loss": 3.5240461904934977, "tokens_seen": 1603534848 }, { "epoch": 0.49, "learning_rate": 0.0005191783020381961, "loss": 0.0696, "theoretical_loss": 3.5239964538050788, "tokens_seen": 1603796992 }, { "epoch": 0.49, "learning_rate": 0.0005190980580966137, "loss": 0.0691, "theoretical_loss": 3.5239467275214165, "tokens_seen": 1604059136 }, { "epoch": 0.49, "learning_rate": 0.0005190178141550314, "loss": 0.0707, "theoretical_loss": 3.523897011638635, "tokens_seen": 1604321280 }, { "epoch": 0.49, "learning_rate": 0.0005189375702134489, "loss": 0.069, "theoretical_loss": 3.5238473061528586, "tokens_seen": 1604583424 }, { "epoch": 0.49, "learning_rate": 0.0005188573262718665, "loss": 0.066, "theoretical_loss": 3.5237976110602163, "tokens_seen": 1604845568 }, { "epoch": 0.49, "learning_rate": 0.0005187770823302841, "loss": 0.0714, "theoretical_loss": 3.523747926356837, "tokens_seen": 1605107712 }, { "epoch": 0.49, "learning_rate": 0.0005186968383887016, "loss": 0.0684, "theoretical_loss": 3.523698252038853, "tokens_seen": 1605369856 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0014611550141125917, "objective/train/docs_used": 586391, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3702470064163208, "objective/train/original_loss": 1.3702466487884521, "objective/train/theoretical_loss": 3.5236485881023976, "objective/train/tokens_used": 1626092000, "objective/train/value_avg": -0.006923675537109375, "objective/train/value_loss": 0.00013820224558003247, "objective/train/value_max": -2.6464462280273438e-05, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.6985552108151838, "objective/train/value_std": 0.01245880126953125, "objective/train/weight_avg": 1.0015236139297485, "objective/train/weighted_lm_loss": 1.3721559047698975, "objective/train/weights_max": 1.1226495504379272, "objective/train/weights_min": 0.3682887554168701, "theoretical_loss": 3.5236485881023976, "tokens_seen": 1605632000 }, { "epoch": 0.49, "learning_rate": 0.0005186165944471193, "loss": 0.0676, "theoretical_loss": 3.5236485881023976, "tokens_seen": 1605632000 }, { "epoch": 0.49, "learning_rate": 0.0005185363505055368, "loss": 0.0671, "theoretical_loss": 3.5235989345436076, "tokens_seen": 1605894144 }, { "epoch": 0.49, "learning_rate": 0.0005184561065639545, "loss": 0.0693, "theoretical_loss": 3.52354929135862, "tokens_seen": 1606156288 }, { "epoch": 0.49, "learning_rate": 0.000518375862622372, "loss": 0.0665, "theoretical_loss": 3.5234996585435754, "tokens_seen": 1606418432 }, { "epoch": 0.49, "learning_rate": 0.0005182956186807895, "loss": 0.0686, "theoretical_loss": 3.5234500360946157, "tokens_seen": 1606680576 }, { "epoch": 0.49, "learning_rate": 0.0005182153747392073, "loss": 0.0663, "theoretical_loss": 3.5234004240078853, "tokens_seen": 1606942720 }, { "epoch": 0.49, "learning_rate": 0.0005181351307976248, "loss": 0.069, "theoretical_loss": 3.5233508222795304, "tokens_seen": 1607204864 }, { "epoch": 0.49, "learning_rate": 0.0005180548868560424, "loss": 0.0673, "theoretical_loss": 3.5233012309057, "tokens_seen": 1607467008 }, { "epoch": 0.49, "learning_rate": 0.00051797464291446, "loss": 0.0685, "theoretical_loss": 3.5232516498825426, "tokens_seen": 1607729152 }, { "epoch": 0.49, "learning_rate": 0.0005178943989728776, "loss": 0.0706, "theoretical_loss": 3.5232020792062126, "tokens_seen": 1607991296 }, { "epoch": 0.49, "learning_rate": 0.0005178141550312951, "loss": 0.0671, "theoretical_loss": 3.523152518872864, "tokens_seen": 1608253440 }, { "epoch": 0.49, "learning_rate": 0.0005177339110897127, "loss": 0.0694, "theoretical_loss": 3.5231029688786526, "tokens_seen": 1608515584 }, { "epoch": 0.49, "learning_rate": 0.0005176536671481303, "loss": 0.0692, "theoretical_loss": 3.523053429219738, "tokens_seen": 1608777728 }, { "epoch": 0.49, "learning_rate": 0.0005175734232065478, "loss": 0.0743, "theoretical_loss": 3.52300389989228, "tokens_seen": 1609039872 }, { "epoch": 0.49, "learning_rate": 0.0005174931792649656, "loss": 0.0716, "theoretical_loss": 3.5229543808924415, "tokens_seen": 1609302016 }, { "epoch": 0.49, "learning_rate": 0.0005174129353233831, "loss": 0.0684, "theoretical_loss": 3.522904872216388, "tokens_seen": 1609564160 }, { "epoch": 0.49, "learning_rate": 0.0005173326913818007, "loss": 0.0689, "theoretical_loss": 3.5228553738602857, "tokens_seen": 1609826304 }, { "epoch": 0.49, "learning_rate": 0.0005172524474402183, "loss": 0.0705, "theoretical_loss": 3.5228058858203033, "tokens_seen": 1610088448 }, { "epoch": 0.49, "learning_rate": 0.0005171722034986359, "loss": 0.0704, "theoretical_loss": 3.5227564080926115, "tokens_seen": 1610350592 }, { "epoch": 0.49, "learning_rate": 0.0005170919595570535, "loss": 0.0698, "theoretical_loss": 3.5227069406733844, "tokens_seen": 1610612736 }, { "epoch": 0.49, "learning_rate": 0.000517011715615471, "loss": 0.0708, "theoretical_loss": 3.5226574835587963, "tokens_seen": 1610874880 }, { "epoch": 0.49, "learning_rate": 0.0005169314716738886, "loss": 0.0676, "theoretical_loss": 3.522608036745024, "tokens_seen": 1611137024 }, { "epoch": 0.49, "learning_rate": 0.0005168512277323062, "loss": 0.0698, "theoretical_loss": 3.5225586002282467, "tokens_seen": 1611399168 }, { "epoch": 0.49, "learning_rate": 0.0005167709837907239, "loss": 0.0702, "theoretical_loss": 3.5225091740046457, "tokens_seen": 1611661312 }, { "epoch": 0.49, "learning_rate": 0.0005166907398491414, "loss": 0.072, "theoretical_loss": 3.5224597580704033, "tokens_seen": 1611923456 }, { "epoch": 0.49, "objective/train/advantage_avg": -0.0002596320991870016, "objective/train/docs_used": 588885, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3582322597503662, "objective/train/original_loss": 1.3582322597503662, "objective/train/theoretical_loss": 3.5224103524217067, "objective/train/tokens_used": 1632645600, "objective/train/value_avg": -0.008026123046875, "objective/train/value_loss": 0.0002943206636700779, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.91357421875, "objective/train/value_reward_corr": 0.7926579179126808, "objective/train/value_std": 0.0210723876953125, "objective/train/weight_avg": 0.9998724460601807, "objective/train/weighted_lm_loss": 1.3586115837097168, "objective/train/weights_max": 1.6367690563201904, "objective/train/weights_min": 0.3682592809200287, "theoretical_loss": 3.5224103524217067, "tokens_seen": 1612185600 }, { "epoch": 0.49, "learning_rate": 0.0005166104959075591, "loss": 0.0693, "theoretical_loss": 3.5224103524217067, "tokens_seen": 1612185600 }, { "epoch": 0.49, "learning_rate": 0.0005165302519659766, "loss": 0.0705, "theoretical_loss": 3.522360957054741, "tokens_seen": 1612447744 }, { "epoch": 0.49, "learning_rate": 0.0005164500080243941, "loss": 0.0669, "theoretical_loss": 3.5223115719656963, "tokens_seen": 1612709888 }, { "epoch": 0.49, "learning_rate": 0.0005163697640828118, "loss": 0.0705, "theoretical_loss": 3.5222621971507646, "tokens_seen": 1612972032 }, { "epoch": 0.49, "learning_rate": 0.0005162895201412293, "loss": 0.0695, "theoretical_loss": 3.522212832606138, "tokens_seen": 1613234176 }, { "epoch": 0.49, "learning_rate": 0.0005162092761996469, "loss": 0.0714, "theoretical_loss": 3.5221634783280122, "tokens_seen": 1613496320 }, { "epoch": 0.49, "learning_rate": 0.0005161290322580645, "loss": 0.0691, "theoretical_loss": 3.5221141343125852, "tokens_seen": 1613758464 }, { "epoch": 0.49, "learning_rate": 0.0005160487883164821, "loss": 0.0713, "theoretical_loss": 3.5220648005560555, "tokens_seen": 1614020608 }, { "epoch": 0.49, "learning_rate": 0.0005159685443748997, "loss": 0.0748, "theoretical_loss": 3.522015477054625, "tokens_seen": 1614282752 }, { "epoch": 0.49, "learning_rate": 0.0005158883004333173, "loss": 0.0729, "theoretical_loss": 3.521966163804497, "tokens_seen": 1614544896 }, { "epoch": 0.49, "learning_rate": 0.0005158080564917349, "loss": 0.0708, "theoretical_loss": 3.521916860801877, "tokens_seen": 1614807040 }, { "epoch": 0.49, "learning_rate": 0.0005157278125501525, "loss": 0.0704, "theoretical_loss": 3.521867568042973, "tokens_seen": 1615069184 }, { "epoch": 0.49, "learning_rate": 0.0005156475686085701, "loss": 0.07, "theoretical_loss": 3.5218182855239935, "tokens_seen": 1615331328 }, { "epoch": 0.49, "learning_rate": 0.0005155673246669876, "loss": 0.0683, "theoretical_loss": 3.5217690132411508, "tokens_seen": 1615593472 }, { "epoch": 0.49, "learning_rate": 0.0005154870807254053, "loss": 0.0708, "theoretical_loss": 3.521719751190658, "tokens_seen": 1615855616 }, { "epoch": 0.49, "learning_rate": 0.0005154068367838228, "loss": 0.0696, "theoretical_loss": 3.5216704993687307, "tokens_seen": 1616117760 }, { "epoch": 0.49, "learning_rate": 0.0005153265928422403, "loss": 0.0694, "theoretical_loss": 3.5216212577715873, "tokens_seen": 1616379904 }, { "epoch": 0.49, "learning_rate": 0.0005152463489006581, "loss": 0.0711, "theoretical_loss": 3.5215720263954458, "tokens_seen": 1616642048 }, { "epoch": 0.49, "learning_rate": 0.0005151661049590756, "loss": 0.0692, "theoretical_loss": 3.521522805236529, "tokens_seen": 1616904192 }, { "epoch": 0.49, "learning_rate": 0.0005150858610174932, "loss": 0.0725, "theoretical_loss": 3.52147359429106, "tokens_seen": 1617166336 }, { "epoch": 0.49, "learning_rate": 0.0005150056170759108, "loss": 0.0708, "theoretical_loss": 3.5214243935552654, "tokens_seen": 1617428480 }, { "epoch": 0.49, "learning_rate": 0.0005149253731343284, "loss": 0.0664, "theoretical_loss": 3.5213752030253715, "tokens_seen": 1617690624 }, { "epoch": 0.49, "learning_rate": 0.0005148451291927459, "loss": 0.07, "theoretical_loss": 3.5213260226976084, "tokens_seen": 1617952768 }, { "epoch": 0.49, "learning_rate": 0.0005147648852511635, "loss": 0.0702, "theoretical_loss": 3.5212768525682074, "tokens_seen": 1618214912 }, { "epoch": 0.49, "learning_rate": 0.0005146846413095811, "loss": 0.0714, "theoretical_loss": 3.5212276926334027, "tokens_seen": 1618477056 }, { "epoch": 0.49, "objective/train/advantage_avg": -8.634123514639214e-05, "objective/train/docs_used": 591377, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.2014659643173218, "objective/train/original_loss": 1.2014658451080322, "objective/train/theoretical_loss": 3.5211785428894293, "objective/train/tokens_used": 1639199200, "objective/train/value_avg": -0.007335662841796875, "objective/train/value_loss": 0.00014820354408584535, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.5302734375, "objective/train/value_reward_corr": 0.7609983102667354, "objective/train/value_std": 0.0133056640625, "objective/train/weight_avg": 0.9999821782112122, "objective/train/weighted_lm_loss": 1.2018018960952759, "objective/train/weights_max": 1.1847867965698242, "objective/train/weights_min": 0.3695588707923889, "theoretical_loss": 3.5211785428894293, "tokens_seen": 1618739200 }, { "epoch": 0.49, "learning_rate": 0.0005146043973679986, "loss": 0.0672, "theoretical_loss": 3.5211785428894293, "tokens_seen": 1618739200 }, { "epoch": 0.49, "learning_rate": 0.0005145241534264164, "loss": 0.0719, "theoretical_loss": 3.5211294033325258, "tokens_seen": 1619001344 }, { "epoch": 0.49, "learning_rate": 0.0005144439094848339, "loss": 0.0687, "theoretical_loss": 3.5210802739589306, "tokens_seen": 1619263488 }, { "epoch": 0.49, "learning_rate": 0.0005143636655432516, "loss": 0.0697, "theoretical_loss": 3.521031154764886, "tokens_seen": 1619525632 }, { "epoch": 0.49, "learning_rate": 0.0005142834216016691, "loss": 0.0707, "theoretical_loss": 3.520982045746636, "tokens_seen": 1619787776 }, { "epoch": 0.49, "learning_rate": 0.0005142031776600867, "loss": 0.0699, "theoretical_loss": 3.5209329469004254, "tokens_seen": 1620049920 }, { "epoch": 0.49, "learning_rate": 0.0005141229337185043, "loss": 0.0715, "theoretical_loss": 3.5208838582225024, "tokens_seen": 1620312064 }, { "epoch": 0.49, "learning_rate": 0.0005140426897769218, "loss": 0.0685, "theoretical_loss": 3.5208347797091157, "tokens_seen": 1620574208 }, { "epoch": 0.49, "learning_rate": 0.0005139624458353394, "loss": 0.0681, "theoretical_loss": 3.5207857113565177, "tokens_seen": 1620836352 }, { "epoch": 0.49, "learning_rate": 0.000513882201893757, "loss": 0.0692, "theoretical_loss": 3.520736653160962, "tokens_seen": 1621098496 }, { "epoch": 0.49, "learning_rate": 0.0005138019579521747, "loss": 0.0693, "theoretical_loss": 3.520687605118704, "tokens_seen": 1621360640 }, { "epoch": 0.49, "learning_rate": 0.0005137217140105922, "loss": 0.072, "theoretical_loss": 3.520638567226001, "tokens_seen": 1621622784 }, { "epoch": 0.49, "learning_rate": 0.0005136414700690099, "loss": 0.0697, "theoretical_loss": 3.5205895394791127, "tokens_seen": 1621884928 }, { "epoch": 0.49, "learning_rate": 0.0005135612261274274, "loss": 0.0686, "theoretical_loss": 3.5205405218743007, "tokens_seen": 1622147072 }, { "epoch": 0.49, "learning_rate": 0.0005134809821858449, "loss": 0.0702, "theoretical_loss": 3.520491514407828, "tokens_seen": 1622409216 }, { "epoch": 0.49, "learning_rate": 0.0005134007382442626, "loss": 0.0702, "theoretical_loss": 3.520442517075961, "tokens_seen": 1622671360 }, { "epoch": 0.49, "learning_rate": 0.0005133204943026801, "loss": 0.068, "theoretical_loss": 3.5203935298749656, "tokens_seen": 1622933504 }, { "epoch": 0.49, "learning_rate": 0.0005132402503610978, "loss": 0.0689, "theoretical_loss": 3.520344552801113, "tokens_seen": 1623195648 }, { "epoch": 0.49, "learning_rate": 0.0005131600064195153, "loss": 0.0704, "theoretical_loss": 3.5202955858506737, "tokens_seen": 1623457792 }, { "epoch": 0.49, "learning_rate": 0.000513079762477933, "loss": 0.0726, "theoretical_loss": 3.520246629019921, "tokens_seen": 1623719936 }, { "epoch": 0.49, "learning_rate": 0.0005129995185363506, "loss": 0.0694, "theoretical_loss": 3.5201976823051306, "tokens_seen": 1623982080 }, { "epoch": 0.49, "learning_rate": 0.0005129192745947681, "loss": 0.0711, "theoretical_loss": 3.5201487457025795, "tokens_seen": 1624244224 }, { "epoch": 0.49, "learning_rate": 0.0005128390306531857, "loss": 0.0694, "theoretical_loss": 3.5200998192085473, "tokens_seen": 1624506368 }, { "epoch": 0.49, "learning_rate": 0.0005127587867116033, "loss": 0.069, "theoretical_loss": 3.5200509028193148, "tokens_seen": 1624768512 }, { "epoch": 0.49, "learning_rate": 0.0005126785427700209, "loss": 0.0723, "theoretical_loss": 3.5200019965311657, "tokens_seen": 1625030656 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0010418968740850687, "objective/train/docs_used": 593743, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.481257438659668, "objective/train/original_loss": 1.481257438659668, "objective/train/theoretical_loss": 3.519953100340385, "objective/train/tokens_used": 1645752800, "objective/train/value_avg": -0.0069732666015625, "objective/train/value_loss": 0.00027382862754166126, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.8955078125, "objective/train/value_reward_corr": 0.6842025943888881, "objective/train/value_std": 0.01538848876953125, "objective/train/weight_avg": 1.0011622905731201, "objective/train/weighted_lm_loss": 1.4833272695541382, "objective/train/weights_max": 1.6334962844848633, "objective/train/weights_min": 0.36918336153030396, "theoretical_loss": 3.519953100340385, "tokens_seen": 1625292800 }, { "epoch": 0.49, "learning_rate": 0.0005125982988284384, "loss": 0.0712, "theoretical_loss": 3.519953100340385, "tokens_seen": 1625292800 }, { "epoch": 0.49, "learning_rate": 0.0005125180548868561, "loss": 0.073, "theoretical_loss": 3.51990421424326, "tokens_seen": 1625554944 }, { "epoch": 0.49, "learning_rate": 0.0005124378109452736, "loss": 0.0703, "theoretical_loss": 3.519855338236079, "tokens_seen": 1625817088 }, { "epoch": 0.49, "learning_rate": 0.0005123575670036911, "loss": 0.0687, "theoretical_loss": 3.5198064723151345, "tokens_seen": 1626079232 }, { "epoch": 0.49, "learning_rate": 0.0005122773230621089, "loss": 0.0707, "theoretical_loss": 3.519757616476719, "tokens_seen": 1626341376 }, { "epoch": 0.49, "learning_rate": 0.0005121970791205264, "loss": 0.0693, "theoretical_loss": 3.519708770717126, "tokens_seen": 1626603520 }, { "epoch": 0.49, "learning_rate": 0.000512116835178944, "loss": 0.0718, "theoretical_loss": 3.519659935032655, "tokens_seen": 1626865664 }, { "epoch": 0.49, "learning_rate": 0.0005120365912373616, "loss": 0.0708, "theoretical_loss": 3.5196111094196034, "tokens_seen": 1627127808 }, { "epoch": 0.49, "learning_rate": 0.0005119563472957792, "loss": 0.0684, "theoretical_loss": 3.5195622938742726, "tokens_seen": 1627389952 }, { "epoch": 0.49, "learning_rate": 0.0005118761033541968, "loss": 0.0713, "theoretical_loss": 3.519513488392965, "tokens_seen": 1627652096 }, { "epoch": 0.49, "learning_rate": 0.0005117958594126143, "loss": 0.0706, "theoretical_loss": 3.5194646929719853, "tokens_seen": 1627914240 }, { "epoch": 0.49, "learning_rate": 0.0005117156154710319, "loss": 0.0706, "theoretical_loss": 3.519415907607641, "tokens_seen": 1628176384 }, { "epoch": 0.49, "learning_rate": 0.0005116353715294495, "loss": 0.0682, "theoretical_loss": 3.5193671322962397, "tokens_seen": 1628438528 }, { "epoch": 0.49, "learning_rate": 0.0005115551275878672, "loss": 0.0704, "theoretical_loss": 3.519318367034093, "tokens_seen": 1628700672 }, { "epoch": 0.49, "learning_rate": 0.0005114748836462847, "loss": 0.0693, "theoretical_loss": 3.519269611817513, "tokens_seen": 1628962816 }, { "epoch": 0.49, "learning_rate": 0.0005113946397047024, "loss": 0.0713, "theoretical_loss": 3.5192208666428146, "tokens_seen": 1629224960 }, { "epoch": 0.49, "learning_rate": 0.0005113143957631199, "loss": 0.0703, "theoretical_loss": 3.5191721315063136, "tokens_seen": 1629487104 }, { "epoch": 0.49, "learning_rate": 0.0005112341518215374, "loss": 0.0682, "theoretical_loss": 3.5191234064043293, "tokens_seen": 1629749248 }, { "epoch": 0.49, "learning_rate": 0.0005111539078799551, "loss": 0.0679, "theoretical_loss": 3.5190746913331816, "tokens_seen": 1630011392 }, { "epoch": 0.49, "learning_rate": 0.0005110736639383726, "loss": 0.0705, "theoretical_loss": 3.5190259862891926, "tokens_seen": 1630273536 }, { "epoch": 0.49, "learning_rate": 0.0005109934199967902, "loss": 0.0683, "theoretical_loss": 3.518977291268686, "tokens_seen": 1630535680 }, { "epoch": 0.49, "learning_rate": 0.0005109131760552078, "loss": 0.0708, "theoretical_loss": 3.5189286062679894, "tokens_seen": 1630797824 }, { "epoch": 0.49, "learning_rate": 0.0005108329321136255, "loss": 0.0697, "theoretical_loss": 3.51887993128343, "tokens_seen": 1631059968 }, { "epoch": 0.49, "learning_rate": 0.0005107526881720431, "loss": 0.0661, "theoretical_loss": 3.518831266311339, "tokens_seen": 1631322112 }, { "epoch": 0.49, "learning_rate": 0.0005106724442304607, "loss": 0.0705, "theoretical_loss": 3.518782611348046, "tokens_seen": 1631584256 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0007258797995746136, "objective/train/docs_used": 596193, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.4411818981170654, "objective/train/original_loss": 1.4411818981170654, "objective/train/theoretical_loss": 3.5187339663898873, "objective/train/tokens_used": 1652306400, "objective/train/value_avg": -0.005550384521484375, "objective/train/value_loss": 0.00012820863048546016, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.69775390625, "objective/train/value_reward_corr": 0.6792193647270048, "objective/train/value_std": 0.01087188720703125, "objective/train/weight_avg": 1.0007840394973755, "objective/train/weighted_lm_loss": 1.4419759511947632, "objective/train/weights_max": 1.2253751754760742, "objective/train/weights_min": 0.36889898777008057, "theoretical_loss": 3.5187339663898873, "tokens_seen": 1631846400 }, { "epoch": 0.49, "learning_rate": 0.0005105922002888782, "loss": 0.0707, "theoretical_loss": 3.5187339663898873, "tokens_seen": 1631846400 }, { "epoch": 0.49, "learning_rate": 0.0005105119563472958, "loss": 0.071, "theoretical_loss": 3.5186853314331974, "tokens_seen": 1632108544 }, { "epoch": 0.49, "learning_rate": 0.0005104317124057134, "loss": 0.0703, "theoretical_loss": 3.5186367064743145, "tokens_seen": 1632370688 }, { "epoch": 0.49, "learning_rate": 0.0005103514684641309, "loss": 0.073, "theoretical_loss": 3.518588091509578, "tokens_seen": 1632632832 }, { "epoch": 0.49, "learning_rate": 0.0005102712245225486, "loss": 0.0695, "theoretical_loss": 3.51853948653533, "tokens_seen": 1632894976 }, { "epoch": 0.49, "learning_rate": 0.0005101909805809661, "loss": 0.0688, "theoretical_loss": 3.5184908915479145, "tokens_seen": 1633157120 }, { "epoch": 0.49, "learning_rate": 0.0005101107366393837, "loss": 0.0696, "theoretical_loss": 3.5184423065436756, "tokens_seen": 1633419264 }, { "epoch": 0.5, "learning_rate": 0.0005100304926978014, "loss": 0.0693, "theoretical_loss": 3.5183937315189615, "tokens_seen": 1633681408 }, { "epoch": 0.5, "learning_rate": 0.0005099502487562189, "loss": 0.068, "theoretical_loss": 3.5183451664701217, "tokens_seen": 1633943552 }, { "epoch": 0.5, "learning_rate": 0.0005098700048146365, "loss": 0.0693, "theoretical_loss": 3.5182966113935072, "tokens_seen": 1634205696 }, { "epoch": 0.5, "learning_rate": 0.0005097897608730541, "loss": 0.0696, "theoretical_loss": 3.518248066285471, "tokens_seen": 1634467840 }, { "epoch": 0.5, "learning_rate": 0.0005097095169314717, "loss": 0.0675, "theoretical_loss": 3.518199531142369, "tokens_seen": 1634729984 }, { "epoch": 0.5, "learning_rate": 0.0005096292729898892, "loss": 0.0704, "theoretical_loss": 3.518151005960557, "tokens_seen": 1634992128 }, { "epoch": 0.5, "learning_rate": 0.0005095490290483069, "loss": 0.0699, "theoretical_loss": 3.5181024907363945, "tokens_seen": 1635254272 }, { "epoch": 0.5, "learning_rate": 0.0005094687851067244, "loss": 0.0691, "theoretical_loss": 3.518053985466243, "tokens_seen": 1635516416 }, { "epoch": 0.5, "learning_rate": 0.000509388541165142, "loss": 0.0662, "theoretical_loss": 3.518005490146464, "tokens_seen": 1635778560 }, { "epoch": 0.5, "learning_rate": 0.0005093082972235597, "loss": 0.0658, "theoretical_loss": 3.5179570047734225, "tokens_seen": 1636040704 }, { "epoch": 0.5, "learning_rate": 0.0005092280532819772, "loss": 0.0732, "theoretical_loss": 3.5179085293434857, "tokens_seen": 1636302848 }, { "epoch": 0.5, "learning_rate": 0.0005091478093403949, "loss": 0.0653, "theoretical_loss": 3.517860063853022, "tokens_seen": 1636564992 }, { "epoch": 0.5, "learning_rate": 0.0005090675653988124, "loss": 0.0694, "theoretical_loss": 3.517811608298401, "tokens_seen": 1636827136 }, { "epoch": 0.5, "learning_rate": 0.00050898732145723, "loss": 0.0706, "theoretical_loss": 3.5177631626759958, "tokens_seen": 1637089280 }, { "epoch": 0.5, "learning_rate": 0.0005089070775156476, "loss": 0.0665, "theoretical_loss": 3.5177147269821805, "tokens_seen": 1637351424 }, { "epoch": 0.5, "learning_rate": 0.0005088268335740651, "loss": 0.0678, "theoretical_loss": 3.517666301213331, "tokens_seen": 1637613568 }, { "epoch": 0.5, "learning_rate": 0.0005087465896324827, "loss": 0.0691, "theoretical_loss": 3.5176178853658246, "tokens_seen": 1637875712 }, { "epoch": 0.5, "learning_rate": 0.0005086663456909003, "loss": 0.0713, "theoretical_loss": 3.5175694794360424, "tokens_seen": 1638137856 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0005885313730686903, "objective/train/docs_used": 598618, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3161839246749878, "objective/train/original_loss": 1.3161839246749878, "objective/train/theoretical_loss": 3.5175210834203656, "objective/train/tokens_used": 1658860000, "objective/train/value_avg": -0.00928497314453125, "objective/train/value_loss": 0.0003415115352254361, "objective/train/value_max": -7.140636444091797e-05, "objective/train/value_min": -0.94189453125, "objective/train/value_reward_corr": 0.7613534650600375, "objective/train/value_std": 0.0201263427734375, "objective/train/weight_avg": 1.0007390975952148, "objective/train/weighted_lm_loss": 1.3172998428344727, "objective/train/weights_max": 1.4789870977401733, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.5175210834203656, "tokens_seen": 1638400000 }, { "epoch": 0.5, "learning_rate": 0.000508586101749318, "loss": 0.0704, "theoretical_loss": 3.5175210834203656, "tokens_seen": 1638400000 }, { "epoch": 0.5, "learning_rate": 0.0005085058578077355, "loss": 0.0736, "theoretical_loss": 3.5174726973151778, "tokens_seen": 1638662144 }, { "epoch": 0.5, "learning_rate": 0.0005084256138661532, "loss": 0.0679, "theoretical_loss": 3.517424321116865, "tokens_seen": 1638924288 }, { "epoch": 0.5, "learning_rate": 0.0005083453699245707, "loss": 0.071, "theoretical_loss": 3.517375954821815, "tokens_seen": 1639186432 }, { "epoch": 0.5, "learning_rate": 0.0005082651259829882, "loss": 0.0691, "theoretical_loss": 3.517327598426416, "tokens_seen": 1639448576 }, { "epoch": 0.5, "learning_rate": 0.0005081848820414059, "loss": 0.0685, "theoretical_loss": 3.5172792519270604, "tokens_seen": 1639710720 }, { "epoch": 0.5, "learning_rate": 0.0005081046380998234, "loss": 0.0716, "theoretical_loss": 3.517230915320141, "tokens_seen": 1639972864 }, { "epoch": 0.5, "learning_rate": 0.0005080243941582411, "loss": 0.07, "theoretical_loss": 3.5171825886020525, "tokens_seen": 1640235008 }, { "epoch": 0.5, "learning_rate": 0.0005079441502166586, "loss": 0.0673, "theoretical_loss": 3.5171342717691925, "tokens_seen": 1640497152 }, { "epoch": 0.5, "learning_rate": 0.0005078639062750763, "loss": 0.0675, "theoretical_loss": 3.517085964817959, "tokens_seen": 1640759296 }, { "epoch": 0.5, "learning_rate": 0.0005077836623334939, "loss": 0.0694, "theoretical_loss": 3.517037667744754, "tokens_seen": 1641021440 }, { "epoch": 0.5, "learning_rate": 0.0005077034183919115, "loss": 0.0687, "theoretical_loss": 3.516989380545979, "tokens_seen": 1641283584 }, { "epoch": 0.5, "learning_rate": 0.000507623174450329, "loss": 0.0708, "theoretical_loss": 3.5169411032180387, "tokens_seen": 1641545728 }, { "epoch": 0.5, "learning_rate": 0.0005075429305087466, "loss": 0.0713, "theoretical_loss": 3.51689283575734, "tokens_seen": 1641807872 }, { "epoch": 0.5, "learning_rate": 0.0005074626865671642, "loss": 0.0724, "theoretical_loss": 3.516844578160291, "tokens_seen": 1642070016 }, { "epoch": 0.5, "learning_rate": 0.0005073824426255817, "loss": 0.0684, "theoretical_loss": 3.5167963304233014, "tokens_seen": 1642332160 }, { "epoch": 0.5, "learning_rate": 0.0005073021986839994, "loss": 0.0702, "theoretical_loss": 3.516748092542784, "tokens_seen": 1642594304 }, { "epoch": 0.5, "learning_rate": 0.0005072219547424169, "loss": 0.0718, "theoretical_loss": 3.5166998645151515, "tokens_seen": 1642856448 }, { "epoch": 0.5, "learning_rate": 0.0005071417108008345, "loss": 0.0721, "theoretical_loss": 3.516651646336821, "tokens_seen": 1643118592 }, { "epoch": 0.5, "learning_rate": 0.0005070614668592522, "loss": 0.0703, "theoretical_loss": 3.5166034380042093, "tokens_seen": 1643380736 }, { "epoch": 0.5, "learning_rate": 0.0005069812229176697, "loss": 0.0701, "theoretical_loss": 3.5165552395137363, "tokens_seen": 1643642880 }, { "epoch": 0.5, "learning_rate": 0.0005069009789760874, "loss": 0.0681, "theoretical_loss": 3.516507050861823, "tokens_seen": 1643905024 }, { "epoch": 0.5, "learning_rate": 0.0005068207350345049, "loss": 0.0671, "theoretical_loss": 3.5164588720448937, "tokens_seen": 1644167168 }, { "epoch": 0.5, "learning_rate": 0.0005067404910929225, "loss": 0.0699, "theoretical_loss": 3.5164107030593725, "tokens_seen": 1644429312 }, { "epoch": 0.5, "learning_rate": 0.0005066602471513401, "loss": 0.0707, "theoretical_loss": 3.516362543901687, "tokens_seen": 1644691456 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.001454401994124055, "objective/train/docs_used": 601010, "objective/train/instantaneous_batch_size": 64, "objective/train/instantaneous_microbatch_size": 65536, "objective/train/lm_loss": 1.3577359914779663, "objective/train/original_loss": 1.3577358722686768, "objective/train/theoretical_loss": 3.5163143945682656, "objective/train/tokens_used": 1665413600, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.00015290330338757485, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.483642578125, "objective/train/value_reward_corr": 0.7295783796450883, "objective/train/value_std": 0.01434326171875, "objective/train/weight_avg": 1.0015279054641724, "objective/train/weighted_lm_loss": 1.3596255779266357, "objective/train/weights_max": 1.3309441804885864, "objective/train/weights_min": 0.369488388299942, "theoretical_loss": 3.5163143945682656, "tokens_seen": 1644953600 }, { "epoch": 0.5, "learning_rate": 0.0005065800032097577, "loss": 0.0724, "theoretical_loss": 3.5163143945682656, "tokens_seen": 1644953600 }, { "epoch": 0.5, "learning_rate": 0.0005064997592681752, "loss": 0.0716, "theoretical_loss": 3.5162662550555392, "tokens_seen": 1645215744 }, { "epoch": 0.5, "learning_rate": 0.0005064195153265928, "loss": 0.0721, "theoretical_loss": 3.5162181253599405, "tokens_seen": 1645477888 }, { "epoch": 0.5, "learning_rate": 0.0005063392713850105, "loss": 0.0704, "theoretical_loss": 3.516170005477904, "tokens_seen": 1645740032 }, { "epoch": 0.5, "learning_rate": 0.000506259027443428, "loss": 0.0698, "theoretical_loss": 3.516121895405866, "tokens_seen": 1646002176 }, { "epoch": 0.5, "learning_rate": 0.0005061787835018457, "loss": 0.0733, "theoretical_loss": 3.516073795140265, "tokens_seen": 1646264320 }, { "epoch": 0.5, "learning_rate": 0.0005060985395602632, "loss": 0.0693, "theoretical_loss": 3.5160257046775407, "tokens_seen": 1646526464 }, { "epoch": 0.5, "learning_rate": 0.0005060182956186808, "loss": 0.0695, "theoretical_loss": 3.515977624014135, "tokens_seen": 1646788608 }, { "epoch": 0.5, "learning_rate": 0.0005059380516770984, "loss": 0.0682, "theoretical_loss": 3.515929553146492, "tokens_seen": 1647050752 }, { "epoch": 0.5, "learning_rate": 0.0005058578077355159, "loss": 0.0681, "theoretical_loss": 3.515881492071057, "tokens_seen": 1647312896 }, { "epoch": 0.5, "learning_rate": 0.0005057775637939335, "loss": 0.0705, "theoretical_loss": 3.515833440784278, "tokens_seen": 1647575040 }, { "epoch": 0.5, "learning_rate": 0.0005056973198523511, "loss": 0.0688, "theoretical_loss": 3.5157853992826036, "tokens_seen": 1647837184 }, { "epoch": 0.5, "learning_rate": 0.0005056170759107688, "loss": 0.0697, "theoretical_loss": 3.5157373675624854, "tokens_seen": 1648099328 }, { "epoch": 0.5, "learning_rate": 0.0005055368319691864, "loss": 0.0712, "theoretical_loss": 3.515689345620377, "tokens_seen": 1648361472 }, { "epoch": 0.5, "learning_rate": 0.000505456588027604, "loss": 0.071, "theoretical_loss": 3.5156413334527317, "tokens_seen": 1648623616 }, { "epoch": 0.5, "learning_rate": 0.0005053763440860215, "loss": 0.0698, "theoretical_loss": 3.515593331056008, "tokens_seen": 1648885760 }, { "epoch": 0.5, "learning_rate": 0.0005052961001444391, "loss": 0.068, "theoretical_loss": 3.5155453384266635, "tokens_seen": 1649147904 }, { "epoch": 0.5, "learning_rate": 0.0005052158562028567, "loss": 0.068, "theoretical_loss": 3.515497355561159, "tokens_seen": 1649410048 }, { "epoch": 0.5, "learning_rate": 0.0005051356122612742, "loss": 0.0712, "theoretical_loss": 3.515449382455957, "tokens_seen": 1649672192 }, { "epoch": 0.5, "learning_rate": 0.0005050553683196919, "loss": 0.0699, "theoretical_loss": 3.515401419107521, "tokens_seen": 1649934336 } ], "max_steps": 12588, "num_train_epochs": 9223372036854775807, "total_flos": 8.420284921943163e+17, "trial_name": null, "trial_params": null }