| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.28, |
| "eval_steps": 500, |
| "global_step": 140, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 1005.0, |
| "completions/max_terminated_length": 1005.0, |
| "completions/mean_length": 442.6666666666667, |
| "completions/mean_terminated_length": 482.90909090909093, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 212.0, |
| "epoch": 0.002, |
| "format_failures": 0.0, |
| "grad_norm": 0.3274489641189575, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "loss": 0.048, |
| "num_tokens": 21804.0, |
| "reward": 0.26185137033462524, |
| "reward_std": 0.28920137882232666, |
| "step": 1 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 264.0, |
| "completions/max_terminated_length": 264.0, |
| "completions/mean_length": 136.5, |
| "completions/mean_terminated_length": 148.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 60.0, |
| "epoch": 0.004, |
| "format_failures": 0.0, |
| "grad_norm": 1.2693145275115967, |
| "kl": 0.0, |
| "learning_rate": 1e-06, |
| "loss": 0.0962, |
| "num_tokens": 42324.0, |
| "reward": 0.38461539149284363, |
| "reward_std": 0.3770364224910736, |
| "step": 2 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 362.0, |
| "completions/max_terminated_length": 362.0, |
| "completions/mean_length": 217.83333333333334, |
| "completions/mean_terminated_length": 237.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 124.0, |
| "epoch": 0.006, |
| "format_failures": 0.0, |
| "grad_norm": 0.3044165074825287, |
| "kl": 0.19029825925827026, |
| "learning_rate": 1e-06, |
| "loss": 0.0009, |
| "num_tokens": 58980.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 3 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 896.0, |
| "completions/max_terminated_length": 896.0, |
| "completions/mean_length": 321.0833333333333, |
| "completions/mean_terminated_length": 350.27272727272725, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 103.0, |
| "epoch": 0.008, |
| "format_failures": 1.0, |
| "grad_norm": 0.3372040390968323, |
| "kl": 0.029289670288562775, |
| "learning_rate": 1e-06, |
| "loss": 0.1107, |
| "num_tokens": 81756.0, |
| "reward": 0.23689448833465576, |
| "reward_std": 0.2267814427614212, |
| "step": 4 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 193.0, |
| "completions/max_terminated_length": 193.0, |
| "completions/mean_length": 119.08333333333333, |
| "completions/mean_terminated_length": 129.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 66.0, |
| "epoch": 0.01, |
| "format_failures": 0.0, |
| "grad_norm": 10.779764175415039, |
| "kl": 3.1303787231445312, |
| "learning_rate": 1e-06, |
| "loss": 0.0311, |
| "num_tokens": 96360.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.30772873759269714, |
| "step": 5 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 745.0, |
| "completions/max_terminated_length": 745.0, |
| "completions/mean_length": 420.6666666666667, |
| "completions/mean_terminated_length": 458.90909090909093, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 329.0, |
| "epoch": 0.012, |
| "format_failures": 1.0, |
| "grad_norm": 0.2519327402114868, |
| "kl": 0.016291129169985652, |
| "learning_rate": 1e-06, |
| "loss": 0.0559, |
| "num_tokens": 119712.0, |
| "reward": 0.34878918528556824, |
| "reward_std": 0.2739146649837494, |
| "step": 6 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 164.0, |
| "completions/max_terminated_length": 164.0, |
| "completions/mean_length": 67.33333333333333, |
| "completions/mean_terminated_length": 73.45454545454545, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 36.0, |
| "epoch": 0.014, |
| "format_failures": 0.0, |
| "grad_norm": 2531.101806640625, |
| "kl": 562.2636108398438, |
| "learning_rate": 1e-06, |
| "loss": 5.4405, |
| "num_tokens": 128772.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 7 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 421.0, |
| "completions/max_terminated_length": 421.0, |
| "completions/mean_length": 186.41666666666666, |
| "completions/mean_terminated_length": 203.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 85.0, |
| "epoch": 0.016, |
| "format_failures": 0.0, |
| "grad_norm": 0.7023671865463257, |
| "kl": 0.0004708967899205163, |
| "learning_rate": 1e-06, |
| "loss": -0.1143, |
| "num_tokens": 164100.0, |
| "reward": 0.06388889253139496, |
| "reward_std": 0.1274919956922531, |
| "step": 8 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 484.0, |
| "completions/max_terminated_length": 484.0, |
| "completions/mean_length": 253.41666666666666, |
| "completions/mean_terminated_length": 276.45454545454544, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 165.0, |
| "epoch": 0.018, |
| "format_failures": 0.0, |
| "grad_norm": 1.1911135911941528, |
| "kl": 0.0012580148177221417, |
| "learning_rate": 1e-06, |
| "loss": -0.3277, |
| "num_tokens": 197808.0, |
| "reward": 0.1118159219622612, |
| "reward_std": 0.2614404261112213, |
| "step": 9 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 170.0, |
| "completions/max_terminated_length": 170.0, |
| "completions/mean_length": 64.83333333333333, |
| "completions/mean_terminated_length": 70.72727272727273, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 35.0, |
| "epoch": 0.02, |
| "format_failures": 0.0, |
| "grad_norm": 1.324984073638916, |
| "kl": 0.2648707218468189, |
| "learning_rate": 1e-06, |
| "loss": -0.0221, |
| "num_tokens": 207000.0, |
| "reward": 0.01666666753590107, |
| "reward_std": 0.057735029608011246, |
| "step": 10 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 183.0, |
| "completions/max_terminated_length": 183.0, |
| "completions/mean_length": 126.33333333333333, |
| "completions/mean_terminated_length": 137.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 62.0, |
| "epoch": 0.022, |
| "format_failures": 0.0, |
| "grad_norm": 0.5873882174491882, |
| "kl": 0.017587594222277403, |
| "learning_rate": 1e-06, |
| "loss": 0.0197, |
| "num_tokens": 221808.0, |
| "reward": 0.1805555671453476, |
| "reward_std": 0.3134874999523163, |
| "step": 11 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.16666666666666663, |
| "completions/max_length": 2049.0, |
| "completions/max_terminated_length": 2049.0, |
| "completions/mean_length": 541.25, |
| "completions/mean_terminated_length": 649.5, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 137.0, |
| "epoch": 0.024, |
| "format_failures": 0.0, |
| "grad_norm": 0.48546102643013, |
| "kl": 0.002345994464121759, |
| "learning_rate": 1e-06, |
| "loss": 0.0336, |
| "num_tokens": 255132.0, |
| "reward": 0.4682539701461792, |
| "reward_std": 0.4320843815803528, |
| "step": 12 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 53.0, |
| "completions/max_terminated_length": 53.0, |
| "completions/mean_length": 29.666666666666668, |
| "completions/mean_terminated_length": 32.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 22.0, |
| "epoch": 0.026, |
| "format_failures": 0.0, |
| "grad_norm": 0.186175137758255, |
| "kl": 0.041642000898718834, |
| "learning_rate": 1e-06, |
| "loss": 0.0008, |
| "num_tokens": 265092.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 13 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 708.0, |
| "completions/max_terminated_length": 708.0, |
| "completions/mean_length": 381.6666666666667, |
| "completions/mean_terminated_length": 416.3636363636364, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 188.0, |
| "epoch": 0.028, |
| "format_failures": 0.0, |
| "grad_norm": 0.20345070958137512, |
| "kl": 0.009796573780477047, |
| "learning_rate": 1e-06, |
| "loss": 0.0257, |
| "num_tokens": 294096.0, |
| "reward": 0.29761505126953125, |
| "reward_std": 0.16453009843826294, |
| "step": 14 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 1034.0, |
| "completions/max_terminated_length": 1034.0, |
| "completions/mean_length": 332.25, |
| "completions/mean_terminated_length": 362.45454545454544, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 125.0, |
| "epoch": 0.03, |
| "format_failures": 1.0, |
| "grad_norm": 0.5157941579818726, |
| "kl": 0.004433898604474962, |
| "learning_rate": 1e-06, |
| "loss": -0.0103, |
| "num_tokens": 325368.0, |
| "reward": 0.2917824387550354, |
| "reward_std": 0.3325340151786804, |
| "step": 15 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 268.0, |
| "completions/max_terminated_length": 268.0, |
| "completions/mean_length": 150.16666666666666, |
| "completions/mean_terminated_length": 163.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 30.0, |
| "epoch": 0.032, |
| "format_failures": 0.0, |
| "grad_norm": 0.05657627806067467, |
| "kl": 0.0326845021918416, |
| "learning_rate": 1e-06, |
| "loss": 0.0002, |
| "num_tokens": 341196.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 16 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 296.0, |
| "completions/max_terminated_length": 296.0, |
| "completions/mean_length": 228.41666666666666, |
| "completions/mean_terminated_length": 249.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 26.0, |
| "epoch": 0.034, |
| "format_failures": 0.0, |
| "grad_norm": 1.8653935194015503, |
| "kl": 0.8598212422803044, |
| "learning_rate": 1e-06, |
| "loss": 0.014, |
| "num_tokens": 354228.0, |
| "reward": 0.01666666753590107, |
| "reward_std": 0.05773502588272095, |
| "step": 17 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 71.0, |
| "completions/max_terminated_length": 71.0, |
| "completions/mean_length": 48.333333333333336, |
| "completions/mean_terminated_length": 52.72727272727273, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 25.0, |
| "epoch": 0.036, |
| "format_failures": 1.0, |
| "grad_norm": 0.018069056794047356, |
| "kl": 0.023271435871720314, |
| "learning_rate": 1e-06, |
| "loss": 0.0002, |
| "num_tokens": 381468.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 18 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 719.0, |
| "completions/max_terminated_length": 719.0, |
| "completions/mean_length": 228.91666666666666, |
| "completions/mean_terminated_length": 249.72727272727272, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 16.0, |
| "epoch": 0.038, |
| "format_failures": 0.0, |
| "grad_norm": 1.073132872581482, |
| "kl": 0.003063492476940155, |
| "learning_rate": 1e-06, |
| "loss": 0.0334, |
| "num_tokens": 415356.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.38924944400787354, |
| "step": 19 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 153.0, |
| "completions/max_terminated_length": 153.0, |
| "completions/mean_length": 84.58333333333333, |
| "completions/mean_terminated_length": 92.27272727272727, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 66.0, |
| "epoch": 0.04, |
| "format_failures": 0.0, |
| "grad_norm": 1.1736811399459839, |
| "kl": 0.018741012550890446, |
| "learning_rate": 1e-06, |
| "loss": 0.0962, |
| "num_tokens": 442596.0, |
| "reward": 0.1041666716337204, |
| "reward_std": 0.22508415579795837, |
| "step": 20 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 118.0, |
| "completions/max_terminated_length": 118.0, |
| "completions/mean_length": 89.58333333333333, |
| "completions/mean_terminated_length": 97.72727272727273, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 69.0, |
| "epoch": 0.042, |
| "format_failures": 0.0, |
| "grad_norm": 0.960914671421051, |
| "kl": 0.03209133446216583, |
| "learning_rate": 1e-06, |
| "loss": -0.0169, |
| "num_tokens": 453252.0, |
| "reward": 0.2708333432674408, |
| "reward_std": 0.4454101026058197, |
| "step": 21 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 249.0, |
| "completions/max_terminated_length": 249.0, |
| "completions/mean_length": 124.33333333333333, |
| "completions/mean_terminated_length": 135.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 74.0, |
| "epoch": 0.044, |
| "format_failures": 0.0, |
| "grad_norm": 1.0618880987167358, |
| "kl": 0.03219995368272066, |
| "learning_rate": 1e-06, |
| "loss": -0.3593, |
| "num_tokens": 481656.0, |
| "reward": 0.09444444626569748, |
| "reward_std": 0.17164288461208344, |
| "step": 22 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 673.0, |
| "completions/max_terminated_length": 673.0, |
| "completions/mean_length": 299.5, |
| "completions/mean_terminated_length": 326.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 148.0, |
| "epoch": 0.046, |
| "format_failures": 0.0, |
| "grad_norm": 0.3598278760910034, |
| "kl": 0.031054741702973843, |
| "learning_rate": 1e-06, |
| "loss": 0.0131, |
| "num_tokens": 505704.0, |
| "reward": 0.4847402572631836, |
| "reward_std": 0.25003767013549805, |
| "step": 23 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 497.0, |
| "completions/max_terminated_length": 497.0, |
| "completions/mean_length": 297.5, |
| "completions/mean_terminated_length": 324.54545454545456, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 211.0, |
| "epoch": 0.048, |
| "format_failures": 0.0, |
| "grad_norm": 0.27960336208343506, |
| "kl": 0.04240706283599138, |
| "learning_rate": 1e-06, |
| "loss": -0.0398, |
| "num_tokens": 523500.0, |
| "reward": 0.2615740895271301, |
| "reward_std": 0.219794362783432, |
| "step": 24 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 383.0, |
| "completions/max_terminated_length": 383.0, |
| "completions/mean_length": 179.16666666666666, |
| "completions/mean_terminated_length": 195.45454545454547, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 114.0, |
| "epoch": 0.05, |
| "format_failures": 0.0, |
| "grad_norm": 1.2980320453643799, |
| "kl": 0.0048073166981339455, |
| "learning_rate": 1e-06, |
| "loss": -0.3887, |
| "num_tokens": 555300.0, |
| "reward": 0.5003399848937988, |
| "reward_std": 0.39150455594062805, |
| "step": 25 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 699.0, |
| "completions/max_terminated_length": 699.0, |
| "completions/mean_length": 315.9166666666667, |
| "completions/mean_terminated_length": 344.6363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 239.0, |
| "epoch": 0.052, |
| "format_failures": 0.0, |
| "grad_norm": 0.2552706003189087, |
| "kl": 0.027493927627801895, |
| "learning_rate": 1e-06, |
| "loss": 0.0567, |
| "num_tokens": 576000.0, |
| "reward": 0.43729767203330994, |
| "reward_std": 0.18975813686847687, |
| "step": 26 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 140.0, |
| "completions/max_terminated_length": 140.0, |
| "completions/mean_length": 72.91666666666667, |
| "completions/mean_terminated_length": 79.54545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 65.0, |
| "epoch": 0.054, |
| "format_failures": 0.0, |
| "grad_norm": 1.1299240589141846, |
| "kl": 0.0332061443477869, |
| "learning_rate": 1e-06, |
| "loss": -0.057, |
| "num_tokens": 584712.0, |
| "reward": 0.33095240592956543, |
| "reward_std": 0.444376677274704, |
| "step": 27 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 181.0, |
| "completions/max_terminated_length": 181.0, |
| "completions/mean_length": 91.16666666666667, |
| "completions/mean_terminated_length": 99.45454545454545, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 56.0, |
| "epoch": 0.056, |
| "format_failures": 0.0, |
| "grad_norm": 0.044371046125888824, |
| "kl": 0.03765446413308382, |
| "learning_rate": 1e-06, |
| "loss": 0.0004, |
| "num_tokens": 598032.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 28 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 522.0, |
| "completions/max_terminated_length": 522.0, |
| "completions/mean_length": 304.5, |
| "completions/mean_terminated_length": 332.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 212.0, |
| "epoch": 0.058, |
| "format_failures": 0.0, |
| "grad_norm": 0.5104940533638, |
| "kl": 0.03451683558523655, |
| "learning_rate": 1e-06, |
| "loss": -0.0274, |
| "num_tokens": 615204.0, |
| "reward": 0.4068452715873718, |
| "reward_std": 0.37161099910736084, |
| "step": 29 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 296.0, |
| "completions/max_terminated_length": 296.0, |
| "completions/mean_length": 162.91666666666666, |
| "completions/mean_terminated_length": 177.72727272727272, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 59.0, |
| "epoch": 0.06, |
| "format_failures": 0.0, |
| "grad_norm": 1.2335582971572876, |
| "kl": 0.007039119256660342, |
| "learning_rate": 1e-06, |
| "loss": 0.2673, |
| "num_tokens": 647892.0, |
| "reward": 0.3291666805744171, |
| "reward_std": 0.4266456663608551, |
| "step": 30 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 812.0, |
| "completions/max_terminated_length": 812.0, |
| "completions/mean_length": 332.5, |
| "completions/mean_terminated_length": 362.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 222.0, |
| "epoch": 0.062, |
| "format_failures": 2.0, |
| "grad_norm": 0.3000166416168213, |
| "kl": 0.03664882015436888, |
| "learning_rate": 1e-06, |
| "loss": 0.0306, |
| "num_tokens": 670860.0, |
| "reward": 0.6458902955055237, |
| "reward_std": 0.26038500666618347, |
| "step": 31 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 335.0, |
| "completions/max_terminated_length": 335.0, |
| "completions/mean_length": 218.66666666666666, |
| "completions/mean_terminated_length": 238.54545454545453, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 180.0, |
| "epoch": 0.064, |
| "format_failures": 0.0, |
| "grad_norm": 0.37272748351097107, |
| "kl": 0.07015270553529263, |
| "learning_rate": 1e-06, |
| "loss": 0.0169, |
| "num_tokens": 682212.0, |
| "reward": 0.43658646941185, |
| "reward_std": 0.24143192172050476, |
| "step": 32 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 73.0, |
| "completions/max_terminated_length": 73.0, |
| "completions/mean_length": 53.25, |
| "completions/mean_terminated_length": 58.09090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 47.0, |
| "epoch": 0.066, |
| "format_failures": 0.0, |
| "grad_norm": 1.1589769124984741, |
| "kl": 0.03555137664079666, |
| "learning_rate": 1e-06, |
| "loss": -0.0651, |
| "num_tokens": 692040.0, |
| "reward": 0.11666666716337204, |
| "reward_std": 0.301008403301239, |
| "step": 33 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 465.0, |
| "completions/max_terminated_length": 465.0, |
| "completions/mean_length": 336.0, |
| "completions/mean_terminated_length": 366.54545454545456, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 292.0, |
| "epoch": 0.068, |
| "format_failures": 0.0, |
| "grad_norm": 0.42152470350265503, |
| "kl": 0.19683832861483097, |
| "learning_rate": 1e-06, |
| "loss": -0.0173, |
| "num_tokens": 704484.0, |
| "reward": 0.5136784911155701, |
| "reward_std": 0.38917282223701477, |
| "step": 34 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 93.0, |
| "completions/max_terminated_length": 93.0, |
| "completions/mean_length": 59.166666666666664, |
| "completions/mean_terminated_length": 64.54545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 40.0, |
| "epoch": 0.07, |
| "format_failures": 0.0, |
| "grad_norm": 1.729435682296753, |
| "kl": 0.055947478860616684, |
| "learning_rate": 1e-06, |
| "loss": 0.0028, |
| "num_tokens": 710520.0, |
| "reward": 0.5611110925674438, |
| "reward_std": 0.45256468653678894, |
| "step": 35 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 192.0, |
| "completions/max_terminated_length": 192.0, |
| "completions/mean_length": 91.91666666666667, |
| "completions/mean_terminated_length": 100.27272727272727, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 59.0, |
| "epoch": 0.072, |
| "format_failures": 0.0, |
| "grad_norm": 0.7297618389129639, |
| "kl": 0.28226011246442795, |
| "learning_rate": 1e-06, |
| "loss": 0.0022, |
| "num_tokens": 720588.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 36 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 255.0, |
| "completions/max_terminated_length": 255.0, |
| "completions/mean_length": 184.66666666666666, |
| "completions/mean_terminated_length": 201.45454545454547, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 152.0, |
| "epoch": 0.074, |
| "format_failures": 0.0, |
| "grad_norm": 0.1786535382270813, |
| "kl": 0.05143214017152786, |
| "learning_rate": 1e-06, |
| "loss": 0.001, |
| "num_tokens": 731112.0, |
| "reward": 0.5931217074394226, |
| "reward_std": 0.15197694301605225, |
| "step": 37 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 127.0, |
| "completions/max_terminated_length": 127.0, |
| "completions/mean_length": 61.416666666666664, |
| "completions/mean_terminated_length": 67.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 36.0, |
| "epoch": 0.076, |
| "format_failures": 1.0, |
| "grad_norm": 2.560441732406616, |
| "kl": 0.061069367453455925, |
| "learning_rate": 1e-06, |
| "loss": 0.1107, |
| "num_tokens": 758340.0, |
| "reward": 0.0833333358168602, |
| "reward_std": 0.28867512941360474, |
| "step": 38 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 2050.0, |
| "completions/max_terminated_length": 2050.0, |
| "completions/mean_length": 715.0, |
| "completions/mean_terminated_length": 780.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 357.0, |
| "epoch": 0.078, |
| "format_failures": 0.0, |
| "grad_norm": 0.41932860016822815, |
| "kl": 0.01548363408073783, |
| "learning_rate": 1e-06, |
| "loss": 0.0106, |
| "num_tokens": 790968.0, |
| "reward": 0.25740742683410645, |
| "reward_std": 0.32573264837265015, |
| "step": 39 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 1162.0, |
| "completions/max_terminated_length": 1162.0, |
| "completions/mean_length": 471.75, |
| "completions/mean_terminated_length": 514.6363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 113.0, |
| "epoch": 0.08, |
| "format_failures": 0.0, |
| "grad_norm": 0.8145480155944824, |
| "kl": 0.016389482654631138, |
| "learning_rate": 1e-06, |
| "loss": 0.154, |
| "num_tokens": 829104.0, |
| "reward": 0.43334314227104187, |
| "reward_std": 0.3763042986392975, |
| "step": 40 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 254.0, |
| "completions/max_terminated_length": 254.0, |
| "completions/mean_length": 99.91666666666667, |
| "completions/mean_terminated_length": 109.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 47.0, |
| "epoch": 0.082, |
| "format_failures": 0.0, |
| "grad_norm": 18.232030868530273, |
| "kl": 1.717683531343937, |
| "learning_rate": 1e-06, |
| "loss": 0.197, |
| "num_tokens": 850716.0, |
| "reward": 0.2430555671453476, |
| "reward_std": 0.4042987823486328, |
| "step": 41 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 156.0, |
| "completions/max_terminated_length": 156.0, |
| "completions/mean_length": 77.33333333333333, |
| "completions/mean_terminated_length": 84.36363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 63.0, |
| "epoch": 0.084, |
| "format_failures": 0.0, |
| "grad_norm": 0.5794758796691895, |
| "kl": 0.21323725581169128, |
| "learning_rate": 1e-06, |
| "loss": -0.0344, |
| "num_tokens": 859644.0, |
| "reward": 0.0476190522313118, |
| "reward_std": 0.1649572253227234, |
| "step": 42 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 186.0, |
| "completions/max_terminated_length": 186.0, |
| "completions/mean_length": 136.66666666666666, |
| "completions/mean_terminated_length": 149.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 57.0, |
| "epoch": 0.086, |
| "format_failures": 0.0, |
| "grad_norm": 2.507535934448242, |
| "kl": 0.2139158956706524, |
| "learning_rate": 1e-06, |
| "loss": -0.0282, |
| "num_tokens": 871596.0, |
| "reward": 0.3333333432674408, |
| "reward_std": 0.4923659861087799, |
| "step": 43 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.33333333333333337, |
| "completions/max_length": 53.0, |
| "completions/max_terminated_length": 53.0, |
| "completions/mean_length": 28.25, |
| "completions/mean_terminated_length": 42.375, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 21.0, |
| "epoch": 0.088, |
| "format_failures": 0.0, |
| "grad_norm": 0.33207282423973083, |
| "kl": 0.035286733880639076, |
| "learning_rate": 1e-06, |
| "loss": 0.0008, |
| "num_tokens": 879828.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 44 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 359.0, |
| "completions/max_terminated_length": 359.0, |
| "completions/mean_length": 117.83333333333333, |
| "completions/mean_terminated_length": 128.54545454545453, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 68.0, |
| "epoch": 0.09, |
| "format_failures": 0.0, |
| "grad_norm": 0.2761678099632263, |
| "kl": 0.15724625438451767, |
| "learning_rate": 1e-06, |
| "loss": 0.0015, |
| "num_tokens": 899448.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 45 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 179.0, |
| "completions/max_terminated_length": 179.0, |
| "completions/mean_length": 105.16666666666667, |
| "completions/mean_terminated_length": 114.72727272727273, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 26.0, |
| "epoch": 0.092, |
| "format_failures": 0.0, |
| "grad_norm": 1.1471128463745117, |
| "kl": 0.12899010255932808, |
| "learning_rate": 1e-06, |
| "loss": 0.0117, |
| "num_tokens": 914760.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.30151134729385376, |
| "step": 46 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 345.0, |
| "completions/max_terminated_length": 345.0, |
| "completions/mean_length": 233.66666666666666, |
| "completions/mean_terminated_length": 254.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 153.0, |
| "epoch": 0.094, |
| "format_failures": 0.0, |
| "grad_norm": 0.5467153191566467, |
| "kl": 0.2796362675726414, |
| "learning_rate": 1e-06, |
| "loss": -0.0318, |
| "num_tokens": 925212.0, |
| "reward": 0.549458920955658, |
| "reward_std": 0.3676450848579407, |
| "step": 47 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 264.0, |
| "completions/max_terminated_length": 264.0, |
| "completions/mean_length": 166.25, |
| "completions/mean_terminated_length": 181.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 142.0, |
| "epoch": 0.096, |
| "format_failures": 0.0, |
| "grad_norm": 0.78724205493927, |
| "kl": 0.49516983330249786, |
| "learning_rate": 1e-06, |
| "loss": -0.0104, |
| "num_tokens": 938424.0, |
| "reward": 0.02083333395421505, |
| "reward_std": 0.07216878235340118, |
| "step": 48 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 203.0, |
| "completions/max_terminated_length": 203.0, |
| "completions/mean_length": 106.08333333333333, |
| "completions/mean_terminated_length": 115.72727272727273, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 29.0, |
| "epoch": 0.098, |
| "format_failures": 1.0, |
| "grad_norm": 1.7356528043746948, |
| "kl": 0.389555960893631, |
| "learning_rate": 1e-06, |
| "loss": -0.0599, |
| "num_tokens": 950172.0, |
| "reward": 0.1944444626569748, |
| "reward_std": 0.38816672563552856, |
| "step": 49 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.16666666666666663, |
| "completions/max_length": 1127.0, |
| "completions/max_terminated_length": 1127.0, |
| "completions/mean_length": 186.58333333333334, |
| "completions/mean_terminated_length": 223.9, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 49.0, |
| "epoch": 0.1, |
| "format_failures": 0.0, |
| "grad_norm": 1.3811311721801758, |
| "kl": 0.0656690001487732, |
| "learning_rate": 1e-06, |
| "loss": 0.949, |
| "num_tokens": 981816.0, |
| "reward": 0.5007641911506653, |
| "reward_std": 0.4272591173648834, |
| "step": 50 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 121.0, |
| "completions/max_terminated_length": 121.0, |
| "completions/mean_length": 74.75, |
| "completions/mean_terminated_length": 81.54545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 67.0, |
| "epoch": 0.102, |
| "format_failures": 0.0, |
| "grad_norm": 3.630605697631836, |
| "kl": 0.11415744014084339, |
| "learning_rate": 1e-06, |
| "loss": 0.1083, |
| "num_tokens": 994800.0, |
| "reward": 0.4722222685813904, |
| "reward_std": 0.4596514403820038, |
| "step": 51 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 447.0, |
| "completions/max_terminated_length": 447.0, |
| "completions/mean_length": 292.9166666666667, |
| "completions/mean_terminated_length": 319.54545454545456, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 230.0, |
| "epoch": 0.104, |
| "format_failures": 0.0, |
| "grad_norm": 0.664616048336029, |
| "kl": 0.024851050227880478, |
| "learning_rate": 1e-06, |
| "loss": -0.0988, |
| "num_tokens": 1028352.0, |
| "reward": 0.5121031999588013, |
| "reward_std": 0.26174625754356384, |
| "step": 52 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 478.0, |
| "completions/max_terminated_length": 478.0, |
| "completions/mean_length": 267.4166666666667, |
| "completions/mean_terminated_length": 291.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 158.0, |
| "epoch": 0.106, |
| "format_failures": 0.0, |
| "grad_norm": 0.3362949788570404, |
| "kl": 0.09099859930574894, |
| "learning_rate": 1e-06, |
| "loss": 0.0303, |
| "num_tokens": 1053264.0, |
| "reward": 0.0625, |
| "reward_std": 0.21650634706020355, |
| "step": 53 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 571.0, |
| "completions/max_terminated_length": 571.0, |
| "completions/mean_length": 292.0833333333333, |
| "completions/mean_terminated_length": 318.6363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 190.0, |
| "epoch": 0.108, |
| "format_failures": 0.0, |
| "grad_norm": 0.17621153593063354, |
| "kl": 0.03119577933102846, |
| "learning_rate": 1e-06, |
| "loss": 0.0012, |
| "num_tokens": 1068108.0, |
| "reward": 0.4200083613395691, |
| "reward_std": 0.194437637925148, |
| "step": 54 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 168.0, |
| "completions/max_terminated_length": 168.0, |
| "completions/mean_length": 88.75, |
| "completions/mean_terminated_length": 96.81818181818181, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 69.0, |
| "epoch": 0.11, |
| "format_failures": 0.0, |
| "grad_norm": 0.6367191672325134, |
| "kl": 0.03671593498438597, |
| "learning_rate": 1e-06, |
| "loss": 0.0088, |
| "num_tokens": 1079820.0, |
| "reward": 0.19027778506278992, |
| "reward_std": 0.15930061042308807, |
| "step": 55 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 189.0, |
| "completions/max_terminated_length": 189.0, |
| "completions/mean_length": 163.58333333333334, |
| "completions/mean_terminated_length": 178.45454545454547, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 113.0, |
| "epoch": 0.112, |
| "format_failures": 0.0, |
| "grad_norm": 2.1606733798980713, |
| "kl": 0.20935122203081846, |
| "learning_rate": 1e-06, |
| "loss": -0.0277, |
| "num_tokens": 1091832.0, |
| "reward": 0.5777778029441833, |
| "reward_std": 0.4515592157840729, |
| "step": 56 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 456.0, |
| "completions/max_terminated_length": 456.0, |
| "completions/mean_length": 288.4166666666667, |
| "completions/mean_terminated_length": 314.6363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 169.0, |
| "epoch": 0.114, |
| "format_failures": 0.0, |
| "grad_norm": 0.32393601536750793, |
| "kl": 0.031358057633042336, |
| "learning_rate": 1e-06, |
| "loss": -0.044, |
| "num_tokens": 1105608.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.24984844028949738, |
| "step": 57 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 96.0, |
| "completions/max_terminated_length": 96.0, |
| "completions/mean_length": 65.5, |
| "completions/mean_terminated_length": 71.45454545454545, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 55.0, |
| "epoch": 0.116, |
| "format_failures": 0.0, |
| "grad_norm": 0.021954922005534172, |
| "kl": 0.018348069861531258, |
| "learning_rate": 1e-06, |
| "loss": 0.0002, |
| "num_tokens": 1113168.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 58 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 533.0, |
| "completions/max_terminated_length": 533.0, |
| "completions/mean_length": 224.41666666666666, |
| "completions/mean_terminated_length": 244.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 92.0, |
| "epoch": 0.118, |
| "format_failures": 0.0, |
| "grad_norm": 1.1990734338760376, |
| "kl": 0.3062889650464058, |
| "learning_rate": 1e-06, |
| "loss": 0.0431, |
| "num_tokens": 1136832.0, |
| "reward": 0.2395833432674408, |
| "reward_std": 0.25259074568748474, |
| "step": 59 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 374.0, |
| "completions/max_terminated_length": 374.0, |
| "completions/mean_length": 238.0, |
| "completions/mean_terminated_length": 259.6363636363636, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 80.0, |
| "epoch": 0.12, |
| "format_failures": 0.0, |
| "grad_norm": 0.5170612931251526, |
| "kl": 0.03292474150657654, |
| "learning_rate": 1e-06, |
| "loss": 0.0251, |
| "num_tokens": 1150536.0, |
| "reward": 0.39345240592956543, |
| "reward_std": 0.3553503155708313, |
| "step": 60 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 158.0, |
| "completions/max_terminated_length": 158.0, |
| "completions/mean_length": 82.16666666666667, |
| "completions/mean_terminated_length": 89.63636363636364, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 65.0, |
| "epoch": 0.122, |
| "format_failures": 0.0, |
| "grad_norm": 1.1562092304229736, |
| "kl": 0.023061166517436504, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "num_tokens": 1158984.0, |
| "reward": 0.7333333492279053, |
| "reward_std": 0.3639269173145294, |
| "step": 61 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 553.0, |
| "completions/max_terminated_length": 553.0, |
| "completions/mean_length": 296.1666666666667, |
| "completions/mean_terminated_length": 323.09090909090907, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 201.0, |
| "epoch": 0.124, |
| "format_failures": 0.0, |
| "grad_norm": 0.32044336199760437, |
| "kl": 0.06375124305486679, |
| "learning_rate": 1e-06, |
| "loss": 0.0015, |
| "num_tokens": 1173504.0, |
| "reward": 0.43736547231674194, |
| "reward_std": 0.25956276059150696, |
| "step": 62 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 2051.0, |
| "completions/max_terminated_length": 2051.0, |
| "completions/mean_length": 586.4166666666666, |
| "completions/mean_terminated_length": 639.7272727272727, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 38.0, |
| "epoch": 0.126, |
| "format_failures": 0.0, |
| "grad_norm": 0.6462875008583069, |
| "kl": 0.023477558977901936, |
| "learning_rate": 1e-06, |
| "loss": 0.0492, |
| "num_tokens": 1206840.0, |
| "reward": 0.501884937286377, |
| "reward_std": 0.5706992149353027, |
| "step": 63 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 270.0, |
| "completions/max_terminated_length": 270.0, |
| "completions/mean_length": 150.66666666666666, |
| "completions/mean_terminated_length": 164.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 97.0, |
| "epoch": 0.128, |
| "format_failures": 0.0, |
| "grad_norm": 0.4827415347099304, |
| "kl": 0.11513948068022728, |
| "learning_rate": 1e-06, |
| "loss": 0.2183, |
| "num_tokens": 1230888.0, |
| "reward": 0.3715476393699646, |
| "reward_std": 0.17215265333652496, |
| "step": 64 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 1340.0, |
| "completions/max_terminated_length": 1340.0, |
| "completions/mean_length": 277.5833333333333, |
| "completions/mean_terminated_length": 302.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 20.0, |
| "epoch": 0.13, |
| "format_failures": 0.0, |
| "grad_norm": 0.46889665722846985, |
| "kl": 0.9275694619864225, |
| "learning_rate": 1e-06, |
| "loss": 0.2754, |
| "num_tokens": 1262100.0, |
| "reward": 0.3917522430419922, |
| "reward_std": 0.2266404628753662, |
| "step": 65 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 599.0, |
| "completions/max_terminated_length": 599.0, |
| "completions/mean_length": 366.25, |
| "completions/mean_terminated_length": 399.54545454545456, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 212.0, |
| "epoch": 0.132, |
| "format_failures": 1.0, |
| "grad_norm": 0.30657899379730225, |
| "kl": 0.16883518174290657, |
| "learning_rate": 1e-06, |
| "loss": 0.0155, |
| "num_tokens": 1278012.0, |
| "reward": 0.34761905670166016, |
| "reward_std": 0.2757572531700134, |
| "step": 66 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.16666666666666663, |
| "completions/max_length": 559.0, |
| "completions/max_terminated_length": 559.0, |
| "completions/mean_length": 300.9166666666667, |
| "completions/mean_terminated_length": 361.1, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 224.0, |
| "epoch": 0.134, |
| "format_failures": 0.0, |
| "grad_norm": 0.6152874231338501, |
| "kl": 0.10999106336385012, |
| "learning_rate": 1e-06, |
| "loss": 0.3303, |
| "num_tokens": 1308996.0, |
| "reward": 0.32609128952026367, |
| "reward_std": 0.23752012848854065, |
| "step": 67 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 227.0, |
| "completions/max_terminated_length": 227.0, |
| "completions/mean_length": 137.5, |
| "completions/mean_terminated_length": 150.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 59.0, |
| "epoch": 0.136, |
| "format_failures": 0.0, |
| "grad_norm": 1.7395364046096802, |
| "kl": 0.7087040841579437, |
| "learning_rate": 1e-06, |
| "loss": -0.0121, |
| "num_tokens": 1321020.0, |
| "reward": 0.20873016119003296, |
| "reward_std": 0.34043052792549133, |
| "step": 68 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 297.0, |
| "completions/max_terminated_length": 297.0, |
| "completions/mean_length": 129.83333333333334, |
| "completions/mean_terminated_length": 141.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 28.0, |
| "epoch": 0.138, |
| "format_failures": 0.0, |
| "grad_norm": 0.902642548084259, |
| "kl": 0.7902000248432159, |
| "learning_rate": 1e-06, |
| "loss": 0.0035, |
| "num_tokens": 1332492.0, |
| "reward": 0.0877976268529892, |
| "reward_std": 0.20928393304347992, |
| "step": 69 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.25, |
| "completions/max_length": 1172.0, |
| "completions/max_terminated_length": 1172.0, |
| "completions/mean_length": 333.1666666666667, |
| "completions/mean_terminated_length": 444.22222222222223, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 133.0, |
| "epoch": 0.14, |
| "format_failures": 0.0, |
| "grad_norm": 0.22367094457149506, |
| "kl": 0.03544241935014725, |
| "learning_rate": 1e-06, |
| "loss": 0.0442, |
| "num_tokens": 1363812.0, |
| "reward": 0.22601282596588135, |
| "reward_std": 0.1535530686378479, |
| "step": 70 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 547.0, |
| "completions/max_terminated_length": 547.0, |
| "completions/mean_length": 368.5833333333333, |
| "completions/mean_terminated_length": 402.09090909090907, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 205.0, |
| "epoch": 0.142, |
| "format_failures": 0.0, |
| "grad_norm": 0.25884878635406494, |
| "kl": 0.0446395231410861, |
| "learning_rate": 1e-06, |
| "loss": 0.0091, |
| "num_tokens": 1396788.0, |
| "reward": 0.6545634865760803, |
| "reward_std": 0.2292691022157669, |
| "step": 71 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 228.0, |
| "completions/max_terminated_length": 228.0, |
| "completions/mean_length": 127.75, |
| "completions/mean_terminated_length": 139.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 62.0, |
| "epoch": 0.144, |
| "format_failures": 0.0, |
| "grad_norm": 2.139310121536255, |
| "kl": 0.2615228593349457, |
| "learning_rate": 1e-06, |
| "loss": 0.0935, |
| "num_tokens": 1411512.0, |
| "reward": 0.625, |
| "reward_std": 0.4826536476612091, |
| "step": 72 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 644.0, |
| "completions/max_terminated_length": 644.0, |
| "completions/mean_length": 321.1666666666667, |
| "completions/mean_terminated_length": 350.3636363636364, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 194.0, |
| "epoch": 0.146, |
| "format_failures": 0.0, |
| "grad_norm": 0.7009347081184387, |
| "kl": 0.13678913563489914, |
| "learning_rate": 1e-06, |
| "loss": 0.0771, |
| "num_tokens": 1436532.0, |
| "reward": 0.3439815044403076, |
| "reward_std": 0.27971503138542175, |
| "step": 73 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 280.0, |
| "completions/max_terminated_length": 280.0, |
| "completions/mean_length": 253.08333333333334, |
| "completions/mean_terminated_length": 276.09090909090907, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 271.0, |
| "epoch": 0.148, |
| "format_failures": 0.0, |
| "grad_norm": 1.2899372577667236, |
| "kl": 0.10085960477590561, |
| "learning_rate": 1e-06, |
| "loss": 0.3862, |
| "num_tokens": 1471704.0, |
| "reward": 0.7222222685813904, |
| "reward_std": 0.4457052946090698, |
| "step": 74 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 308.0, |
| "completions/max_terminated_length": 308.0, |
| "completions/mean_length": 196.5, |
| "completions/mean_terminated_length": 214.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 54.0, |
| "epoch": 0.15, |
| "format_failures": 0.0, |
| "grad_norm": 0.4177331328392029, |
| "kl": 0.026733385398983955, |
| "learning_rate": 1e-06, |
| "loss": 0.0579, |
| "num_tokens": 1485468.0, |
| "reward": 0.2735119163990021, |
| "reward_std": 0.30911651253700256, |
| "step": 75 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 368.0, |
| "completions/max_terminated_length": 368.0, |
| "completions/mean_length": 200.41666666666666, |
| "completions/mean_terminated_length": 218.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 62.0, |
| "epoch": 0.152, |
| "format_failures": 0.0, |
| "grad_norm": 0.8074631094932556, |
| "kl": 0.45791861414909363, |
| "learning_rate": 1e-06, |
| "loss": -0.0476, |
| "num_tokens": 1500636.0, |
| "reward": 0.17129629850387573, |
| "reward_std": 0.19502559304237366, |
| "step": 76 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 220.0, |
| "completions/max_terminated_length": 220.0, |
| "completions/mean_length": 144.08333333333334, |
| "completions/mean_terminated_length": 157.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 21.0, |
| "epoch": 0.154, |
| "format_failures": 0.0, |
| "grad_norm": 1.8004605770111084, |
| "kl": 0.32159996032714844, |
| "learning_rate": 1e-06, |
| "loss": -0.0603, |
| "num_tokens": 1512264.0, |
| "reward": 0.5055555701255798, |
| "reward_std": 0.29963788390159607, |
| "step": 77 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 564.0, |
| "completions/max_terminated_length": 564.0, |
| "completions/mean_length": 312.1666666666667, |
| "completions/mean_terminated_length": 340.54545454545456, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 170.0, |
| "epoch": 0.156, |
| "format_failures": 0.0, |
| "grad_norm": 0.3055727481842041, |
| "kl": 0.03414521459490061, |
| "learning_rate": 1e-06, |
| "loss": -0.0067, |
| "num_tokens": 1526292.0, |
| "reward": 0.5897321701049805, |
| "reward_std": 0.2986750900745392, |
| "step": 78 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 192.0, |
| "completions/max_terminated_length": 192.0, |
| "completions/mean_length": 167.5, |
| "completions/mean_terminated_length": 182.72727272727272, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 165.0, |
| "epoch": 0.158, |
| "format_failures": 0.0, |
| "grad_norm": 2.3401753902435303, |
| "kl": 0.03888106718659401, |
| "learning_rate": 1e-06, |
| "loss": -0.0218, |
| "num_tokens": 1540416.0, |
| "reward": 0.6666666865348816, |
| "reward_std": 0.4923659861087799, |
| "step": 79 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.16666666666666663, |
| "completions/max_length": 291.0, |
| "completions/max_terminated_length": 291.0, |
| "completions/mean_length": 210.91666666666666, |
| "completions/mean_terminated_length": 253.1, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 137.0, |
| "epoch": 0.16, |
| "format_failures": 0.0, |
| "grad_norm": 28.73111343383789, |
| "kl": 15.663371562957764, |
| "learning_rate": 1e-06, |
| "loss": 0.0445, |
| "num_tokens": 1553580.0, |
| "reward": 0.4305555820465088, |
| "reward_std": 0.4738534092903137, |
| "step": 80 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 60.0, |
| "completions/max_terminated_length": 60.0, |
| "completions/mean_length": 43.166666666666664, |
| "completions/mean_terminated_length": 47.09090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 32.0, |
| "epoch": 0.162, |
| "format_failures": 0.0, |
| "grad_norm": 13.234149932861328, |
| "kl": 2.6492202281951904, |
| "learning_rate": 1e-06, |
| "loss": -0.0385, |
| "num_tokens": 1560816.0, |
| "reward": 0.27916666865348816, |
| "reward_std": 0.42504456639289856, |
| "step": 81 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 331.0, |
| "completions/max_terminated_length": 331.0, |
| "completions/mean_length": 189.66666666666666, |
| "completions/mean_terminated_length": 206.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 142.0, |
| "epoch": 0.164, |
| "format_failures": 0.0, |
| "grad_norm": 1.0555896759033203, |
| "kl": 0.060676803812384605, |
| "learning_rate": 1e-06, |
| "loss": -0.0432, |
| "num_tokens": 1573524.0, |
| "reward": 0.39722225069999695, |
| "reward_std": 0.2684729993343353, |
| "step": 82 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 684.0, |
| "completions/max_terminated_length": 684.0, |
| "completions/mean_length": 482.1666666666667, |
| "completions/mean_terminated_length": 526.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 479.0, |
| "epoch": 0.166, |
| "format_failures": 0.0, |
| "grad_norm": 0.27017322182655334, |
| "kl": 0.013310576789081097, |
| "learning_rate": 1e-06, |
| "loss": -0.0023, |
| "num_tokens": 1595796.0, |
| "reward": 0.8000000715255737, |
| "reward_std": 0.39080336689949036, |
| "step": 83 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 257.0, |
| "completions/max_terminated_length": 257.0, |
| "completions/mean_length": 144.91666666666666, |
| "completions/mean_terminated_length": 158.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 83.0, |
| "epoch": 0.168, |
| "format_failures": 0.0, |
| "grad_norm": 1.0021555423736572, |
| "kl": 0.2212899848818779, |
| "learning_rate": 1e-06, |
| "loss": 0.0304, |
| "num_tokens": 1606284.0, |
| "reward": 0.2957010865211487, |
| "reward_std": 0.2737172842025757, |
| "step": 84 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 2050.0, |
| "completions/max_terminated_length": 2050.0, |
| "completions/mean_length": 510.0833333333333, |
| "completions/mean_terminated_length": 556.4545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 216.0, |
| "epoch": 0.17, |
| "format_failures": 0.0, |
| "grad_norm": 0.3675689399242401, |
| "kl": 0.2206931747496128, |
| "learning_rate": 1e-06, |
| "loss": 0.1278, |
| "num_tokens": 1639152.0, |
| "reward": 0.43888890743255615, |
| "reward_std": 0.2596941888332367, |
| "step": 85 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 280.0, |
| "completions/max_terminated_length": 280.0, |
| "completions/mean_length": 152.25, |
| "completions/mean_terminated_length": 166.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 117.0, |
| "epoch": 0.172, |
| "format_failures": 0.0, |
| "grad_norm": 2.8949317932128906, |
| "kl": 1.413679599761963, |
| "learning_rate": 1e-06, |
| "loss": 0.0356, |
| "num_tokens": 1652364.0, |
| "reward": 0.4761905074119568, |
| "reward_std": 0.5035434365272522, |
| "step": 86 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 245.0, |
| "completions/max_terminated_length": 245.0, |
| "completions/mean_length": 152.91666666666666, |
| "completions/mean_terminated_length": 166.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 58.0, |
| "epoch": 0.174, |
| "format_failures": 0.0, |
| "grad_norm": 1.7609695196151733, |
| "kl": 0.07055489160120487, |
| "learning_rate": 1e-06, |
| "loss": 0.3366, |
| "num_tokens": 1685136.0, |
| "reward": 0.33750003576278687, |
| "reward_std": 0.43647608160972595, |
| "step": 87 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 180.0, |
| "completions/max_terminated_length": 180.0, |
| "completions/mean_length": 135.25, |
| "completions/mean_terminated_length": 147.54545454545453, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 77.0, |
| "epoch": 0.176, |
| "format_failures": 0.0, |
| "grad_norm": 0.6215497255325317, |
| "kl": 0.08650689758360386, |
| "learning_rate": 1e-06, |
| "loss": 0.0112, |
| "num_tokens": 1693764.0, |
| "reward": 0.5745911598205566, |
| "reward_std": 0.1768045872449875, |
| "step": 88 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 140.0, |
| "completions/max_terminated_length": 140.0, |
| "completions/mean_length": 73.25, |
| "completions/mean_terminated_length": 79.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 61.0, |
| "epoch": 0.178, |
| "format_failures": 1.0, |
| "grad_norm": 0.8421996235847473, |
| "kl": 0.016213122755289078, |
| "learning_rate": 1e-06, |
| "loss": 0.0149, |
| "num_tokens": 1707588.0, |
| "reward": 0.06666667014360428, |
| "reward_std": 0.1775250881910324, |
| "step": 89 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 277.0, |
| "completions/max_terminated_length": 277.0, |
| "completions/mean_length": 178.16666666666666, |
| "completions/mean_terminated_length": 194.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 101.0, |
| "epoch": 0.18, |
| "format_failures": 0.0, |
| "grad_norm": 0.4202212691307068, |
| "kl": 0.3119240030646324, |
| "learning_rate": 1e-06, |
| "loss": 0.0093, |
| "num_tokens": 1716792.0, |
| "reward": 0.6381944417953491, |
| "reward_std": 0.22775352001190186, |
| "step": 90 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 220.0, |
| "completions/max_terminated_length": 220.0, |
| "completions/mean_length": 165.58333333333334, |
| "completions/mean_terminated_length": 180.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 56.0, |
| "epoch": 0.182, |
| "format_failures": 0.0, |
| "grad_norm": 3.5526509284973145, |
| "kl": 0.04295740742236376, |
| "learning_rate": 1e-06, |
| "loss": -0.007, |
| "num_tokens": 1735188.0, |
| "reward": 0.6666666865348816, |
| "reward_std": 0.4923659861087799, |
| "step": 91 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 554.0, |
| "completions/max_terminated_length": 554.0, |
| "completions/mean_length": 296.3333333333333, |
| "completions/mean_terminated_length": 323.27272727272725, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 142.0, |
| "epoch": 0.184, |
| "format_failures": 0.0, |
| "grad_norm": 0.7098760008811951, |
| "kl": 0.14585042744874954, |
| "learning_rate": 1e-06, |
| "loss": -0.052, |
| "num_tokens": 1748808.0, |
| "reward": 0.4570105969905853, |
| "reward_std": 0.29787296056747437, |
| "step": 92 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 442.0, |
| "completions/max_terminated_length": 442.0, |
| "completions/mean_length": 325.1666666666667, |
| "completions/mean_terminated_length": 354.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 233.0, |
| "epoch": 0.186, |
| "format_failures": 0.0, |
| "grad_norm": 4.00807523727417, |
| "kl": 2.2327868938446045, |
| "learning_rate": 1e-06, |
| "loss": 0.0328, |
| "num_tokens": 1763196.0, |
| "reward": 0.37762749195098877, |
| "reward_std": 0.2510078251361847, |
| "step": 93 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 146.0, |
| "completions/max_terminated_length": 146.0, |
| "completions/mean_length": 78.66666666666667, |
| "completions/mean_terminated_length": 85.81818181818181, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 60.0, |
| "epoch": 0.188, |
| "format_failures": 0.0, |
| "grad_norm": 4.166850566864014, |
| "kl": 0.4828091114759445, |
| "learning_rate": 1e-06, |
| "loss": -0.0043, |
| "num_tokens": 1775700.0, |
| "reward": 0.41428571939468384, |
| "reward_std": 0.20157082378864288, |
| "step": 94 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 288.0, |
| "completions/max_terminated_length": 288.0, |
| "completions/mean_length": 163.0, |
| "completions/mean_terminated_length": 177.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 86.0, |
| "epoch": 0.19, |
| "format_failures": 0.0, |
| "grad_norm": 2.0013251304626465, |
| "kl": 0.3356290655210614, |
| "learning_rate": 1e-06, |
| "loss": -0.0532, |
| "num_tokens": 1790064.0, |
| "reward": 0.4275793731212616, |
| "reward_std": 0.3848039209842682, |
| "step": 95 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 144.0, |
| "completions/max_terminated_length": 144.0, |
| "completions/mean_length": 128.33333333333334, |
| "completions/mean_terminated_length": 140.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 134.0, |
| "epoch": 0.192, |
| "format_failures": 0.0, |
| "grad_norm": 6.922305107116699, |
| "kl": 3.5449295742437243, |
| "learning_rate": 1e-06, |
| "loss": 0.0385, |
| "num_tokens": 1803036.0, |
| "reward": 0.6979166865348816, |
| "reward_std": 0.31738603115081787, |
| "step": 96 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 221.0, |
| "completions/max_terminated_length": 221.0, |
| "completions/mean_length": 94.33333333333333, |
| "completions/mean_terminated_length": 102.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 72.0, |
| "epoch": 0.194, |
| "format_failures": 0.0, |
| "grad_norm": 1.4514728784561157, |
| "kl": 0.1412234902381897, |
| "learning_rate": 1e-06, |
| "loss": 0.3157, |
| "num_tokens": 1816092.0, |
| "reward": 0.8380953073501587, |
| "reward_std": 0.30834609270095825, |
| "step": 97 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 82.0, |
| "completions/max_terminated_length": 82.0, |
| "completions/mean_length": 43.5, |
| "completions/mean_terminated_length": 47.45454545454545, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 31.0, |
| "epoch": 0.196, |
| "format_failures": 0.0, |
| "grad_norm": 2.004136085510254, |
| "kl": 0.6110408902168274, |
| "learning_rate": 1e-06, |
| "loss": 0.0095, |
| "num_tokens": 1827024.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 98 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 373.0, |
| "completions/max_terminated_length": 373.0, |
| "completions/mean_length": 212.08333333333334, |
| "completions/mean_terminated_length": 231.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 102.0, |
| "epoch": 0.198, |
| "format_failures": 0.0, |
| "grad_norm": 0.8370314240455627, |
| "kl": 0.09233395755290985, |
| "learning_rate": 1e-06, |
| "loss": 0.1438, |
| "num_tokens": 1860576.0, |
| "reward": 0.2782828211784363, |
| "reward_std": 0.2644941210746765, |
| "step": 99 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 225.0, |
| "completions/max_terminated_length": 225.0, |
| "completions/mean_length": 163.25, |
| "completions/mean_terminated_length": 178.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 101.0, |
| "epoch": 0.2, |
| "format_failures": 0.0, |
| "grad_norm": 1.565374732017517, |
| "kl": 0.391565203666687, |
| "learning_rate": 1e-06, |
| "loss": -0.0497, |
| "num_tokens": 1872996.0, |
| "reward": 0.5944445133209229, |
| "reward_std": 0.47775429487228394, |
| "step": 100 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 411.0, |
| "completions/max_terminated_length": 411.0, |
| "completions/mean_length": 150.16666666666666, |
| "completions/mean_terminated_length": 163.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 94.0, |
| "epoch": 0.202, |
| "format_failures": 0.0, |
| "grad_norm": 1.6569881439208984, |
| "kl": 0.24375841114670038, |
| "learning_rate": 1e-06, |
| "loss": 0.0387, |
| "num_tokens": 1892856.0, |
| "reward": 0.3499999940395355, |
| "reward_std": 0.36666667461395264, |
| "step": 101 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 150.0, |
| "completions/max_terminated_length": 150.0, |
| "completions/mean_length": 107.66666666666667, |
| "completions/mean_terminated_length": 117.45454545454545, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.204, |
| "format_failures": 0.0, |
| "grad_norm": 0.9490823745727539, |
| "kl": 0.010788497282192111, |
| "learning_rate": 1e-06, |
| "loss": 0.0193, |
| "num_tokens": 1903992.0, |
| "reward": 0.7714947462081909, |
| "reward_std": 0.2890874743461609, |
| "step": 102 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 110.0, |
| "completions/max_terminated_length": 110.0, |
| "completions/mean_length": 66.0, |
| "completions/mean_terminated_length": 72.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 24.0, |
| "epoch": 0.206, |
| "format_failures": 0.0, |
| "grad_norm": 1.482935905456543, |
| "kl": 0.03114949818700552, |
| "learning_rate": 1e-06, |
| "loss": -0.0754, |
| "num_tokens": 1913640.0, |
| "reward": 0.3333333432674408, |
| "reward_std": 0.32566946744918823, |
| "step": 103 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 379.0, |
| "completions/max_terminated_length": 379.0, |
| "completions/mean_length": 260.5833333333333, |
| "completions/mean_terminated_length": 284.27272727272725, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 197.0, |
| "epoch": 0.208, |
| "format_failures": 0.0, |
| "grad_norm": 0.4501963257789612, |
| "kl": 0.011977697955444455, |
| "learning_rate": 1e-06, |
| "loss": -0.0496, |
| "num_tokens": 1932468.0, |
| "reward": 0.37487921118736267, |
| "reward_std": 0.29262858629226685, |
| "step": 104 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.16666666666666663, |
| "completions/max_length": 143.0, |
| "completions/max_terminated_length": 143.0, |
| "completions/mean_length": 113.91666666666667, |
| "completions/mean_terminated_length": 136.7, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 120.0, |
| "epoch": 0.21, |
| "format_failures": 0.0, |
| "grad_norm": 3.2958946228027344, |
| "kl": 0.024902154691517353, |
| "learning_rate": 1e-06, |
| "loss": 0.0181, |
| "num_tokens": 1942992.0, |
| "reward": 0.5, |
| "reward_std": 0.5222329497337341, |
| "step": 105 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 714.0, |
| "completions/max_terminated_length": 714.0, |
| "completions/mean_length": 166.0, |
| "completions/mean_terminated_length": 181.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 57.0, |
| "epoch": 0.212, |
| "format_failures": 0.0, |
| "grad_norm": 1.3716078996658325, |
| "kl": 1.098541870713234, |
| "learning_rate": 1e-06, |
| "loss": 0.0299, |
| "num_tokens": 1964208.0, |
| "reward": 0.07500000298023224, |
| "reward_std": 0.17645499110221863, |
| "step": 106 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 321.0, |
| "completions/max_terminated_length": 321.0, |
| "completions/mean_length": 171.58333333333334, |
| "completions/mean_terminated_length": 187.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 100.0, |
| "epoch": 0.214, |
| "format_failures": 2.0, |
| "grad_norm": 0.27850034832954407, |
| "kl": 0.020487794652581215, |
| "learning_rate": 1e-06, |
| "loss": 0.0329, |
| "num_tokens": 1974972.0, |
| "reward": 0.4126984477043152, |
| "reward_std": 0.18834668397903442, |
| "step": 107 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 54.0, |
| "completions/max_terminated_length": 54.0, |
| "completions/mean_length": 45.416666666666664, |
| "completions/mean_terminated_length": 49.54545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 34.0, |
| "epoch": 0.216, |
| "format_failures": 0.0, |
| "grad_norm": 2.118313789367676, |
| "kl": 0.03025034721940756, |
| "learning_rate": 1e-06, |
| "loss": 0.0001, |
| "num_tokens": 1981716.0, |
| "reward": 0.8333333730697632, |
| "reward_std": 0.38924944400787354, |
| "step": 108 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 233.0, |
| "completions/max_terminated_length": 233.0, |
| "completions/mean_length": 117.5, |
| "completions/mean_terminated_length": 128.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 88.0, |
| "epoch": 0.218, |
| "format_failures": 0.0, |
| "grad_norm": 1.9193243980407715, |
| "kl": 0.04295819811522961, |
| "learning_rate": 1e-06, |
| "loss": 0.009, |
| "num_tokens": 1992420.0, |
| "reward": 0.701388955116272, |
| "reward_std": 0.38302528858184814, |
| "step": 109 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 131.0, |
| "completions/max_terminated_length": 131.0, |
| "completions/mean_length": 108.66666666666667, |
| "completions/mean_terminated_length": 118.54545454545455, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 92.0, |
| "epoch": 0.22, |
| "format_failures": 0.0, |
| "grad_norm": 4.0581183433532715, |
| "kl": 0.34252697695046663, |
| "learning_rate": 1e-06, |
| "loss": -0.014, |
| "num_tokens": 2004288.0, |
| "reward": 0.479166716337204, |
| "reward_std": 0.30592837929725647, |
| "step": 110 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 192.0, |
| "completions/max_terminated_length": 192.0, |
| "completions/mean_length": 129.0, |
| "completions/mean_terminated_length": 140.72727272727272, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 112.0, |
| "epoch": 0.222, |
| "format_failures": 0.0, |
| "grad_norm": 2.901212692260742, |
| "kl": 0.451558455824852, |
| "learning_rate": 1e-06, |
| "loss": 0.0047, |
| "num_tokens": 2021400.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 111 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 174.0, |
| "completions/max_terminated_length": 174.0, |
| "completions/mean_length": 147.08333333333334, |
| "completions/mean_terminated_length": 160.45454545454547, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 65.0, |
| "epoch": 0.224, |
| "format_failures": 0.0, |
| "grad_norm": 3.0557456016540527, |
| "kl": 0.1749698342755437, |
| "learning_rate": 1e-06, |
| "loss": 0.0461, |
| "num_tokens": 2033580.0, |
| "reward": 0.7708333730697632, |
| "reward_std": 0.32784304022789, |
| "step": 112 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 274.0, |
| "completions/max_terminated_length": 274.0, |
| "completions/mean_length": 81.75, |
| "completions/mean_terminated_length": 89.18181818181819, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 41.0, |
| "epoch": 0.226, |
| "format_failures": 0.0, |
| "grad_norm": 2.929105281829834, |
| "kl": 1.0704956352710724, |
| "learning_rate": 1e-06, |
| "loss": -0.1432, |
| "num_tokens": 2065740.0, |
| "reward": 0.6625000238418579, |
| "reward_std": 0.3711928129196167, |
| "step": 113 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 796.0, |
| "completions/max_terminated_length": 796.0, |
| "completions/mean_length": 420.5, |
| "completions/mean_terminated_length": 458.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 171.0, |
| "epoch": 0.228, |
| "format_failures": 0.0, |
| "grad_norm": 0.966941237449646, |
| "kl": 0.012734876945614815, |
| "learning_rate": 1e-06, |
| "loss": -0.0432, |
| "num_tokens": 2101236.0, |
| "reward": 0.6500000357627869, |
| "reward_std": 0.40886637568473816, |
| "step": 114 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 302.0, |
| "completions/max_terminated_length": 302.0, |
| "completions/mean_length": 263.75, |
| "completions/mean_terminated_length": 287.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 280.0, |
| "epoch": 0.23, |
| "format_failures": 0.0, |
| "grad_norm": 7.276376247406006, |
| "kl": 2.2721076011657715, |
| "learning_rate": 1e-06, |
| "loss": 0.0151, |
| "num_tokens": 2114484.0, |
| "reward": 0.7777778506278992, |
| "reward_std": 0.3576955795288086, |
| "step": 115 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 188.0, |
| "completions/max_terminated_length": 188.0, |
| "completions/mean_length": 167.41666666666666, |
| "completions/mean_terminated_length": 182.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 147.0, |
| "epoch": 0.232, |
| "format_failures": 0.0, |
| "grad_norm": 0.6819717884063721, |
| "kl": 0.020047412253916264, |
| "learning_rate": 1e-06, |
| "loss": 0.0179, |
| "num_tokens": 2125992.0, |
| "reward": 0.8819445371627808, |
| "reward_std": 0.2524084150791168, |
| "step": 116 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 394.0, |
| "completions/max_terminated_length": 394.0, |
| "completions/mean_length": 211.33333333333334, |
| "completions/mean_terminated_length": 230.54545454545453, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 147.0, |
| "epoch": 0.234, |
| "format_failures": 0.0, |
| "grad_norm": 0.19310350716114044, |
| "kl": 0.019224281422793865, |
| "learning_rate": 1e-06, |
| "loss": 0.012, |
| "num_tokens": 2137692.0, |
| "reward": 0.585936427116394, |
| "reward_std": 0.09784586727619171, |
| "step": 117 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 205.0, |
| "completions/max_terminated_length": 205.0, |
| "completions/mean_length": 142.16666666666666, |
| "completions/mean_terminated_length": 155.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 110.0, |
| "epoch": 0.236, |
| "format_failures": 0.0, |
| "grad_norm": 2.085691213607788, |
| "kl": 0.09273007325828075, |
| "learning_rate": 1e-06, |
| "loss": 0.0139, |
| "num_tokens": 2148816.0, |
| "reward": 0.319444477558136, |
| "reward_std": 0.2289450317621231, |
| "step": 118 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 583.0, |
| "completions/max_terminated_length": 583.0, |
| "completions/mean_length": 317.0833333333333, |
| "completions/mean_terminated_length": 345.90909090909093, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 140.0, |
| "epoch": 0.238, |
| "format_failures": 0.0, |
| "grad_norm": 0.37083595991134644, |
| "kl": 0.0630851686000824, |
| "learning_rate": 1e-06, |
| "loss": 0.0918, |
| "num_tokens": 2168256.0, |
| "reward": 0.37870368361473083, |
| "reward_std": 0.2895275950431824, |
| "step": 119 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 306.0, |
| "completions/max_terminated_length": 306.0, |
| "completions/mean_length": 126.66666666666667, |
| "completions/mean_terminated_length": 138.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 58.0, |
| "epoch": 0.24, |
| "format_failures": 0.0, |
| "grad_norm": 6.606923580169678, |
| "kl": 3.8295647501945496, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "num_tokens": 2183124.0, |
| "reward": 0.4027777910232544, |
| "reward_std": 0.3723955750465393, |
| "step": 120 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.5833333333333333, |
| "completions/max_length": 77.0, |
| "completions/max_terminated_length": 77.0, |
| "completions/mean_length": 32.083333333333336, |
| "completions/mean_terminated_length": 77.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 77.0, |
| "epoch": 0.242, |
| "format_failures": 0.0, |
| "grad_norm": 0.08047831058502197, |
| "kl": 0.013985397294163704, |
| "learning_rate": 1e-06, |
| "loss": 0.0003, |
| "num_tokens": 2190396.0, |
| "reward": 1.0, |
| "reward_std": 0.0, |
| "step": 121 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 595.0, |
| "completions/max_terminated_length": 595.0, |
| "completions/mean_length": 431.0833333333333, |
| "completions/mean_terminated_length": 470.27272727272725, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 109.0, |
| "epoch": 0.244, |
| "format_failures": 0.0, |
| "grad_norm": 0.019394446164369583, |
| "kl": 0.01961024198681116, |
| "learning_rate": 1e-06, |
| "loss": 0.0001, |
| "num_tokens": 2218320.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 122 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 454.0, |
| "completions/max_terminated_length": 454.0, |
| "completions/mean_length": 284.9166666666667, |
| "completions/mean_terminated_length": 310.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 118.0, |
| "epoch": 0.246, |
| "format_failures": 0.0, |
| "grad_norm": 1.5184653997421265, |
| "kl": 1.0404187738895416, |
| "learning_rate": 1e-06, |
| "loss": -0.0335, |
| "num_tokens": 2231256.0, |
| "reward": 0.4014219641685486, |
| "reward_std": 0.31073111295700073, |
| "step": 123 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 141.0, |
| "completions/max_terminated_length": 141.0, |
| "completions/mean_length": 64.75, |
| "completions/mean_terminated_length": 70.63636363636364, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 34.0, |
| "epoch": 0.248, |
| "format_failures": 0.0, |
| "grad_norm": 1.6326740980148315, |
| "kl": 0.3745545968413353, |
| "learning_rate": 1e-06, |
| "loss": 0.0517, |
| "num_tokens": 2240424.0, |
| "reward": 0.8037037253379822, |
| "reward_std": 0.3365945816040039, |
| "step": 124 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6666666666666667, |
| "completions/max_length": 117.0, |
| "completions/max_terminated_length": 117.0, |
| "completions/mean_length": 37.75, |
| "completions/mean_terminated_length": 113.25, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 102.0, |
| "epoch": 0.25, |
| "format_failures": 0.0, |
| "grad_norm": 10.052517890930176, |
| "kl": 1.53599963337183, |
| "learning_rate": 1e-06, |
| "loss": -0.0049, |
| "num_tokens": 2249424.0, |
| "reward": 0.9166666865348816, |
| "reward_std": 0.28867512941360474, |
| "step": 125 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 235.0, |
| "completions/max_terminated_length": 235.0, |
| "completions/mean_length": 199.5, |
| "completions/mean_terminated_length": 217.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 160.0, |
| "epoch": 0.252, |
| "format_failures": 0.0, |
| "grad_norm": 1.1388990879058838, |
| "kl": 0.24531831266358495, |
| "learning_rate": 1e-06, |
| "loss": 0.0013, |
| "num_tokens": 2263584.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "step": 126 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 142.0, |
| "completions/max_terminated_length": 142.0, |
| "completions/mean_length": 125.0, |
| "completions/mean_terminated_length": 136.36363636363637, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 123.0, |
| "epoch": 0.254, |
| "format_failures": 0.0, |
| "grad_norm": 2.392914056777954, |
| "kl": 0.9988721050322056, |
| "learning_rate": 1e-06, |
| "loss": -0.0025, |
| "num_tokens": 2276520.0, |
| "reward": 0.7291666865348816, |
| "reward_std": 0.3608439266681671, |
| "step": 127 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 199.0, |
| "completions/max_terminated_length": 199.0, |
| "completions/mean_length": 134.08333333333334, |
| "completions/mean_terminated_length": 146.27272727272728, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 106.0, |
| "epoch": 0.256, |
| "format_failures": 0.0, |
| "grad_norm": 0.5191885828971863, |
| "kl": 0.20999768376350403, |
| "learning_rate": 1e-06, |
| "loss": 0.0146, |
| "num_tokens": 2286408.0, |
| "reward": 0.717815101146698, |
| "reward_std": 0.14373189210891724, |
| "step": 128 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 229.0, |
| "completions/max_terminated_length": 229.0, |
| "completions/mean_length": 137.75, |
| "completions/mean_terminated_length": 150.27272727272728, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 98.0, |
| "epoch": 0.258, |
| "format_failures": 0.0, |
| "grad_norm": 1.204528570175171, |
| "kl": 0.08800000417977571, |
| "learning_rate": 1e-06, |
| "loss": 0.0511, |
| "num_tokens": 2296044.0, |
| "reward": 0.5675595998764038, |
| "reward_std": 0.2289842963218689, |
| "step": 129 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 198.0, |
| "completions/max_terminated_length": 198.0, |
| "completions/mean_length": 124.58333333333333, |
| "completions/mean_terminated_length": 135.9090909090909, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 54.0, |
| "epoch": 0.26, |
| "format_failures": 0.0, |
| "grad_norm": 0.44312867522239685, |
| "kl": 0.07202759943902493, |
| "learning_rate": 1e-06, |
| "loss": 0.0475, |
| "num_tokens": 2305644.0, |
| "reward": 0.5101972222328186, |
| "reward_std": 0.19489067792892456, |
| "step": 130 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 328.0, |
| "completions/max_terminated_length": 328.0, |
| "completions/mean_length": 281.1666666666667, |
| "completions/mean_terminated_length": 306.72727272727275, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 253.0, |
| "epoch": 0.262, |
| "format_failures": 1.0, |
| "grad_norm": 1.5526983737945557, |
| "kl": 0.06795010529458523, |
| "learning_rate": 1e-06, |
| "loss": -0.0019, |
| "num_tokens": 2319192.0, |
| "reward": 0.75, |
| "reward_std": 0.3217690885066986, |
| "step": 131 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 183.0, |
| "completions/max_terminated_length": 183.0, |
| "completions/mean_length": 162.83333333333334, |
| "completions/mean_terminated_length": 177.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 175.0, |
| "epoch": 0.264, |
| "format_failures": 0.0, |
| "grad_norm": 2.740288257598877, |
| "kl": 0.7462278339080513, |
| "learning_rate": 1e-06, |
| "loss": 0.0045, |
| "num_tokens": 2329488.0, |
| "reward": 0.9791666865348816, |
| "reward_std": 0.07216878235340118, |
| "step": 132 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 532.0, |
| "completions/max_terminated_length": 532.0, |
| "completions/mean_length": 315.5, |
| "completions/mean_terminated_length": 344.1818181818182, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 233.0, |
| "epoch": 0.266, |
| "format_failures": 0.0, |
| "grad_norm": 0.11069951951503754, |
| "kl": 0.01982728624716401, |
| "learning_rate": 1e-06, |
| "loss": -0.034, |
| "num_tokens": 2358276.0, |
| "reward": 0.5852844715118408, |
| "reward_std": 0.12080158293247223, |
| "step": 133 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 269.0, |
| "completions/max_terminated_length": 269.0, |
| "completions/mean_length": 161.16666666666666, |
| "completions/mean_terminated_length": 175.8181818181818, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 104.0, |
| "epoch": 0.268, |
| "format_failures": 0.0, |
| "grad_norm": 0.8276861906051636, |
| "kl": 0.09472572058439255, |
| "learning_rate": 1e-06, |
| "loss": 0.0149, |
| "num_tokens": 2368980.0, |
| "reward": 0.6518849730491638, |
| "reward_std": 0.2886110842227936, |
| "step": 134 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 325.0, |
| "completions/max_terminated_length": 325.0, |
| "completions/mean_length": 227.08333333333334, |
| "completions/mean_terminated_length": 247.72727272727272, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 136.0, |
| "epoch": 0.27, |
| "format_failures": 0.0, |
| "grad_norm": 0.5550012588500977, |
| "kl": 0.02074157353490591, |
| "learning_rate": 1e-06, |
| "loss": -0.0841, |
| "num_tokens": 2379828.0, |
| "reward": 0.6243386268615723, |
| "reward_std": 0.3905191719532013, |
| "step": 135 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 232.0, |
| "completions/max_terminated_length": 232.0, |
| "completions/mean_length": 210.0, |
| "completions/mean_terminated_length": 229.0909090909091, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 228.0, |
| "epoch": 0.272, |
| "format_failures": 0.0, |
| "grad_norm": 1.019722580909729, |
| "kl": 0.13905800506472588, |
| "learning_rate": 1e-06, |
| "loss": 0.0123, |
| "num_tokens": 2394360.0, |
| "reward": 0.949999988079071, |
| "reward_std": 0.17320507764816284, |
| "step": 136 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 348.0, |
| "completions/max_terminated_length": 348.0, |
| "completions/mean_length": 215.0, |
| "completions/mean_terminated_length": 234.54545454545453, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 145.0, |
| "epoch": 0.274, |
| "format_failures": 0.0, |
| "grad_norm": 0.32402342557907104, |
| "kl": 0.014864406548440456, |
| "learning_rate": 1e-06, |
| "loss": -0.0012, |
| "num_tokens": 2406096.0, |
| "reward": 0.6149470806121826, |
| "reward_std": 0.19829140603542328, |
| "step": 137 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 323.0, |
| "completions/max_terminated_length": 323.0, |
| "completions/mean_length": 136.58333333333334, |
| "completions/mean_terminated_length": 149.0, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 58.0, |
| "epoch": 0.276, |
| "format_failures": 0.0, |
| "grad_norm": 1.005679965019226, |
| "kl": 0.023909798823297024, |
| "learning_rate": 1e-06, |
| "loss": -0.0608, |
| "num_tokens": 2423568.0, |
| "reward": 0.5231481790542603, |
| "reward_std": 0.3425479829311371, |
| "step": 138 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08333333333333337, |
| "completions/max_length": 241.0, |
| "completions/max_terminated_length": 241.0, |
| "completions/mean_length": 165.58333333333334, |
| "completions/mean_terminated_length": 180.63636363636363, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 75.0, |
| "epoch": 0.278, |
| "format_failures": 0.0, |
| "grad_norm": 3.9986395835876465, |
| "kl": 2.975656658411026, |
| "learning_rate": 1e-06, |
| "loss": -0.0003, |
| "num_tokens": 2437320.0, |
| "reward": 0.7277778387069702, |
| "reward_std": 0.4172621965408325, |
| "step": 139 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.33333333333333337, |
| "completions/max_length": 55.0, |
| "completions/max_terminated_length": 55.0, |
| "completions/mean_length": 36.5, |
| "completions/mean_terminated_length": 54.75, |
| "completions/min_length": 0.0, |
| "completions/min_terminated_length": 53.0, |
| "epoch": 0.28, |
| "format_failures": 0.0, |
| "grad_norm": 0.04945458099246025, |
| "kl": 0.008955058641731739, |
| "learning_rate": 1e-06, |
| "loss": 0.0002, |
| "num_tokens": 2449116.0, |
| "reward": 1.0, |
| "reward_std": 0.0, |
| "step": 140 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 2449116, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|