| { |
| "best_global_step": 2475, |
| "best_metric": 0.3483333396911621, |
| "best_model_checkpoint": "/mnt/data/user/zhao_jun/tangjixin/output/model/intern3vl-8b-grpo_v2/v19-20250430-174625/checkpoint-2475", |
| "epoch": 1.0, |
| "eval_steps": 250, |
| "global_step": 2475, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 432.5, |
| "completions/mean_length": 292.2916717529297, |
| "completions/min_length": 175.5, |
| "epoch": 0.00040404040404040404, |
| "grad_norm": 2.6534149601732357, |
| "kl": 0.00283050537109375, |
| "learning_rate": 1.6129032258064515e-09, |
| "loss": 0.04529620707035065, |
| "memory(GiB)": 92.98, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.3905205577611923, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3905205577611923, |
| "step": 1, |
| "train_speed(iter/s)": 0.011973 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 446.25, |
| "completions/mean_length": 238.60417366027832, |
| "completions/min_length": 109.75, |
| "epoch": 0.00202020202020202, |
| "grad_norm": 1.7382476360832968, |
| "kl": 0.004979610443115234, |
| "learning_rate": 8.064516129032257e-09, |
| "loss": 0.005735308863222599, |
| "memory(GiB)": 104.19, |
| "reward": 0.18750000558793545, |
| "reward_std": 0.1695556379854679, |
| "rewards/MultiModalAccuracyORM/mean": 0.18750000558793545, |
| "rewards/MultiModalAccuracyORM/std": 0.1695556379854679, |
| "step": 5, |
| "train_speed(iter/s)": 0.026061 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 931.8, |
| "completions/mean_length": 493.87501831054686, |
| "completions/min_length": 266.1, |
| "epoch": 0.00404040404040404, |
| "grad_norm": 1.6461868811442486, |
| "kl": 0.0029445648193359374, |
| "learning_rate": 1.6129032258064514e-08, |
| "loss": 0.02294178307056427, |
| "memory(GiB)": 104.37, |
| "reward": 0.22500000819563865, |
| "reward_std": 0.308176326751709, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000819563865, |
| "rewards/MultiModalAccuracyORM/std": 0.308176326751709, |
| "step": 10, |
| "train_speed(iter/s)": 0.027382 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 346.8, |
| "completions/mean_length": 231.4250061035156, |
| "completions/min_length": 144.3, |
| "epoch": 0.006060606060606061, |
| "grad_norm": 3.6175414067372516, |
| "kl": 0.0058765411376953125, |
| "learning_rate": 2.4193548387096773e-08, |
| "loss": -0.020487520098686218, |
| "memory(GiB)": 107.13, |
| "reward": 0.4250000178813934, |
| "reward_std": 0.37195889055728915, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000178813934, |
| "rewards/MultiModalAccuracyORM/std": 0.37195889055728915, |
| "step": 15, |
| "train_speed(iter/s)": 0.031173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 592.0, |
| "completions/mean_length": 374.85834045410155, |
| "completions/min_length": 234.0, |
| "epoch": 0.00808080808080808, |
| "grad_norm": 2.0453002988188924, |
| "kl": 0.0025386810302734375, |
| "learning_rate": 3.225806451612903e-08, |
| "loss": 0.018081194162368773, |
| "memory(GiB)": 110.66, |
| "reward": 0.2833333373069763, |
| "reward_std": 0.2855865716934204, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.2855865716934204, |
| "step": 20, |
| "train_speed(iter/s)": 0.032111 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 675.3, |
| "completions/mean_length": 343.33334197998045, |
| "completions/min_length": 163.6, |
| "epoch": 0.010101010101010102, |
| "grad_norm": 2.0297666321727066, |
| "kl": 0.005942535400390625, |
| "learning_rate": 4.032258064516129e-08, |
| "loss": -0.003527432680130005, |
| "memory(GiB)": 110.66, |
| "reward": 0.26666667982935904, |
| "reward_std": 0.3784792721271515, |
| "rewards/MultiModalAccuracyORM/mean": 0.26666667982935904, |
| "rewards/MultiModalAccuracyORM/std": 0.3784792721271515, |
| "step": 25, |
| "train_speed(iter/s)": 0.03346 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 434.7, |
| "completions/mean_length": 279.9750091552734, |
| "completions/min_length": 170.9, |
| "epoch": 0.012121212121212121, |
| "grad_norm": 1.580858331896628, |
| "kl": 0.0038494110107421876, |
| "learning_rate": 4.8387096774193546e-08, |
| "loss": -0.00242428183555603, |
| "memory(GiB)": 110.68, |
| "reward": 0.10000000298023223, |
| "reward_std": 0.2711698323488235, |
| "rewards/MultiModalAccuracyORM/mean": 0.10000000298023223, |
| "rewards/MultiModalAccuracyORM/std": 0.2711698323488235, |
| "step": 30, |
| "train_speed(iter/s)": 0.034153 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 522.8, |
| "completions/mean_length": 286.36667404174807, |
| "completions/min_length": 165.1, |
| "epoch": 0.014141414141414142, |
| "grad_norm": 1.8379975346697042, |
| "kl": 0.02647857666015625, |
| "learning_rate": 5.645161290322581e-08, |
| "loss": 0.00997340977191925, |
| "memory(GiB)": 110.68, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.2200503796339035, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000521540644, |
| "rewards/MultiModalAccuracyORM/std": 0.2200503796339035, |
| "step": 35, |
| "train_speed(iter/s)": 0.034524 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 792.7, |
| "completions/mean_length": 407.9500198364258, |
| "completions/min_length": 231.7, |
| "epoch": 0.01616161616161616, |
| "grad_norm": 1.879368475551475, |
| "kl": 0.00126495361328125, |
| "learning_rate": 6.451612903225806e-08, |
| "loss": 0.005544811487197876, |
| "memory(GiB)": 111.72, |
| "reward": 0.16666667014360428, |
| "reward_std": 0.32451151907444, |
| "rewards/MultiModalAccuracyORM/mean": 0.16666667014360428, |
| "rewards/MultiModalAccuracyORM/std": 0.32451151907444, |
| "step": 40, |
| "train_speed(iter/s)": 0.034576 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 502.7, |
| "completions/mean_length": 326.12501068115233, |
| "completions/min_length": 189.6, |
| "epoch": 0.01818181818181818, |
| "grad_norm": 0.7460899635365059, |
| "kl": 0.0039581298828125, |
| "learning_rate": 7.258064516129032e-08, |
| "loss": 0.006708705425262451, |
| "memory(GiB)": 111.74, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.22406027615070342, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.22406027615070342, |
| "step": 45, |
| "train_speed(iter/s)": 0.034933 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 455.3, |
| "completions/mean_length": 274.28333892822263, |
| "completions/min_length": 131.9, |
| "epoch": 0.020202020202020204, |
| "grad_norm": 2.4079312295812714, |
| "kl": 0.00251922607421875, |
| "learning_rate": 8.064516129032257e-08, |
| "loss": 0.015183356404304505, |
| "memory(GiB)": 111.74, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.25738072395324707, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667386889457, |
| "rewards/MultiModalAccuracyORM/std": 0.25738072395324707, |
| "step": 50, |
| "train_speed(iter/s)": 0.035232 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 643.5, |
| "completions/mean_length": 365.0666778564453, |
| "completions/min_length": 191.6, |
| "epoch": 0.022222222222222223, |
| "grad_norm": 0.014705836185752576, |
| "kl": 0.004721450805664063, |
| "learning_rate": 8.870967741935484e-08, |
| "loss": 0.01203818917274475, |
| "memory(GiB)": 111.74, |
| "reward": 0.32500001043081284, |
| "reward_std": 0.3044206529855728, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500001043081284, |
| "rewards/MultiModalAccuracyORM/std": 0.3044206529855728, |
| "step": 55, |
| "train_speed(iter/s)": 0.035135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 527.3, |
| "completions/mean_length": 338.5833435058594, |
| "completions/min_length": 199.7, |
| "epoch": 0.024242424242424242, |
| "grad_norm": 2.6954085340696765, |
| "kl": 0.0020017623901367188, |
| "learning_rate": 9.677419354838709e-08, |
| "loss": -0.005992072820663452, |
| "memory(GiB)": 111.74, |
| "reward": 0.18333333507180213, |
| "reward_std": 0.33354574739933013, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333333507180213, |
| "rewards/MultiModalAccuracyORM/std": 0.33354574739933013, |
| "step": 60, |
| "train_speed(iter/s)": 0.035177 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 573.8, |
| "completions/mean_length": 363.6166793823242, |
| "completions/min_length": 208.5, |
| "epoch": 0.026262626262626262, |
| "grad_norm": 3.0115754925592952, |
| "kl": 0.0037433624267578123, |
| "learning_rate": 1.0483870967741934e-07, |
| "loss": -0.03836339712142944, |
| "memory(GiB)": 111.74, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.4085534304380417, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.4085534304380417, |
| "step": 65, |
| "train_speed(iter/s)": 0.035437 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 590.2, |
| "completions/mean_length": 377.9750099182129, |
| "completions/min_length": 204.0, |
| "epoch": 0.028282828282828285, |
| "grad_norm": 1.7279437509176054, |
| "kl": 0.001779937744140625, |
| "learning_rate": 1.1290322580645162e-07, |
| "loss": -0.05415753722190857, |
| "memory(GiB)": 111.74, |
| "reward": 0.3000000074505806, |
| "reward_std": 0.30035116374492643, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.30035116374492643, |
| "step": 70, |
| "train_speed(iter/s)": 0.035665 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 362.6, |
| "completions/mean_length": 242.45834197998047, |
| "completions/min_length": 116.8, |
| "epoch": 0.030303030303030304, |
| "grad_norm": 3.0031072335906597, |
| "kl": 0.002858734130859375, |
| "learning_rate": 1.2096774193548387e-07, |
| "loss": 0.03029954433441162, |
| "memory(GiB)": 111.74, |
| "reward": 0.26666667237877845, |
| "reward_std": 0.36043521761894226, |
| "rewards/MultiModalAccuracyORM/mean": 0.26666667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.36043521761894226, |
| "step": 75, |
| "train_speed(iter/s)": 0.036005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 776.6, |
| "completions/mean_length": 435.741682434082, |
| "completions/min_length": 231.3, |
| "epoch": 0.03232323232323232, |
| "grad_norm": 0.42303978897841893, |
| "kl": 0.0016681671142578125, |
| "learning_rate": 1.2903225806451611e-07, |
| "loss": 0.049380439519882205, |
| "memory(GiB)": 111.74, |
| "reward": 0.325000012665987, |
| "reward_std": 0.3008513689041138, |
| "rewards/MultiModalAccuracyORM/mean": 0.325000012665987, |
| "rewards/MultiModalAccuracyORM/std": 0.3008513689041138, |
| "step": 80, |
| "train_speed(iter/s)": 0.035635 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 511.1, |
| "completions/mean_length": 302.8333435058594, |
| "completions/min_length": 166.0, |
| "epoch": 0.03434343434343434, |
| "grad_norm": 2.6438328703498097, |
| "kl": 0.00451507568359375, |
| "learning_rate": 1.3709677419354838e-07, |
| "loss": -0.0442815363407135, |
| "memory(GiB)": 111.74, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.3933126300573349, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333402872086, |
| "rewards/MultiModalAccuracyORM/std": 0.3933126300573349, |
| "step": 85, |
| "train_speed(iter/s)": 0.035979 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 620.0, |
| "completions/mean_length": 381.02501525878904, |
| "completions/min_length": 183.3, |
| "epoch": 0.03636363636363636, |
| "grad_norm": 1.74840980915549, |
| "kl": 0.0013660430908203126, |
| "learning_rate": 1.4516129032258064e-07, |
| "loss": 0.07182409167289734, |
| "memory(GiB)": 111.74, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.35937642157077787, |
| "rewards/MultiModalAccuracyORM/mean": 0.30000000521540643, |
| "rewards/MultiModalAccuracyORM/std": 0.35937642157077787, |
| "step": 90, |
| "train_speed(iter/s)": 0.035659 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 641.2, |
| "completions/mean_length": 325.55834197998047, |
| "completions/min_length": 170.8, |
| "epoch": 0.03838383838383838, |
| "grad_norm": 0.04177816415582162, |
| "kl": 0.014581298828125, |
| "learning_rate": 1.5322580645161288e-07, |
| "loss": 0.029976147413253783, |
| "memory(GiB)": 111.74, |
| "reward": 0.18333333879709243, |
| "reward_std": 0.2358713388442993, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333333879709243, |
| "rewards/MultiModalAccuracyORM/std": 0.2358713388442993, |
| "step": 95, |
| "train_speed(iter/s)": 0.035533 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 535.2, |
| "completions/mean_length": 339.98334045410155, |
| "completions/min_length": 187.8, |
| "epoch": 0.04040404040404041, |
| "grad_norm": 3.190540630566101, |
| "kl": 0.004257583618164062, |
| "learning_rate": 1.6129032258064515e-07, |
| "loss": 0.0416176974773407, |
| "memory(GiB)": 111.74, |
| "reward": 0.28333334252238274, |
| "reward_std": 0.3247897386550903, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333334252238274, |
| "rewards/MultiModalAccuracyORM/std": 0.3247897386550903, |
| "step": 100, |
| "train_speed(iter/s)": 0.035677 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 589.7, |
| "completions/mean_length": 345.52500610351564, |
| "completions/min_length": 173.9, |
| "epoch": 0.04242424242424243, |
| "grad_norm": 3.073635935584006, |
| "kl": 0.00194549560546875, |
| "learning_rate": 1.6935483870967741e-07, |
| "loss": 0.042548298835754395, |
| "memory(GiB)": 111.74, |
| "reward": 0.2000000111758709, |
| "reward_std": 0.2611959934234619, |
| "rewards/MultiModalAccuracyORM/mean": 0.2000000111758709, |
| "rewards/MultiModalAccuracyORM/std": 0.2611959934234619, |
| "step": 105, |
| "train_speed(iter/s)": 0.035468 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 745.5, |
| "completions/mean_length": 380.5166748046875, |
| "completions/min_length": 225.9, |
| "epoch": 0.044444444444444446, |
| "grad_norm": 0.9626100429708261, |
| "kl": 0.0016246795654296874, |
| "learning_rate": 1.7741935483870968e-07, |
| "loss": -0.02766646146774292, |
| "memory(GiB)": 111.74, |
| "reward": 0.1916666731238365, |
| "reward_std": 0.3073477536439896, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.3073477536439896, |
| "step": 110, |
| "train_speed(iter/s)": 0.035455 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 566.0, |
| "completions/mean_length": 311.90000915527344, |
| "completions/min_length": 154.2, |
| "epoch": 0.046464646464646465, |
| "grad_norm": 1.342836390340581, |
| "kl": 0.008540725708007813, |
| "learning_rate": 1.8548387096774192e-07, |
| "loss": -0.010879068076610566, |
| "memory(GiB)": 111.74, |
| "reward": 0.10000000074505806, |
| "reward_std": 0.22228264510631562, |
| "rewards/MultiModalAccuracyORM/mean": 0.10000000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.22228264510631562, |
| "step": 115, |
| "train_speed(iter/s)": 0.035535 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 481.0, |
| "completions/mean_length": 288.9583435058594, |
| "completions/min_length": 165.6, |
| "epoch": 0.048484848484848485, |
| "grad_norm": 2.6619939115206135, |
| "kl": 0.00256195068359375, |
| "learning_rate": 1.9354838709677418e-07, |
| "loss": 0.033258992433547976, |
| "memory(GiB)": 111.74, |
| "reward": 0.4083333469927311, |
| "reward_std": 0.40963622033596037, |
| "rewards/MultiModalAccuracyORM/mean": 0.4083333469927311, |
| "rewards/MultiModalAccuracyORM/std": 0.40963622033596037, |
| "step": 120, |
| "train_speed(iter/s)": 0.035724 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 640.0, |
| "completions/mean_length": 379.45001220703125, |
| "completions/min_length": 187.1, |
| "epoch": 0.050505050505050504, |
| "grad_norm": 1.321049130692736, |
| "kl": 0.0020069122314453126, |
| "learning_rate": 2e-07, |
| "loss": -0.019822967052459717, |
| "memory(GiB)": 111.74, |
| "reward": 0.2916666708886623, |
| "reward_std": 0.32370694279670714, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666708886623, |
| "rewards/MultiModalAccuracyORM/std": 0.32370694279670714, |
| "step": 125, |
| "train_speed(iter/s)": 0.035602 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 535.7, |
| "completions/mean_length": 316.5916748046875, |
| "completions/min_length": 171.4, |
| "epoch": 0.052525252525252523, |
| "grad_norm": 2.460967418512405, |
| "kl": 0.0105987548828125, |
| "learning_rate": 2e-07, |
| "loss": 0.0003096837550401688, |
| "memory(GiB)": 111.74, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.29007510244846346, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333656191827, |
| "rewards/MultiModalAccuracyORM/std": 0.29007510244846346, |
| "step": 130, |
| "train_speed(iter/s)": 0.035448 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 666.4, |
| "completions/mean_length": 387.5166763305664, |
| "completions/min_length": 184.3, |
| "epoch": 0.05454545454545454, |
| "grad_norm": 0.059862028341158974, |
| "kl": 0.011987686157226562, |
| "learning_rate": 2e-07, |
| "loss": -0.011434757709503173, |
| "memory(GiB)": 111.74, |
| "reward": 0.1083333358168602, |
| "reward_std": 0.25866150557994844, |
| "rewards/MultiModalAccuracyORM/mean": 0.1083333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.25866150557994844, |
| "step": 135, |
| "train_speed(iter/s)": 0.035278 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 629.3, |
| "completions/mean_length": 382.4166778564453, |
| "completions/min_length": 206.3, |
| "epoch": 0.05656565656565657, |
| "grad_norm": 0.8204164270444702, |
| "kl": 0.002767181396484375, |
| "learning_rate": 2e-07, |
| "loss": 0.004211039841175079, |
| "memory(GiB)": 111.74, |
| "reward": 0.27500001192092893, |
| "reward_std": 0.2777498096227646, |
| "rewards/MultiModalAccuracyORM/mean": 0.27500001192092893, |
| "rewards/MultiModalAccuracyORM/std": 0.2777498096227646, |
| "step": 140, |
| "train_speed(iter/s)": 0.035472 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 532.4, |
| "completions/mean_length": 358.13333892822266, |
| "completions/min_length": 230.0, |
| "epoch": 0.05858585858585859, |
| "grad_norm": 2.288187560312466, |
| "kl": 0.006110763549804688, |
| "learning_rate": 2e-07, |
| "loss": -6.483197212219239e-05, |
| "memory(GiB)": 111.74, |
| "reward": 0.13333334028720856, |
| "reward_std": 0.19964569807052612, |
| "rewards/MultiModalAccuracyORM/mean": 0.13333334028720856, |
| "rewards/MultiModalAccuracyORM/std": 0.19964569807052612, |
| "step": 145, |
| "train_speed(iter/s)": 0.035406 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 554.8, |
| "completions/mean_length": 361.4166763305664, |
| "completions/min_length": 210.5, |
| "epoch": 0.06060606060606061, |
| "grad_norm": 0.015594201645230225, |
| "kl": 0.015087890625, |
| "learning_rate": 2e-07, |
| "loss": 0.015390211343765258, |
| "memory(GiB)": 111.74, |
| "reward": 0.14166667237877845, |
| "reward_std": 0.21374862194061278, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.21374862194061278, |
| "step": 150, |
| "train_speed(iter/s)": 0.035348 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 469.4, |
| "completions/mean_length": 268.90834045410156, |
| "completions/min_length": 145.7, |
| "epoch": 0.06262626262626263, |
| "grad_norm": 1.9984607447420715, |
| "kl": 0.009865570068359374, |
| "learning_rate": 2e-07, |
| "loss": 0.041778740286827085, |
| "memory(GiB)": 111.74, |
| "reward": 0.15000000596046448, |
| "reward_std": 0.2238060563802719, |
| "rewards/MultiModalAccuracyORM/mean": 0.15000000596046448, |
| "rewards/MultiModalAccuracyORM/std": 0.2238060563802719, |
| "step": 155, |
| "train_speed(iter/s)": 0.035429 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.6, |
| "completions/mean_length": 307.5416748046875, |
| "completions/min_length": 163.1, |
| "epoch": 0.06464646464646465, |
| "grad_norm": 1.9710039404778148, |
| "kl": 0.0016231536865234375, |
| "learning_rate": 2e-07, |
| "loss": 0.06229003667831421, |
| "memory(GiB)": 111.74, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.35413345992565154, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.35413345992565154, |
| "step": 160, |
| "train_speed(iter/s)": 0.035424 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 702.8, |
| "completions/mean_length": 392.75000915527346, |
| "completions/min_length": 207.7, |
| "epoch": 0.06666666666666667, |
| "grad_norm": 1.4786377917798241, |
| "kl": 0.009944915771484375, |
| "learning_rate": 2e-07, |
| "loss": 0.01215519905090332, |
| "memory(GiB)": 111.74, |
| "reward": 0.24166667237877845, |
| "reward_std": 0.28784283697605134, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.28784283697605134, |
| "step": 165, |
| "train_speed(iter/s)": 0.035279 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 482.7, |
| "completions/mean_length": 280.4750061035156, |
| "completions/min_length": 144.3, |
| "epoch": 0.06868686868686869, |
| "grad_norm": 3.7940420455147077, |
| "kl": 0.019321441650390625, |
| "learning_rate": 2e-07, |
| "loss": -0.022571200132369997, |
| "memory(GiB)": 111.74, |
| "reward": 0.30833334028720855, |
| "reward_std": 0.365692725777626, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833334028720855, |
| "rewards/MultiModalAccuracyORM/std": 0.365692725777626, |
| "step": 170, |
| "train_speed(iter/s)": 0.035381 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.2, |
| "completions/mean_length": 346.808341217041, |
| "completions/min_length": 159.3, |
| "epoch": 0.0707070707070707, |
| "grad_norm": 1.6037297839480729, |
| "kl": 0.0017574310302734375, |
| "learning_rate": 2e-07, |
| "loss": 0.05014150142669678, |
| "memory(GiB)": 111.74, |
| "reward": 0.35000001415610316, |
| "reward_std": 0.3534030318260193, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000001415610316, |
| "rewards/MultiModalAccuracyORM/std": 0.3534030318260193, |
| "step": 175, |
| "train_speed(iter/s)": 0.035382 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 487.0, |
| "completions/mean_length": 324.31666870117186, |
| "completions/min_length": 202.0, |
| "epoch": 0.07272727272727272, |
| "grad_norm": 2.7315358529507865, |
| "kl": 0.0067108154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.017354550957679748, |
| "memory(GiB)": 111.74, |
| "reward": 0.10833333730697632, |
| "reward_std": 0.2448128044605255, |
| "rewards/MultiModalAccuracyORM/mean": 0.10833333730697632, |
| "rewards/MultiModalAccuracyORM/std": 0.2448128044605255, |
| "step": 180, |
| "train_speed(iter/s)": 0.035416 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 532.3, |
| "completions/mean_length": 270.41667861938475, |
| "completions/min_length": 138.9, |
| "epoch": 0.07474747474747474, |
| "grad_norm": 2.314028672730481, |
| "kl": 0.002983856201171875, |
| "learning_rate": 2e-07, |
| "loss": 0.033014419674873355, |
| "memory(GiB)": 111.74, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.2566834628582001, |
| "rewards/MultiModalAccuracyORM/mean": 0.3333333425223827, |
| "rewards/MultiModalAccuracyORM/std": 0.2566834628582001, |
| "step": 185, |
| "train_speed(iter/s)": 0.035387 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 561.6, |
| "completions/mean_length": 341.2416763305664, |
| "completions/min_length": 181.1, |
| "epoch": 0.07676767676767676, |
| "grad_norm": 2.3931438253006387, |
| "kl": 0.00200347900390625, |
| "learning_rate": 2e-07, |
| "loss": 0.038839906454086304, |
| "memory(GiB)": 111.74, |
| "reward": 0.17500000596046447, |
| "reward_std": 0.2684228092432022, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000596046447, |
| "rewards/MultiModalAccuracyORM/std": 0.2684228092432022, |
| "step": 190, |
| "train_speed(iter/s)": 0.03545 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 566.3, |
| "completions/mean_length": 375.1000045776367, |
| "completions/min_length": 215.9, |
| "epoch": 0.07878787878787878, |
| "grad_norm": 1.8630040945251685, |
| "kl": 0.002384376525878906, |
| "learning_rate": 2e-07, |
| "loss": -0.015469104051589966, |
| "memory(GiB)": 111.74, |
| "reward": 0.1583333395421505, |
| "reward_std": 0.27148365080356596, |
| "rewards/MultiModalAccuracyORM/mean": 0.1583333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.27148365080356596, |
| "step": 195, |
| "train_speed(iter/s)": 0.035415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 638.2, |
| "completions/mean_length": 379.8833435058594, |
| "completions/min_length": 200.8, |
| "epoch": 0.08080808080808081, |
| "grad_norm": 2.200570213421646, |
| "kl": 0.0036174774169921873, |
| "learning_rate": 2e-07, |
| "loss": 0.006271684169769287, |
| "memory(GiB)": 111.74, |
| "reward": 0.25000000447034837, |
| "reward_std": 0.42421777844429015, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000447034837, |
| "rewards/MultiModalAccuracyORM/std": 0.42421777844429015, |
| "step": 200, |
| "train_speed(iter/s)": 0.035369 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.3, |
| "completions/mean_length": 345.00001220703126, |
| "completions/min_length": 174.6, |
| "epoch": 0.08282828282828283, |
| "grad_norm": 1.1008615802288388, |
| "kl": 0.0024932861328125, |
| "learning_rate": 2e-07, |
| "loss": 0.006234277784824371, |
| "memory(GiB)": 111.74, |
| "reward": 0.16666667237877847, |
| "reward_std": 0.2938547760248184, |
| "rewards/MultiModalAccuracyORM/mean": 0.16666667237877847, |
| "rewards/MultiModalAccuracyORM/std": 0.2938547760248184, |
| "step": 205, |
| "train_speed(iter/s)": 0.035338 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 425.3, |
| "completions/mean_length": 269.37500762939453, |
| "completions/min_length": 147.6, |
| "epoch": 0.08484848484848485, |
| "grad_norm": 3.476093319706285, |
| "kl": 0.0026340484619140625, |
| "learning_rate": 2e-07, |
| "loss": -0.0015334427356719972, |
| "memory(GiB)": 111.74, |
| "reward": 0.25000000447034837, |
| "reward_std": 0.300192129611969, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000447034837, |
| "rewards/MultiModalAccuracyORM/std": 0.300192129611969, |
| "step": 210, |
| "train_speed(iter/s)": 0.035464 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 537.8, |
| "completions/mean_length": 285.75000762939453, |
| "completions/min_length": 148.7, |
| "epoch": 0.08686868686868687, |
| "grad_norm": 2.1593026278667984, |
| "kl": 0.006510162353515625, |
| "learning_rate": 2e-07, |
| "loss": -0.015721744298934935, |
| "memory(GiB)": 111.74, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.3470772713422775, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667088866234, |
| "rewards/MultiModalAccuracyORM/std": 0.3470772713422775, |
| "step": 215, |
| "train_speed(iter/s)": 0.035439 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 566.7, |
| "completions/mean_length": 354.20001220703125, |
| "completions/min_length": 199.6, |
| "epoch": 0.08888888888888889, |
| "grad_norm": 3.7456181210533077, |
| "kl": 0.004998016357421875, |
| "learning_rate": 2e-07, |
| "loss": -0.02768584489822388, |
| "memory(GiB)": 111.74, |
| "reward": 0.28333333879709244, |
| "reward_std": 0.28452777564525605, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333333879709244, |
| "rewards/MultiModalAccuracyORM/std": 0.28452777564525605, |
| "step": 220, |
| "train_speed(iter/s)": 0.035428 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 533.5, |
| "completions/mean_length": 311.5416778564453, |
| "completions/min_length": 177.8, |
| "epoch": 0.09090909090909091, |
| "grad_norm": 2.0378307788473684, |
| "kl": 0.002862548828125, |
| "learning_rate": 2e-07, |
| "loss": 0.003831219673156738, |
| "memory(GiB)": 111.74, |
| "reward": 0.4000000111758709, |
| "reward_std": 0.3752594023942947, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000111758709, |
| "rewards/MultiModalAccuracyORM/std": 0.3752594023942947, |
| "step": 225, |
| "train_speed(iter/s)": 0.035407 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 671.5, |
| "completions/mean_length": 371.4583435058594, |
| "completions/min_length": 190.4, |
| "epoch": 0.09292929292929293, |
| "grad_norm": 2.1323681326918855, |
| "kl": 0.0035661697387695313, |
| "learning_rate": 2e-07, |
| "loss": 0.0016314834356307983, |
| "memory(GiB)": 111.74, |
| "reward": 0.2083333432674408, |
| "reward_std": 0.3477985322475433, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.3477985322475433, |
| "step": 230, |
| "train_speed(iter/s)": 0.035371 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 467.0, |
| "completions/mean_length": 287.6916778564453, |
| "completions/min_length": 168.0, |
| "epoch": 0.09494949494949495, |
| "grad_norm": 3.249083513364966, |
| "kl": 0.00834503173828125, |
| "learning_rate": 2e-07, |
| "loss": -0.004596877098083496, |
| "memory(GiB)": 111.74, |
| "reward": 0.13333333730697633, |
| "reward_std": 0.19513316750526427, |
| "rewards/MultiModalAccuracyORM/mean": 0.13333333730697633, |
| "rewards/MultiModalAccuracyORM/std": 0.19513316750526427, |
| "step": 235, |
| "train_speed(iter/s)": 0.03535 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 492.4, |
| "completions/mean_length": 316.3583450317383, |
| "completions/min_length": 173.8, |
| "epoch": 0.09696969696969697, |
| "grad_norm": 2.412571205764537, |
| "kl": 0.005106735229492188, |
| "learning_rate": 2e-07, |
| "loss": 0.004295679926872254, |
| "memory(GiB)": 111.74, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.3171865612268448, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333879709245, |
| "rewards/MultiModalAccuracyORM/std": 0.3171865612268448, |
| "step": 240, |
| "train_speed(iter/s)": 0.035314 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 468.3, |
| "completions/mean_length": 298.17500457763674, |
| "completions/min_length": 166.5, |
| "epoch": 0.09898989898989899, |
| "grad_norm": 1.9493555044308044, |
| "kl": 0.003982925415039062, |
| "learning_rate": 2e-07, |
| "loss": -0.04734513759613037, |
| "memory(GiB)": 111.74, |
| "reward": 0.2333333395421505, |
| "reward_std": 0.3471368670463562, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3471368670463562, |
| "step": 245, |
| "train_speed(iter/s)": 0.035338 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 1.3381064401700158, |
| "learning_rate": 2e-07, |
| "loss": -0.013491255044937134, |
| "memory(GiB)": 111.78, |
| "step": 250, |
| "train_speed(iter/s)": 0.035321 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0016666666666666666, |
| "eval_completions/max_length": 567.88, |
| "eval_completions/mean_length": 340.8433419799805, |
| "eval_completions/min_length": 176.68, |
| "eval_kl": 0.0008290672302246094, |
| "eval_loss": 0.011471391655504704, |
| "eval_reward": 0.25833333894610405, |
| "eval_reward_std": 0.3269642275571823, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.25833333894610405, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.3269642275571823, |
| "eval_runtime": 589.5277, |
| "eval_samples_per_second": 0.085, |
| "eval_steps_per_second": 0.008, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 628.2, |
| "completions/mean_length": 405.27917556762696, |
| "completions/min_length": 229.2, |
| "epoch": 0.10303030303030303, |
| "grad_norm": 1.3096626974864818, |
| "kl": 0.002015495300292969, |
| "learning_rate": 2e-07, |
| "loss": 0.022876815497875215, |
| "memory(GiB)": 113.5, |
| "reward": 0.21250000447034836, |
| "reward_std": 0.2526913657784462, |
| "rewards/MultiModalAccuracyORM/mean": 0.21250000447034836, |
| "rewards/MultiModalAccuracyORM/std": 0.2526913657784462, |
| "step": 255, |
| "train_speed(iter/s)": 0.031791 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 448.6, |
| "completions/mean_length": 291.32500915527345, |
| "completions/min_length": 161.1, |
| "epoch": 0.10505050505050505, |
| "grad_norm": 2.7968135195637585, |
| "kl": 0.0034709930419921874, |
| "learning_rate": 2e-07, |
| "loss": 0.02938370406627655, |
| "memory(GiB)": 113.5, |
| "reward": 0.2333333410322666, |
| "reward_std": 0.30821192264556885, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.30821192264556885, |
| "step": 260, |
| "train_speed(iter/s)": 0.031882 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 635.9, |
| "completions/mean_length": 381.02501220703124, |
| "completions/min_length": 193.9, |
| "epoch": 0.10707070707070707, |
| "grad_norm": 2.2674884321553908, |
| "kl": 0.0033966064453125, |
| "learning_rate": 2e-07, |
| "loss": 0.03137490749359131, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.3492949903011322, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000149011612, |
| "rewards/MultiModalAccuracyORM/std": 0.3492949903011322, |
| "step": 265, |
| "train_speed(iter/s)": 0.031856 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 622.5, |
| "completions/mean_length": 384.0666763305664, |
| "completions/min_length": 238.4, |
| "epoch": 0.10909090909090909, |
| "grad_norm": 1.4757764767450905, |
| "kl": 0.006084823608398437, |
| "learning_rate": 2e-07, |
| "loss": 0.012543919682502746, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000141561031, |
| "reward_std": 0.42771587073802947, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000141561031, |
| "rewards/MultiModalAccuracyORM/std": 0.42771587073802947, |
| "step": 270, |
| "train_speed(iter/s)": 0.031865 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 569.9, |
| "completions/mean_length": 362.15000610351564, |
| "completions/min_length": 202.6, |
| "epoch": 0.1111111111111111, |
| "grad_norm": 2.133208686622741, |
| "kl": 0.004328155517578125, |
| "learning_rate": 2e-07, |
| "loss": 0.014178204536437988, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333447575569, |
| "reward_std": 0.35184402465820314, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333447575569, |
| "rewards/MultiModalAccuracyORM/std": 0.35184402465820314, |
| "step": 275, |
| "train_speed(iter/s)": 0.031998 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 468.6, |
| "completions/mean_length": 274.9250061035156, |
| "completions/min_length": 153.3, |
| "epoch": 0.11313131313131314, |
| "grad_norm": 2.320837755784546, |
| "kl": 0.002793121337890625, |
| "learning_rate": 2e-07, |
| "loss": -0.002980351448059082, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.30639869570732114, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.30639869570732114, |
| "step": 280, |
| "train_speed(iter/s)": 0.032128 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03333333333333333, |
| "completions/max_length": 807.9, |
| "completions/mean_length": 470.2083465576172, |
| "completions/min_length": 219.6, |
| "epoch": 0.11515151515151516, |
| "grad_norm": 1.5979399011587243, |
| "kl": 0.006278228759765625, |
| "learning_rate": 2e-07, |
| "loss": 0.01850479543209076, |
| "memory(GiB)": 113.5, |
| "reward": 0.39166667088866236, |
| "reward_std": 0.4097074121236801, |
| "rewards/MultiModalAccuracyORM/mean": 0.39166667088866236, |
| "rewards/MultiModalAccuracyORM/std": 0.4097074121236801, |
| "step": 285, |
| "train_speed(iter/s)": 0.032047 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 617.6, |
| "completions/mean_length": 375.62501373291013, |
| "completions/min_length": 199.8, |
| "epoch": 0.11717171717171718, |
| "grad_norm": 1.6711790369238562, |
| "kl": 0.002816009521484375, |
| "learning_rate": 2e-07, |
| "loss": 0.05777819156646728, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667237877846, |
| "reward_std": 0.34181976318359375, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667237877846, |
| "rewards/MultiModalAccuracyORM/std": 0.34181976318359375, |
| "step": 290, |
| "train_speed(iter/s)": 0.032072 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 593.7, |
| "completions/mean_length": 373.40001068115237, |
| "completions/min_length": 222.1, |
| "epoch": 0.1191919191919192, |
| "grad_norm": 1.2952752164962844, |
| "kl": 0.006529617309570313, |
| "learning_rate": 2e-07, |
| "loss": 0.02864307165145874, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.22631654143333435, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667386889457, |
| "rewards/MultiModalAccuracyORM/std": 0.22631654143333435, |
| "step": 295, |
| "train_speed(iter/s)": 0.032146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 571.7, |
| "completions/mean_length": 389.9750091552734, |
| "completions/min_length": 260.7, |
| "epoch": 0.12121212121212122, |
| "grad_norm": 2.5199865002602895, |
| "kl": 0.00448150634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.0044337153434753414, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333417773247, |
| "reward_std": 0.3886078953742981, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.3886078953742981, |
| "step": 300, |
| "train_speed(iter/s)": 0.03218 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 463.3, |
| "completions/mean_length": 298.0666748046875, |
| "completions/min_length": 161.4, |
| "epoch": 0.12323232323232323, |
| "grad_norm": 0.04178305906141455, |
| "kl": 0.00428619384765625, |
| "learning_rate": 2e-07, |
| "loss": -0.04246575832366943, |
| "memory(GiB)": 113.5, |
| "reward": 0.10000000223517418, |
| "reward_std": 0.20118070244789124, |
| "rewards/MultiModalAccuracyORM/mean": 0.10000000223517418, |
| "rewards/MultiModalAccuracyORM/std": 0.20118070244789124, |
| "step": 305, |
| "train_speed(iter/s)": 0.032256 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 473.8, |
| "completions/mean_length": 311.39167404174805, |
| "completions/min_length": 131.0, |
| "epoch": 0.12525252525252525, |
| "grad_norm": 0.041069103688074135, |
| "kl": 0.004656982421875, |
| "learning_rate": 2e-07, |
| "loss": 0.024589771032333375, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333334401249886, |
| "reward_std": 0.274494343996048, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333334401249886, |
| "rewards/MultiModalAccuracyORM/std": 0.274494343996048, |
| "step": 310, |
| "train_speed(iter/s)": 0.032348 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 585.4, |
| "completions/mean_length": 349.1750030517578, |
| "completions/min_length": 191.6, |
| "epoch": 0.12727272727272726, |
| "grad_norm": 1.4578057904181938, |
| "kl": 0.008466339111328125, |
| "learning_rate": 2e-07, |
| "loss": 0.019071149826049804, |
| "memory(GiB)": 113.5, |
| "reward": 0.18333334103226662, |
| "reward_std": 0.24637180864810942, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333334103226662, |
| "rewards/MultiModalAccuracyORM/std": 0.24637180864810942, |
| "step": 315, |
| "train_speed(iter/s)": 0.032385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 467.1, |
| "completions/mean_length": 305.83334426879884, |
| "completions/min_length": 177.6, |
| "epoch": 0.1292929292929293, |
| "grad_norm": 2.0332697577512895, |
| "kl": 0.003513336181640625, |
| "learning_rate": 2e-07, |
| "loss": 0.012425613403320313, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.3207202464342117, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3207202464342117, |
| "step": 320, |
| "train_speed(iter/s)": 0.032468 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 528.6, |
| "completions/mean_length": 350.608341217041, |
| "completions/min_length": 207.5, |
| "epoch": 0.13131313131313133, |
| "grad_norm": 2.9017059326660206, |
| "kl": 0.008218002319335938, |
| "learning_rate": 2e-07, |
| "loss": -0.007495748996734619, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667237877845, |
| "reward_std": 0.2847819983959198, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.2847819983959198, |
| "step": 325, |
| "train_speed(iter/s)": 0.032489 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 506.7, |
| "completions/mean_length": 348.37501220703126, |
| "completions/min_length": 230.4, |
| "epoch": 0.13333333333333333, |
| "grad_norm": 2.0452895180997612, |
| "kl": 0.00405426025390625, |
| "learning_rate": 2e-07, |
| "loss": 0.012925130128860474, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000059604645, |
| "reward_std": 0.34633229672908783, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000059604645, |
| "rewards/MultiModalAccuracyORM/std": 0.34633229672908783, |
| "step": 330, |
| "train_speed(iter/s)": 0.032601 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 618.4, |
| "completions/mean_length": 391.80001068115234, |
| "completions/min_length": 205.6, |
| "epoch": 0.13535353535353536, |
| "grad_norm": 2.3689531245965014, |
| "kl": 0.0037220001220703127, |
| "learning_rate": 2e-07, |
| "loss": -0.02884441614151001, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667610406876, |
| "reward_std": 0.3244759202003479, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667610406876, |
| "rewards/MultiModalAccuracyORM/std": 0.3244759202003479, |
| "step": 335, |
| "train_speed(iter/s)": 0.032628 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 711.7, |
| "completions/mean_length": 393.9500137329102, |
| "completions/min_length": 210.7, |
| "epoch": 0.13737373737373737, |
| "grad_norm": 3.1268062962961447, |
| "kl": 0.00513458251953125, |
| "learning_rate": 2e-07, |
| "loss": -0.007295359671115875, |
| "memory(GiB)": 113.5, |
| "reward": 0.12500000447034837, |
| "reward_std": 0.2837377518415451, |
| "rewards/MultiModalAccuracyORM/mean": 0.12500000447034837, |
| "rewards/MultiModalAccuracyORM/std": 0.2837377518415451, |
| "step": 340, |
| "train_speed(iter/s)": 0.032638 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 510.2, |
| "completions/mean_length": 325.42500457763674, |
| "completions/min_length": 202.4, |
| "epoch": 0.1393939393939394, |
| "grad_norm": 2.570539853128275, |
| "kl": 0.010897064208984375, |
| "learning_rate": 2e-07, |
| "loss": -0.03583614826202393, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.28154108226299285, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333879709245, |
| "rewards/MultiModalAccuracyORM/std": 0.28154108226299285, |
| "step": 345, |
| "train_speed(iter/s)": 0.032636 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 573.3, |
| "completions/mean_length": 348.71667327880857, |
| "completions/min_length": 202.0, |
| "epoch": 0.1414141414141414, |
| "grad_norm": 1.4744760782673672, |
| "kl": 0.005255126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.06839704513549805, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666738688946, |
| "reward_std": 0.3267677813768387, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.3267677813768387, |
| "step": 350, |
| "train_speed(iter/s)": 0.032723 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 685.7, |
| "completions/mean_length": 395.8666763305664, |
| "completions/min_length": 217.6, |
| "epoch": 0.14343434343434344, |
| "grad_norm": 0.032365545804024926, |
| "kl": 0.00413818359375, |
| "learning_rate": 2e-07, |
| "loss": -0.008323472738265992, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667610406875, |
| "reward_std": 0.29187673330307007, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667610406875, |
| "rewards/MultiModalAccuracyORM/std": 0.29187673330307007, |
| "step": 355, |
| "train_speed(iter/s)": 0.032744 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 521.9, |
| "completions/mean_length": 327.34167633056643, |
| "completions/min_length": 184.3, |
| "epoch": 0.14545454545454545, |
| "grad_norm": 1.1619770767978876, |
| "kl": 0.01970672607421875, |
| "learning_rate": 2e-07, |
| "loss": 0.014476829767227173, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666731238365, |
| "reward_std": 0.35942656397819517, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.35942656397819517, |
| "step": 360, |
| "train_speed(iter/s)": 0.032848 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 477.8, |
| "completions/mean_length": 331.90001220703124, |
| "completions/min_length": 222.0, |
| "epoch": 0.14747474747474748, |
| "grad_norm": 1.4073504269814208, |
| "kl": 0.006307220458984375, |
| "learning_rate": 2e-07, |
| "loss": 0.03325994312763214, |
| "memory(GiB)": 113.5, |
| "reward": 0.05833333432674408, |
| "reward_std": 0.16069675385951995, |
| "rewards/MultiModalAccuracyORM/mean": 0.05833333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.16069675385951995, |
| "step": 365, |
| "train_speed(iter/s)": 0.032856 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 698.0, |
| "completions/mean_length": 423.8916839599609, |
| "completions/min_length": 252.2, |
| "epoch": 0.1494949494949495, |
| "grad_norm": 1.4976657581094635, |
| "kl": 0.006170654296875, |
| "learning_rate": 2e-07, |
| "loss": -0.01670956760644913, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000223517417, |
| "reward_std": 0.21999078392982482, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000223517417, |
| "rewards/MultiModalAccuracyORM/std": 0.21999078392982482, |
| "step": 370, |
| "train_speed(iter/s)": 0.032832 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 707.2, |
| "completions/mean_length": 363.00001068115233, |
| "completions/min_length": 182.0, |
| "epoch": 0.15151515151515152, |
| "grad_norm": 2.481807345956626, |
| "kl": 0.0046051025390625, |
| "learning_rate": 2e-07, |
| "loss": 0.04444247186183929, |
| "memory(GiB)": 113.5, |
| "reward": 0.400000012665987, |
| "reward_std": 0.3985941380262375, |
| "rewards/MultiModalAccuracyORM/mean": 0.400000012665987, |
| "rewards/MultiModalAccuracyORM/std": 0.3985941380262375, |
| "step": 375, |
| "train_speed(iter/s)": 0.032805 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 597.2, |
| "completions/mean_length": 362.1333435058594, |
| "completions/min_length": 207.8, |
| "epoch": 0.15353535353535352, |
| "grad_norm": 1.225556055703092, |
| "kl": 0.01065216064453125, |
| "learning_rate": 2e-07, |
| "loss": 0.0010599255561828612, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000022351742, |
| "reward_std": 0.22698737680912018, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000022351742, |
| "rewards/MultiModalAccuracyORM/std": 0.22698737680912018, |
| "step": 380, |
| "train_speed(iter/s)": 0.032797 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 450.5, |
| "completions/mean_length": 259.9750068664551, |
| "completions/min_length": 151.0, |
| "epoch": 0.15555555555555556, |
| "grad_norm": 3.170333391476991, |
| "kl": 0.010870361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.04853119254112244, |
| "memory(GiB)": 113.5, |
| "reward": 0.4500000074505806, |
| "reward_std": 0.32345272302627565, |
| "rewards/MultiModalAccuracyORM/mean": 0.4500000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.32345272302627565, |
| "step": 385, |
| "train_speed(iter/s)": 0.032869 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 724.9, |
| "completions/mean_length": 359.0833465576172, |
| "completions/min_length": 170.4, |
| "epoch": 0.15757575757575756, |
| "grad_norm": 1.6322015536148482, |
| "kl": 0.00597076416015625, |
| "learning_rate": 2e-07, |
| "loss": -0.003878127783536911, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667237877846, |
| "reward_std": 0.3196614503860474, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667237877846, |
| "rewards/MultiModalAccuracyORM/std": 0.3196614503860474, |
| "step": 390, |
| "train_speed(iter/s)": 0.032905 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 692.1, |
| "completions/mean_length": 429.06668014526366, |
| "completions/min_length": 281.5, |
| "epoch": 0.1595959595959596, |
| "grad_norm": 2.750918910992668, |
| "kl": 0.059673309326171875, |
| "learning_rate": 2e-07, |
| "loss": 0.016079676151275635, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166666865348815, |
| "reward_std": 0.23854664266109465, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166666865348815, |
| "rewards/MultiModalAccuracyORM/std": 0.23854664266109465, |
| "step": 395, |
| "train_speed(iter/s)": 0.032918 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 722.7, |
| "completions/mean_length": 381.7416793823242, |
| "completions/min_length": 187.8, |
| "epoch": 0.16161616161616163, |
| "grad_norm": 1.276714724002977, |
| "kl": 0.004840087890625, |
| "learning_rate": 2e-07, |
| "loss": 0.030894118547439575, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000074505806, |
| "reward_std": 0.21374862194061278, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.21374862194061278, |
| "step": 400, |
| "train_speed(iter/s)": 0.032861 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 454.0, |
| "completions/mean_length": 292.2416748046875, |
| "completions/min_length": 188.5, |
| "epoch": 0.16363636363636364, |
| "grad_norm": 1.285497466986634, |
| "kl": 0.00401611328125, |
| "learning_rate": 2e-07, |
| "loss": -0.00028939247131347655, |
| "memory(GiB)": 113.5, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.2986306995153427, |
| "rewards/MultiModalAccuracyORM/mean": 0.25833333656191826, |
| "rewards/MultiModalAccuracyORM/std": 0.2986306995153427, |
| "step": 405, |
| "train_speed(iter/s)": 0.032956 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 574.5, |
| "completions/mean_length": 332.90001373291017, |
| "completions/min_length": 195.8, |
| "epoch": 0.16565656565656567, |
| "grad_norm": 2.4986293478171695, |
| "kl": 0.0099639892578125, |
| "learning_rate": 2e-07, |
| "loss": 0.01775420904159546, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166666939854622, |
| "reward_std": 0.2355453997850418, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166666939854622, |
| "rewards/MultiModalAccuracyORM/std": 0.2355453997850418, |
| "step": 410, |
| "train_speed(iter/s)": 0.032979 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 590.6, |
| "completions/mean_length": 352.77500915527344, |
| "completions/min_length": 189.9, |
| "epoch": 0.16767676767676767, |
| "grad_norm": 1.8788296454969475, |
| "kl": 0.00422210693359375, |
| "learning_rate": 2e-07, |
| "loss": -0.005545926094055176, |
| "memory(GiB)": 113.5, |
| "reward": 0.32500001043081284, |
| "reward_std": 0.3388330668210983, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500001043081284, |
| "rewards/MultiModalAccuracyORM/std": 0.3388330668210983, |
| "step": 415, |
| "train_speed(iter/s)": 0.033025 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 690.9, |
| "completions/mean_length": 414.40001068115237, |
| "completions/min_length": 239.5, |
| "epoch": 0.1696969696969697, |
| "grad_norm": 0.07032446522446908, |
| "kl": 0.005554962158203125, |
| "learning_rate": 2e-07, |
| "loss": -0.002293400466442108, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833333879709243, |
| "reward_std": 0.21973656117916107, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333879709243, |
| "rewards/MultiModalAccuracyORM/std": 0.21973656117916107, |
| "step": 420, |
| "train_speed(iter/s)": 0.032985 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 551.6, |
| "completions/mean_length": 308.7250091552734, |
| "completions/min_length": 175.5, |
| "epoch": 0.1717171717171717, |
| "grad_norm": 1.4798323094999317, |
| "kl": 0.00482025146484375, |
| "learning_rate": 2e-07, |
| "loss": 0.01790083050727844, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.2104335606098175, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000521540644, |
| "rewards/MultiModalAccuracyORM/std": 0.2104335606098175, |
| "step": 425, |
| "train_speed(iter/s)": 0.033033 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 557.9, |
| "completions/mean_length": 350.28334655761716, |
| "completions/min_length": 202.6, |
| "epoch": 0.17373737373737375, |
| "grad_norm": 1.9633281758859618, |
| "kl": 0.004430389404296875, |
| "learning_rate": 2e-07, |
| "loss": 0.0008227840065956116, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500001713633535, |
| "reward_std": 0.3780064254999161, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500001713633535, |
| "rewards/MultiModalAccuracyORM/std": 0.3780064254999161, |
| "step": 430, |
| "train_speed(iter/s)": 0.033105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 430.3, |
| "completions/mean_length": 264.40834045410156, |
| "completions/min_length": 139.7, |
| "epoch": 0.17575757575757575, |
| "grad_norm": 1.9529808864934317, |
| "kl": 0.00596923828125, |
| "learning_rate": 2e-07, |
| "loss": -0.06038873791694641, |
| "memory(GiB)": 113.5, |
| "reward": 0.3333333387970924, |
| "reward_std": 0.29837648272514344, |
| "rewards/MultiModalAccuracyORM/mean": 0.3333333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.29837648272514344, |
| "step": 435, |
| "train_speed(iter/s)": 0.033193 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 461.5, |
| "completions/mean_length": 296.8333374023438, |
| "completions/min_length": 171.6, |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.03169449948005974, |
| "kl": 0.00481719970703125, |
| "learning_rate": 2e-07, |
| "loss": 0.018176303803920747, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000968575475, |
| "reward_std": 0.2596701592206955, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000968575475, |
| "rewards/MultiModalAccuracyORM/std": 0.2596701592206955, |
| "step": 440, |
| "train_speed(iter/s)": 0.03327 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 462.5, |
| "completions/mean_length": 268.50834197998046, |
| "completions/min_length": 126.3, |
| "epoch": 0.1797979797979798, |
| "grad_norm": 2.4262437209194774, |
| "kl": 0.0057281494140625, |
| "learning_rate": 2e-07, |
| "loss": -0.034365218877792356, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000074505806, |
| "reward_std": 0.38001427948474886, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.38001427948474886, |
| "step": 445, |
| "train_speed(iter/s)": 0.033325 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 519.4, |
| "completions/mean_length": 337.42501373291014, |
| "completions/min_length": 194.1, |
| "epoch": 0.18181818181818182, |
| "grad_norm": 2.3770604401183997, |
| "kl": 0.00361785888671875, |
| "learning_rate": 2e-07, |
| "loss": -0.010681581497192384, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333358168602, |
| "reward_std": 0.24490799605846406, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.24490799605846406, |
| "step": 450, |
| "train_speed(iter/s)": 0.033355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 645.6, |
| "completions/mean_length": 383.0333450317383, |
| "completions/min_length": 228.9, |
| "epoch": 0.18383838383838383, |
| "grad_norm": 1.5212583244692293, |
| "kl": 0.0044342041015625, |
| "learning_rate": 2e-07, |
| "loss": 0.010468679666519164, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000447034835, |
| "reward_std": 0.29815449118614196, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000447034835, |
| "rewards/MultiModalAccuracyORM/std": 0.29815449118614196, |
| "step": 455, |
| "train_speed(iter/s)": 0.033387 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 682.1, |
| "completions/mean_length": 331.59167556762696, |
| "completions/min_length": 148.5, |
| "epoch": 0.18585858585858586, |
| "grad_norm": 2.3101338751804605, |
| "kl": 0.005951690673828125, |
| "learning_rate": 2e-07, |
| "loss": 0.013955891132354736, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.3207202464342117, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3207202464342117, |
| "step": 460, |
| "train_speed(iter/s)": 0.033356 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 529.1, |
| "completions/mean_length": 324.4416748046875, |
| "completions/min_length": 189.3, |
| "epoch": 0.18787878787878787, |
| "grad_norm": 1.9306296492930712, |
| "kl": 0.00476531982421875, |
| "learning_rate": 2e-07, |
| "loss": 0.0007774412631988525, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833333805203438, |
| "reward_std": 0.18332210481166838, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333805203438, |
| "rewards/MultiModalAccuracyORM/std": 0.18332210481166838, |
| "step": 465, |
| "train_speed(iter/s)": 0.03337 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 696.5, |
| "completions/mean_length": 451.75001220703126, |
| "completions/min_length": 242.0, |
| "epoch": 0.1898989898989899, |
| "grad_norm": 2.9489928820712117, |
| "kl": 0.003478240966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.0002551078796386719, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166666865348815, |
| "reward_std": 0.22453648447990418, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166666865348815, |
| "rewards/MultiModalAccuracyORM/std": 0.22453648447990418, |
| "step": 470, |
| "train_speed(iter/s)": 0.033353 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 655.2, |
| "completions/mean_length": 419.60001983642576, |
| "completions/min_length": 252.7, |
| "epoch": 0.1919191919191919, |
| "grad_norm": 1.657148402320105, |
| "kl": 0.00272979736328125, |
| "learning_rate": 2e-07, |
| "loss": -0.02806915044784546, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000894069674, |
| "reward_std": 0.3011055916547775, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000894069674, |
| "rewards/MultiModalAccuracyORM/std": 0.3011055916547775, |
| "step": 475, |
| "train_speed(iter/s)": 0.033331 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 545.1, |
| "completions/mean_length": 373.71667633056643, |
| "completions/min_length": 256.2, |
| "epoch": 0.19393939393939394, |
| "grad_norm": 2.869711221257181, |
| "kl": 0.0064971923828125, |
| "learning_rate": 2e-07, |
| "loss": -0.002555108070373535, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666768491268, |
| "reward_std": 0.2636824816465378, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.2636824816465378, |
| "step": 480, |
| "train_speed(iter/s)": 0.033361 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 613.2, |
| "completions/mean_length": 391.5833404541016, |
| "completions/min_length": 218.1, |
| "epoch": 0.19595959595959597, |
| "grad_norm": 1.9631879540052586, |
| "kl": 0.005725860595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0018699795007705688, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166667237877845, |
| "reward_std": 0.15595400035381318, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.15595400035381318, |
| "step": 485, |
| "train_speed(iter/s)": 0.033379 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 594.0, |
| "completions/mean_length": 295.8583442687988, |
| "completions/min_length": 156.6, |
| "epoch": 0.19797979797979798, |
| "grad_norm": 0.037793670384228664, |
| "kl": 0.0073211669921875, |
| "learning_rate": 2e-07, |
| "loss": 0.020484793186187743, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.24483142793178558, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000149011612, |
| "rewards/MultiModalAccuracyORM/std": 0.24483142793178558, |
| "step": 490, |
| "train_speed(iter/s)": 0.033414 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 592.9, |
| "completions/mean_length": 380.616682434082, |
| "completions/min_length": 220.0, |
| "epoch": 0.2, |
| "grad_norm": 2.1512862837965163, |
| "kl": 0.003929901123046875, |
| "learning_rate": 2e-07, |
| "loss": 0.0034599393606185914, |
| "memory(GiB)": 113.5, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.30715312659740446, |
| "rewards/MultiModalAccuracyORM/mean": 0.30000000521540643, |
| "rewards/MultiModalAccuracyORM/std": 0.30715312659740446, |
| "step": 495, |
| "train_speed(iter/s)": 0.033449 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 2.239910097717952, |
| "learning_rate": 2e-07, |
| "loss": 0.014047640562057494, |
| "memory(GiB)": 113.5, |
| "step": 500, |
| "train_speed(iter/s)": 0.033495 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0016666666666666666, |
| "eval_completions/max_length": 591.26, |
| "eval_completions/mean_length": 358.19000946044923, |
| "eval_completions/min_length": 202.24, |
| "eval_kl": 0.002655487060546875, |
| "eval_loss": 0.00915438961237669, |
| "eval_reward": 0.22833333894610405, |
| "eval_reward_std": 0.28466624081134795, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.22833333894610405, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.28466624081134795, |
| "eval_runtime": 608.1673, |
| "eval_samples_per_second": 0.082, |
| "eval_steps_per_second": 0.008, |
| "step": 500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 521.35, |
| "completions/mean_length": 332.39167404174805, |
| "completions/min_length": 199.1, |
| "epoch": 0.20404040404040405, |
| "grad_norm": 2.3622087713081186, |
| "kl": 0.004245758056640625, |
| "learning_rate": 2e-07, |
| "loss": -0.00013803243637084962, |
| "memory(GiB)": 113.5, |
| "reward": 0.3125000067055225, |
| "reward_std": 0.3219920754432678, |
| "rewards/MultiModalAccuracyORM/mean": 0.3125000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.3219920754432678, |
| "step": 505, |
| "train_speed(iter/s)": 0.031802 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 605.3, |
| "completions/mean_length": 374.4833450317383, |
| "completions/min_length": 209.5, |
| "epoch": 0.20606060606060606, |
| "grad_norm": 1.7757575475794216, |
| "kl": 0.006531524658203125, |
| "learning_rate": 2e-07, |
| "loss": 0.03503022789955139, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.28778324127197263, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667312383654, |
| "rewards/MultiModalAccuracyORM/std": 0.28778324127197263, |
| "step": 510, |
| "train_speed(iter/s)": 0.031819 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 507.2, |
| "completions/mean_length": 300.96667633056643, |
| "completions/min_length": 179.3, |
| "epoch": 0.2080808080808081, |
| "grad_norm": 2.2727530064482235, |
| "kl": 0.01416778564453125, |
| "learning_rate": 2e-07, |
| "loss": 0.022283512353897094, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667610406875, |
| "reward_std": 0.3347875773906708, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667610406875, |
| "rewards/MultiModalAccuracyORM/std": 0.3347875773906708, |
| "step": 515, |
| "train_speed(iter/s)": 0.03184 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 598.9, |
| "completions/mean_length": 341.6000099182129, |
| "completions/min_length": 175.8, |
| "epoch": 0.2101010101010101, |
| "grad_norm": 1.1487867895660082, |
| "kl": 0.00421295166015625, |
| "learning_rate": 2e-07, |
| "loss": 0.04290072023868561, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666761040688, |
| "reward_std": 0.28399197161197665, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666761040688, |
| "rewards/MultiModalAccuracyORM/std": 0.28399197161197665, |
| "step": 520, |
| "train_speed(iter/s)": 0.03186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 598.5, |
| "completions/mean_length": 344.0500091552734, |
| "completions/min_length": 175.8, |
| "epoch": 0.21212121212121213, |
| "grad_norm": 2.2941717609617767, |
| "kl": 0.0046539306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.004269888997077942, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833333879709246, |
| "reward_std": 0.3267677813768387, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833333879709246, |
| "rewards/MultiModalAccuracyORM/std": 0.3267677813768387, |
| "step": 525, |
| "train_speed(iter/s)": 0.031902 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 531.2, |
| "completions/mean_length": 345.83334197998045, |
| "completions/min_length": 172.9, |
| "epoch": 0.21414141414141413, |
| "grad_norm": 1.2948745647020719, |
| "kl": 0.004862213134765625, |
| "learning_rate": 2e-07, |
| "loss": -0.007743622362613678, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333805203436, |
| "reward_std": 0.25897532403469087, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333805203436, |
| "rewards/MultiModalAccuracyORM/std": 0.25897532403469087, |
| "step": 530, |
| "train_speed(iter/s)": 0.031973 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 512.0, |
| "completions/mean_length": 277.8500061035156, |
| "completions/min_length": 127.7, |
| "epoch": 0.21616161616161617, |
| "grad_norm": 2.820652916445064, |
| "kl": 0.004701995849609375, |
| "learning_rate": 2e-07, |
| "loss": 0.019122210144996644, |
| "memory(GiB)": 113.5, |
| "reward": 0.25833334028720856, |
| "reward_std": 0.38930273354053496, |
| "rewards/MultiModalAccuracyORM/mean": 0.25833334028720856, |
| "rewards/MultiModalAccuracyORM/std": 0.38930273354053496, |
| "step": 535, |
| "train_speed(iter/s)": 0.032051 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 503.3, |
| "completions/mean_length": 311.0666732788086, |
| "completions/min_length": 155.9, |
| "epoch": 0.21818181818181817, |
| "grad_norm": 0.02000320571323216, |
| "kl": 0.006194305419921875, |
| "learning_rate": 2e-07, |
| "loss": 0.023233750462532045, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667237877847, |
| "reward_std": 0.26298522055149076, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667237877847, |
| "rewards/MultiModalAccuracyORM/std": 0.26298522055149076, |
| "step": 540, |
| "train_speed(iter/s)": 0.032099 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 495.1, |
| "completions/mean_length": 298.1750099182129, |
| "completions/min_length": 159.8, |
| "epoch": 0.2202020202020202, |
| "grad_norm": 1.7992434949177767, |
| "kl": 0.00469207763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.015616017580032348, |
| "memory(GiB)": 113.5, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.22704697251319886, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500000596046447, |
| "rewards/MultiModalAccuracyORM/std": 0.22704697251319886, |
| "step": 545, |
| "train_speed(iter/s)": 0.032156 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 467.6, |
| "completions/mean_length": 273.84167633056643, |
| "completions/min_length": 145.6, |
| "epoch": 0.2222222222222222, |
| "grad_norm": 2.8559923799679794, |
| "kl": 0.00508270263671875, |
| "learning_rate": 2e-07, |
| "loss": 0.050173360109329226, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500001341104505, |
| "reward_std": 0.33303394317626955, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500001341104505, |
| "rewards/MultiModalAccuracyORM/std": 0.33303394317626955, |
| "step": 550, |
| "train_speed(iter/s)": 0.032216 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 652.5, |
| "completions/mean_length": 421.71667633056643, |
| "completions/min_length": 240.9, |
| "epoch": 0.22424242424242424, |
| "grad_norm": 2.3260782482625366, |
| "kl": 0.005718994140625, |
| "learning_rate": 2e-07, |
| "loss": 0.02654660940170288, |
| "memory(GiB)": 113.5, |
| "reward": 0.36666667759418486, |
| "reward_std": 0.46648178398609164, |
| "rewards/MultiModalAccuracyORM/mean": 0.36666667759418486, |
| "rewards/MultiModalAccuracyORM/std": 0.46648178398609164, |
| "step": 555, |
| "train_speed(iter/s)": 0.032254 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 695.9, |
| "completions/mean_length": 376.9666717529297, |
| "completions/min_length": 211.5, |
| "epoch": 0.22626262626262628, |
| "grad_norm": 1.9191123297699473, |
| "kl": 0.0039215087890625, |
| "learning_rate": 2e-07, |
| "loss": 0.013482053577899934, |
| "memory(GiB)": 113.5, |
| "reward": 0.1750000037252903, |
| "reward_std": 0.3042020261287689, |
| "rewards/MultiModalAccuracyORM/mean": 0.1750000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.3042020261287689, |
| "step": 560, |
| "train_speed(iter/s)": 0.032243 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 564.5, |
| "completions/mean_length": 348.8333450317383, |
| "completions/min_length": 184.5, |
| "epoch": 0.22828282828282828, |
| "grad_norm": 2.0009650914845873, |
| "kl": 0.01170501708984375, |
| "learning_rate": 2e-07, |
| "loss": 0.035267585515975954, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333410322666, |
| "reward_std": 0.38205191493034363, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.38205191493034363, |
| "step": 565, |
| "train_speed(iter/s)": 0.032295 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 518.6, |
| "completions/mean_length": 319.6166748046875, |
| "completions/min_length": 181.1, |
| "epoch": 0.23030303030303031, |
| "grad_norm": 0.1996246857202343, |
| "kl": 0.005075836181640625, |
| "learning_rate": 2e-07, |
| "loss": -0.02471494972705841, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166666939854622, |
| "reward_std": 0.2549654275178909, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166666939854622, |
| "rewards/MultiModalAccuracyORM/std": 0.2549654275178909, |
| "step": 570, |
| "train_speed(iter/s)": 0.032297 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 596.0, |
| "completions/mean_length": 338.40833892822263, |
| "completions/min_length": 187.7, |
| "epoch": 0.23232323232323232, |
| "grad_norm": 2.3362669602060033, |
| "kl": 0.00451202392578125, |
| "learning_rate": 2e-07, |
| "loss": 0.03307419717311859, |
| "memory(GiB)": 113.5, |
| "reward": 0.2000000037252903, |
| "reward_std": 0.3081523299217224, |
| "rewards/MultiModalAccuracyORM/mean": 0.2000000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.3081523299217224, |
| "step": 575, |
| "train_speed(iter/s)": 0.03234 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 498.9, |
| "completions/mean_length": 292.68334197998047, |
| "completions/min_length": 158.3, |
| "epoch": 0.23434343434343435, |
| "grad_norm": 2.8417938649503394, |
| "kl": 0.014810943603515625, |
| "learning_rate": 2e-07, |
| "loss": -0.03590070009231568, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.39629932343959806, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.39629932343959806, |
| "step": 580, |
| "train_speed(iter/s)": 0.032381 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 587.8, |
| "completions/mean_length": 396.57501831054685, |
| "completions/min_length": 239.6, |
| "epoch": 0.23636363636363636, |
| "grad_norm": 0.03715745404820811, |
| "kl": 0.00491180419921875, |
| "learning_rate": 2e-07, |
| "loss": -0.0016106054186820983, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000596046446, |
| "reward_std": 0.27749558687210085, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000596046446, |
| "rewards/MultiModalAccuracyORM/std": 0.27749558687210085, |
| "step": 585, |
| "train_speed(iter/s)": 0.032385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 579.1, |
| "completions/mean_length": 371.41667861938475, |
| "completions/min_length": 206.0, |
| "epoch": 0.2383838383838384, |
| "grad_norm": 2.5904505607936237, |
| "kl": 0.0038330078125, |
| "learning_rate": 2e-07, |
| "loss": -0.0013609230518341064, |
| "memory(GiB)": 113.5, |
| "reward": 0.4416666768491268, |
| "reward_std": 0.3044206529855728, |
| "rewards/MultiModalAccuracyORM/mean": 0.4416666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.3044206529855728, |
| "step": 590, |
| "train_speed(iter/s)": 0.032404 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 550.8, |
| "completions/mean_length": 332.2250091552734, |
| "completions/min_length": 175.5, |
| "epoch": 0.2404040404040404, |
| "grad_norm": 3.252161568752739, |
| "kl": 0.00532073974609375, |
| "learning_rate": 2e-07, |
| "loss": 0.022338399291038515, |
| "memory(GiB)": 113.5, |
| "reward": 0.316666679084301, |
| "reward_std": 0.35766714811325073, |
| "rewards/MultiModalAccuracyORM/mean": 0.316666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.35766714811325073, |
| "step": 595, |
| "train_speed(iter/s)": 0.032438 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 631.6, |
| "completions/mean_length": 366.18334045410154, |
| "completions/min_length": 210.1, |
| "epoch": 0.24242424242424243, |
| "grad_norm": 1.7160058152461715, |
| "kl": 0.0050140380859375, |
| "learning_rate": 2e-07, |
| "loss": -0.0045736730098724365, |
| "memory(GiB)": 113.5, |
| "reward": 0.3250000074505806, |
| "reward_std": 0.32682737708091736, |
| "rewards/MultiModalAccuracyORM/mean": 0.3250000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.32682737708091736, |
| "step": 600, |
| "train_speed(iter/s)": 0.032452 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 968.4, |
| "completions/mean_length": 407.6500114440918, |
| "completions/min_length": 221.4, |
| "epoch": 0.24444444444444444, |
| "grad_norm": 1.5763528784256282, |
| "kl": 0.0037322998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.003979828953742981, |
| "memory(GiB)": 113.5, |
| "reward": 0.30000000819563866, |
| "reward_std": 0.4196960777044296, |
| "rewards/MultiModalAccuracyORM/mean": 0.30000000819563866, |
| "rewards/MultiModalAccuracyORM/std": 0.4196960777044296, |
| "step": 605, |
| "train_speed(iter/s)": 0.0324 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 473.6, |
| "completions/mean_length": 293.57500610351565, |
| "completions/min_length": 166.8, |
| "epoch": 0.24646464646464647, |
| "grad_norm": 3.2425538671850047, |
| "kl": 0.009912109375, |
| "learning_rate": 2e-07, |
| "loss": 0.024757757782936096, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000670552254, |
| "reward_std": 0.2184557795524597, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000670552254, |
| "rewards/MultiModalAccuracyORM/std": 0.2184557795524597, |
| "step": 610, |
| "train_speed(iter/s)": 0.032454 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 468.6, |
| "completions/mean_length": 272.27501068115237, |
| "completions/min_length": 161.5, |
| "epoch": 0.24848484848484848, |
| "grad_norm": 2.9002217301843682, |
| "kl": 0.006238555908203125, |
| "learning_rate": 2e-07, |
| "loss": 0.006809020042419433, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333373069763, |
| "reward_std": 0.28784283697605134, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.28784283697605134, |
| "step": 615, |
| "train_speed(iter/s)": 0.032521 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 537.6, |
| "completions/mean_length": 323.6333435058594, |
| "completions/min_length": 178.6, |
| "epoch": 0.2505050505050505, |
| "grad_norm": 1.6543202512317519, |
| "kl": 0.005059814453125, |
| "learning_rate": 2e-07, |
| "loss": -0.013031059503555298, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.22625694572925567, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667088866234, |
| "rewards/MultiModalAccuracyORM/std": 0.22625694572925567, |
| "step": 620, |
| "train_speed(iter/s)": 0.032557 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 582.9, |
| "completions/mean_length": 368.2416793823242, |
| "completions/min_length": 204.2, |
| "epoch": 0.25252525252525254, |
| "grad_norm": 1.9398904097162017, |
| "kl": 0.00662689208984375, |
| "learning_rate": 2e-07, |
| "loss": 0.020694077014923096, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833333805203438, |
| "reward_std": 0.2567190587520599, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333805203438, |
| "rewards/MultiModalAccuracyORM/std": 0.2567190587520599, |
| "step": 625, |
| "train_speed(iter/s)": 0.032596 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 505.2, |
| "completions/mean_length": 298.1750030517578, |
| "completions/min_length": 161.6, |
| "epoch": 0.2545454545454545, |
| "grad_norm": 3.9909953401900657, |
| "kl": 0.00722808837890625, |
| "learning_rate": 2e-07, |
| "loss": 0.012149769067764282, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.21594529151916503, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.21594529151916503, |
| "step": 630, |
| "train_speed(iter/s)": 0.03264 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 516.6, |
| "completions/mean_length": 335.81667404174806, |
| "completions/min_length": 203.4, |
| "epoch": 0.25656565656565655, |
| "grad_norm": 3.223504719612698, |
| "kl": 0.00595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0355703592300415, |
| "memory(GiB)": 113.5, |
| "reward": 0.39166667833924296, |
| "reward_std": 0.3838055461645126, |
| "rewards/MultiModalAccuracyORM/mean": 0.39166667833924296, |
| "rewards/MultiModalAccuracyORM/std": 0.3838055461645126, |
| "step": 635, |
| "train_speed(iter/s)": 0.032663 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 661.0, |
| "completions/mean_length": 399.683349609375, |
| "completions/min_length": 215.3, |
| "epoch": 0.2585858585858586, |
| "grad_norm": 0.031047629899617252, |
| "kl": 0.00643310546875, |
| "learning_rate": 2e-07, |
| "loss": -0.002796703577041626, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000670552254, |
| "reward_std": 0.24866367280483245, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000670552254, |
| "rewards/MultiModalAccuracyORM/std": 0.24866367280483245, |
| "step": 640, |
| "train_speed(iter/s)": 0.032678 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 473.7, |
| "completions/mean_length": 308.32500915527345, |
| "completions/min_length": 162.9, |
| "epoch": 0.2606060606060606, |
| "grad_norm": 2.661462961010607, |
| "kl": 0.0068939208984375, |
| "learning_rate": 2e-07, |
| "loss": 0.006179103255271911, |
| "memory(GiB)": 113.5, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.2652174860239029, |
| "rewards/MultiModalAccuracyORM/mean": 0.25833333656191826, |
| "rewards/MultiModalAccuracyORM/std": 0.2652174860239029, |
| "step": 645, |
| "train_speed(iter/s)": 0.032733 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 407.6, |
| "completions/mean_length": 246.7083396911621, |
| "completions/min_length": 116.2, |
| "epoch": 0.26262626262626265, |
| "grad_norm": 2.282962166826479, |
| "kl": 0.00615692138671875, |
| "learning_rate": 2e-07, |
| "loss": -0.022863130271434783, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000149011612, |
| "reward_std": 0.25664491653442384, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000149011612, |
| "rewards/MultiModalAccuracyORM/std": 0.25664491653442384, |
| "step": 650, |
| "train_speed(iter/s)": 0.032795 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 594.1, |
| "completions/mean_length": 367.6666763305664, |
| "completions/min_length": 205.7, |
| "epoch": 0.26464646464646463, |
| "grad_norm": 2.380599579002688, |
| "kl": 0.0057464599609375, |
| "learning_rate": 2e-07, |
| "loss": -0.013085761666297912, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.2993255376815796, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.2993255376815796, |
| "step": 655, |
| "train_speed(iter/s)": 0.032829 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 611.3, |
| "completions/mean_length": 398.7916854858398, |
| "completions/min_length": 212.0, |
| "epoch": 0.26666666666666666, |
| "grad_norm": 2.1060964762409085, |
| "kl": 0.0065460205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.001984366774559021, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333417773247, |
| "reward_std": 0.35184402465820314, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.35184402465820314, |
| "step": 660, |
| "train_speed(iter/s)": 0.032832 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 577.5, |
| "completions/mean_length": 379.8500099182129, |
| "completions/min_length": 222.5, |
| "epoch": 0.2686868686868687, |
| "grad_norm": 3.0979902221373083, |
| "kl": 0.00526275634765625, |
| "learning_rate": 2e-07, |
| "loss": -0.00811660885810852, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333358168602, |
| "reward_std": 0.22446234226226808, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.22446234226226808, |
| "step": 665, |
| "train_speed(iter/s)": 0.03283 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 442.4, |
| "completions/mean_length": 274.05001068115234, |
| "completions/min_length": 143.8, |
| "epoch": 0.27070707070707073, |
| "grad_norm": 2.886266049614615, |
| "kl": 0.00730133056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.008006072044372559, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000037252903, |
| "reward_std": 0.3111986219882965, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.3111986219882965, |
| "step": 670, |
| "train_speed(iter/s)": 0.032864 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 490.8, |
| "completions/mean_length": 304.02500915527344, |
| "completions/min_length": 156.8, |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.3952238447884339, |
| "kl": 0.0077239990234375, |
| "learning_rate": 2e-07, |
| "loss": 0.036022895574569704, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500001192092897, |
| "reward_std": 0.32858100831508635, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500001192092897, |
| "rewards/MultiModalAccuracyORM/std": 0.32858100831508635, |
| "step": 675, |
| "train_speed(iter/s)": 0.032929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 632.8, |
| "completions/mean_length": 364.55834197998047, |
| "completions/min_length": 203.6, |
| "epoch": 0.27474747474747474, |
| "grad_norm": 0.0686693440428557, |
| "kl": 0.00585784912109375, |
| "learning_rate": 2e-07, |
| "loss": 0.006394723057746887, |
| "memory(GiB)": 113.5, |
| "reward": 0.18333334103226662, |
| "reward_std": 0.24637180864810942, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333334103226662, |
| "rewards/MultiModalAccuracyORM/std": 0.24637180864810942, |
| "step": 680, |
| "train_speed(iter/s)": 0.032965 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 380.4, |
| "completions/mean_length": 235.7500099182129, |
| "completions/min_length": 118.5, |
| "epoch": 0.2767676767676768, |
| "grad_norm": 2.1797104035382873, |
| "kl": 0.01773681640625, |
| "learning_rate": 2e-07, |
| "loss": 0.008138242363929748, |
| "memory(GiB)": 113.5, |
| "reward": 0.26666667610406875, |
| "reward_std": 0.3862804383039474, |
| "rewards/MultiModalAccuracyORM/mean": 0.26666667610406875, |
| "rewards/MultiModalAccuracyORM/std": 0.3862804383039474, |
| "step": 685, |
| "train_speed(iter/s)": 0.032993 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 707.3, |
| "completions/mean_length": 420.05834503173827, |
| "completions/min_length": 225.0, |
| "epoch": 0.2787878787878788, |
| "grad_norm": 1.2422518482071012, |
| "kl": 0.005517578125, |
| "learning_rate": 2e-07, |
| "loss": -0.025521010160446167, |
| "memory(GiB)": 113.5, |
| "reward": 0.07500000223517418, |
| "reward_std": 0.22218745648860933, |
| "rewards/MultiModalAccuracyORM/mean": 0.07500000223517418, |
| "rewards/MultiModalAccuracyORM/std": 0.22218745648860933, |
| "step": 690, |
| "train_speed(iter/s)": 0.032968 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.4, |
| "completions/mean_length": 335.8666763305664, |
| "completions/min_length": 201.2, |
| "epoch": 0.2808080808080808, |
| "grad_norm": 0.8721711597058662, |
| "kl": 0.007273101806640625, |
| "learning_rate": 2e-07, |
| "loss": -0.005113717913627624, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.32370694279670714, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.32370694279670714, |
| "step": 695, |
| "train_speed(iter/s)": 0.032988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 995.9, |
| "completions/mean_length": 439.34168548583983, |
| "completions/min_length": 226.8, |
| "epoch": 0.2828282828282828, |
| "grad_norm": 2.6514991151372906, |
| "kl": 0.00522308349609375, |
| "learning_rate": 2e-07, |
| "loss": 0.03241249620914459, |
| "memory(GiB)": 113.5, |
| "reward": 0.12500000298023223, |
| "reward_std": 0.25916995108127594, |
| "rewards/MultiModalAccuracyORM/mean": 0.12500000298023223, |
| "rewards/MultiModalAccuracyORM/std": 0.25916995108127594, |
| "step": 700, |
| "train_speed(iter/s)": 0.032951 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 338.0, |
| "completions/mean_length": 208.48334121704102, |
| "completions/min_length": 125.4, |
| "epoch": 0.28484848484848485, |
| "grad_norm": 2.9111051216276933, |
| "kl": 0.00951080322265625, |
| "learning_rate": 2e-07, |
| "loss": 0.011016063392162323, |
| "memory(GiB)": 113.5, |
| "reward": 0.49166668131947516, |
| "reward_std": 0.3610968828201294, |
| "rewards/MultiModalAccuracyORM/mean": 0.49166668131947516, |
| "rewards/MultiModalAccuracyORM/std": 0.3610968828201294, |
| "step": 705, |
| "train_speed(iter/s)": 0.033011 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 684.6, |
| "completions/mean_length": 394.20833587646484, |
| "completions/min_length": 217.3, |
| "epoch": 0.2868686868686869, |
| "grad_norm": 0.9791505372375504, |
| "kl": 0.0062164306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.009031829237937928, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000037252903, |
| "reward_std": 0.2825257331132889, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.2825257331132889, |
| "step": 710, |
| "train_speed(iter/s)": 0.032996 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 534.9, |
| "completions/mean_length": 316.80834197998047, |
| "completions/min_length": 187.2, |
| "epoch": 0.28888888888888886, |
| "grad_norm": 2.748998227170115, |
| "kl": 0.0071197509765625, |
| "learning_rate": 2e-07, |
| "loss": -0.04136030673980713, |
| "memory(GiB)": 113.5, |
| "reward": 0.17500000745058059, |
| "reward_std": 0.2551840543746948, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000745058059, |
| "rewards/MultiModalAccuracyORM/std": 0.2551840543746948, |
| "step": 715, |
| "train_speed(iter/s)": 0.033011 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 622.0, |
| "completions/mean_length": 334.85834884643555, |
| "completions/min_length": 158.8, |
| "epoch": 0.2909090909090909, |
| "grad_norm": 2.5712788721497355, |
| "kl": 0.005682373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.014300698041915893, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000089406967, |
| "reward_std": 0.26816858947277067, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.26816858947277067, |
| "step": 720, |
| "train_speed(iter/s)": 0.033012 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 613.2, |
| "completions/mean_length": 331.2666763305664, |
| "completions/min_length": 199.3, |
| "epoch": 0.29292929292929293, |
| "grad_norm": 1.8581663152703145, |
| "kl": 0.00664215087890625, |
| "learning_rate": 2e-07, |
| "loss": -0.010144461691379548, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.31676994562149047, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667386889457, |
| "rewards/MultiModalAccuracyORM/std": 0.31676994562149047, |
| "step": 725, |
| "train_speed(iter/s)": 0.033019 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 542.9, |
| "completions/mean_length": 326.3500129699707, |
| "completions/min_length": 181.5, |
| "epoch": 0.29494949494949496, |
| "grad_norm": 0.775697379774875, |
| "kl": 0.00522918701171875, |
| "learning_rate": 2e-07, |
| "loss": 0.0003711044788360596, |
| "memory(GiB)": 113.5, |
| "reward": 0.2333333373069763, |
| "reward_std": 0.3189997851848602, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.3189997851848602, |
| "step": 730, |
| "train_speed(iter/s)": 0.033016 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 574.2, |
| "completions/mean_length": 330.55001068115234, |
| "completions/min_length": 198.5, |
| "epoch": 0.296969696969697, |
| "grad_norm": 1.5273483945564796, |
| "kl": 0.006597900390625, |
| "learning_rate": 2e-07, |
| "loss": 0.012019181251525879, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667312383653, |
| "reward_std": 0.4374805331230164, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667312383653, |
| "rewards/MultiModalAccuracyORM/std": 0.4374805331230164, |
| "step": 735, |
| "train_speed(iter/s)": 0.033045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 575.7, |
| "completions/mean_length": 381.558349609375, |
| "completions/min_length": 230.5, |
| "epoch": 0.298989898989899, |
| "grad_norm": 2.3594893660788374, |
| "kl": 0.0050323486328125, |
| "learning_rate": 2e-07, |
| "loss": 0.01788020133972168, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000596046447, |
| "reward_std": 0.3330695390701294, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000596046447, |
| "rewards/MultiModalAccuracyORM/std": 0.3330695390701294, |
| "step": 740, |
| "train_speed(iter/s)": 0.033058 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 562.7, |
| "completions/mean_length": 324.033349609375, |
| "completions/min_length": 184.4, |
| "epoch": 0.301010101010101, |
| "grad_norm": 2.29708410353418, |
| "kl": 0.00646209716796875, |
| "learning_rate": 2e-07, |
| "loss": -0.009415292739868164, |
| "memory(GiB)": 113.5, |
| "reward": 0.41666667386889455, |
| "reward_std": 0.2529277890920639, |
| "rewards/MultiModalAccuracyORM/mean": 0.41666667386889455, |
| "rewards/MultiModalAccuracyORM/std": 0.2529277890920639, |
| "step": 745, |
| "train_speed(iter/s)": 0.033072 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 2.3159606947695557, |
| "learning_rate": 2e-07, |
| "loss": 0.006078800559043885, |
| "memory(GiB)": 113.5, |
| "step": 750, |
| "train_speed(iter/s)": 0.033129 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 539.88, |
| "eval_completions/mean_length": 336.97334396362305, |
| "eval_completions/min_length": 192.2, |
| "eval_kl": 0.00380157470703125, |
| "eval_loss": 0.01653137058019638, |
| "eval_reward": 0.2800000062584877, |
| "eval_reward_std": 0.28693030297756195, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.2800000062584877, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.28693030297756195, |
| "eval_runtime": 588.5073, |
| "eval_samples_per_second": 0.085, |
| "eval_steps_per_second": 0.008, |
| "step": 750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.004166666666666667, |
| "completions/max_length": 541.6, |
| "completions/mean_length": 313.96250953674314, |
| "completions/min_length": 176.25, |
| "epoch": 0.30505050505050507, |
| "grad_norm": 1.6419689379277844, |
| "kl": 0.008066558837890625, |
| "learning_rate": 2e-07, |
| "loss": -0.004991033673286438, |
| "memory(GiB)": 113.5, |
| "reward": 0.31250000894069674, |
| "reward_std": 0.35801745802164076, |
| "rewards/MultiModalAccuracyORM/mean": 0.31250000894069674, |
| "rewards/MultiModalAccuracyORM/std": 0.35801745802164076, |
| "step": 755, |
| "train_speed(iter/s)": 0.031887 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.9, |
| "completions/mean_length": 292.9916732788086, |
| "completions/min_length": 150.9, |
| "epoch": 0.30707070707070705, |
| "grad_norm": 2.0844707046825723, |
| "kl": 0.00627288818359375, |
| "learning_rate": 2e-07, |
| "loss": 0.0167288139462471, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666666865348816, |
| "reward_std": 0.3554166704416275, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666666865348816, |
| "rewards/MultiModalAccuracyORM/std": 0.3554166704416275, |
| "step": 760, |
| "train_speed(iter/s)": 0.031901 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 457.4, |
| "completions/mean_length": 296.4166778564453, |
| "completions/min_length": 161.6, |
| "epoch": 0.3090909090909091, |
| "grad_norm": 1.7792521459456232, |
| "kl": 0.01121368408203125, |
| "learning_rate": 2e-07, |
| "loss": 0.017529194056987763, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000134110451, |
| "reward_std": 0.3734437495470047, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000134110451, |
| "rewards/MultiModalAccuracyORM/std": 0.3734437495470047, |
| "step": 765, |
| "train_speed(iter/s)": 0.031933 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 414.8, |
| "completions/mean_length": 228.51667175292968, |
| "completions/min_length": 120.5, |
| "epoch": 0.3111111111111111, |
| "grad_norm": 2.1702261558412697, |
| "kl": 0.0090240478515625, |
| "learning_rate": 2e-07, |
| "loss": -0.05565891861915588, |
| "memory(GiB)": 113.5, |
| "reward": 0.15000000596046448, |
| "reward_std": 0.24261613488197326, |
| "rewards/MultiModalAccuracyORM/mean": 0.15000000596046448, |
| "rewards/MultiModalAccuracyORM/std": 0.24261613488197326, |
| "step": 770, |
| "train_speed(iter/s)": 0.031968 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 531.4, |
| "completions/mean_length": 306.7916748046875, |
| "completions/min_length": 165.3, |
| "epoch": 0.31313131313131315, |
| "grad_norm": 1.2794183651984758, |
| "kl": 0.00740814208984375, |
| "learning_rate": 2e-07, |
| "loss": 0.04246864318847656, |
| "memory(GiB)": 113.5, |
| "reward": 0.46666667982935905, |
| "reward_std": 0.4767192959785461, |
| "rewards/MultiModalAccuracyORM/mean": 0.46666667982935905, |
| "rewards/MultiModalAccuracyORM/std": 0.4767192959785461, |
| "step": 775, |
| "train_speed(iter/s)": 0.032 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 634.4, |
| "completions/mean_length": 415.7916763305664, |
| "completions/min_length": 231.3, |
| "epoch": 0.3151515151515151, |
| "grad_norm": 1.1135361589863462, |
| "kl": 0.00513763427734375, |
| "learning_rate": 2e-07, |
| "loss": 0.028287124633789063, |
| "memory(GiB)": 113.5, |
| "reward": 0.15000000596046448, |
| "reward_std": 0.18482151329517366, |
| "rewards/MultiModalAccuracyORM/mean": 0.15000000596046448, |
| "rewards/MultiModalAccuracyORM/std": 0.18482151329517366, |
| "step": 780, |
| "train_speed(iter/s)": 0.031996 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 629.5, |
| "completions/mean_length": 379.56668243408205, |
| "completions/min_length": 220.9, |
| "epoch": 0.31717171717171716, |
| "grad_norm": 2.365467793016899, |
| "kl": 0.0066986083984375, |
| "learning_rate": 2e-07, |
| "loss": -0.014639610052108764, |
| "memory(GiB)": 113.5, |
| "reward": 0.1833333358168602, |
| "reward_std": 0.20363159477710724, |
| "rewards/MultiModalAccuracyORM/mean": 0.1833333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.20363159477710724, |
| "step": 785, |
| "train_speed(iter/s)": 0.032005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 409.3, |
| "completions/mean_length": 267.75834503173826, |
| "completions/min_length": 155.6, |
| "epoch": 0.3191919191919192, |
| "grad_norm": 1.7124152646997672, |
| "kl": 0.00589141845703125, |
| "learning_rate": 2e-07, |
| "loss": 0.001770263910293579, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333730697634, |
| "reward_std": 0.29483942985534667, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333730697634, |
| "rewards/MultiModalAccuracyORM/std": 0.29483942985534667, |
| "step": 790, |
| "train_speed(iter/s)": 0.032028 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 566.7, |
| "completions/mean_length": 331.6916763305664, |
| "completions/min_length": 190.2, |
| "epoch": 0.3212121212121212, |
| "grad_norm": 2.1658449229692316, |
| "kl": 0.0065338134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.018888431787490844, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500001192092894, |
| "reward_std": 0.3477985322475433, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500001192092894, |
| "rewards/MultiModalAccuracyORM/std": 0.3477985322475433, |
| "step": 795, |
| "train_speed(iter/s)": 0.032028 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 566.9, |
| "completions/mean_length": 319.0250076293945, |
| "completions/min_length": 159.2, |
| "epoch": 0.32323232323232326, |
| "grad_norm": 0.10193444456144864, |
| "kl": 0.0068878173828125, |
| "learning_rate": 2e-07, |
| "loss": 0.008858251571655273, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666693985462, |
| "reward_std": 0.2567190587520599, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.2567190587520599, |
| "step": 800, |
| "train_speed(iter/s)": 0.032045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 522.7, |
| "completions/mean_length": 316.66667556762695, |
| "completions/min_length": 179.2, |
| "epoch": 0.32525252525252524, |
| "grad_norm": 2.315498401390807, |
| "kl": 0.00754852294921875, |
| "learning_rate": 2e-07, |
| "loss": -0.002603813260793686, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333656191827, |
| "reward_std": 0.2722736746072769, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333656191827, |
| "rewards/MultiModalAccuracyORM/std": 0.2722736746072769, |
| "step": 805, |
| "train_speed(iter/s)": 0.03204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 675.0, |
| "completions/mean_length": 342.4083450317383, |
| "completions/min_length": 170.8, |
| "epoch": 0.32727272727272727, |
| "grad_norm": 3.1462959818853165, |
| "kl": 0.006378173828125, |
| "learning_rate": 2e-07, |
| "loss": -0.010855591297149659, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666693985462, |
| "reward_std": 0.3259988039731979, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.3259988039731979, |
| "step": 810, |
| "train_speed(iter/s)": 0.032048 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 674.7, |
| "completions/mean_length": 386.7166778564453, |
| "completions/min_length": 189.5, |
| "epoch": 0.3292929292929293, |
| "grad_norm": 2.441646562638453, |
| "kl": 0.008112335205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.022695478796958924, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833333507180216, |
| "reward_std": 0.29793586432933805, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833333507180216, |
| "rewards/MultiModalAccuracyORM/std": 0.29793586432933805, |
| "step": 815, |
| "train_speed(iter/s)": 0.032056 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 549.7, |
| "completions/mean_length": 339.0166732788086, |
| "completions/min_length": 194.4, |
| "epoch": 0.33131313131313134, |
| "grad_norm": 1.020422275315147, |
| "kl": 0.0112762451171875, |
| "learning_rate": 2e-07, |
| "loss": 0.05103216171264648, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000670552253, |
| "reward_std": 0.2956440031528473, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000670552253, |
| "rewards/MultiModalAccuracyORM/std": 0.2956440031528473, |
| "step": 820, |
| "train_speed(iter/s)": 0.032046 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 543.8, |
| "completions/mean_length": 313.6000061035156, |
| "completions/min_length": 160.3, |
| "epoch": 0.3333333333333333, |
| "grad_norm": 3.530685574433075, |
| "kl": 0.00774993896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.0580863893032074, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667535901069, |
| "reward_std": 0.31899061501026155, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667535901069, |
| "rewards/MultiModalAccuracyORM/std": 0.31899061501026155, |
| "step": 825, |
| "train_speed(iter/s)": 0.03206 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 425.5, |
| "completions/mean_length": 246.05833740234374, |
| "completions/min_length": 134.2, |
| "epoch": 0.33535353535353535, |
| "grad_norm": 0.05179156879937817, |
| "kl": 0.007355499267578125, |
| "learning_rate": 2e-07, |
| "loss": 0.0357688844203949, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000074505806, |
| "reward_std": 0.20817729830741882, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.20817729830741882, |
| "step": 830, |
| "train_speed(iter/s)": 0.03208 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 650.0, |
| "completions/mean_length": 445.533349609375, |
| "completions/min_length": 285.9, |
| "epoch": 0.3373737373737374, |
| "grad_norm": 2.4237696716807413, |
| "kl": 0.005771636962890625, |
| "learning_rate": 2e-07, |
| "loss": 0.0007819652557373047, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666666865348814, |
| "reward_std": 0.3596066445112228, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666666865348814, |
| "rewards/MultiModalAccuracyORM/std": 0.3596066445112228, |
| "step": 835, |
| "train_speed(iter/s)": 0.032078 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 492.1, |
| "completions/mean_length": 283.40834350585936, |
| "completions/min_length": 164.0, |
| "epoch": 0.3393939393939394, |
| "grad_norm": 2.6192745381364615, |
| "kl": 0.00804901123046875, |
| "learning_rate": 2e-07, |
| "loss": 0.04405608177185059, |
| "memory(GiB)": 113.5, |
| "reward": 0.43333334401249884, |
| "reward_std": 0.2840515673160553, |
| "rewards/MultiModalAccuracyORM/mean": 0.43333334401249884, |
| "rewards/MultiModalAccuracyORM/std": 0.2840515673160553, |
| "step": 840, |
| "train_speed(iter/s)": 0.032104 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 560.4, |
| "completions/mean_length": 347.0500091552734, |
| "completions/min_length": 163.6, |
| "epoch": 0.3414141414141414, |
| "grad_norm": 2.7151298756229827, |
| "kl": 0.00639801025390625, |
| "learning_rate": 2e-07, |
| "loss": -0.004790738224983215, |
| "memory(GiB)": 113.5, |
| "reward": 0.4333333484828472, |
| "reward_std": 0.39859413504600527, |
| "rewards/MultiModalAccuracyORM/mean": 0.4333333484828472, |
| "rewards/MultiModalAccuracyORM/std": 0.39859413504600527, |
| "step": 845, |
| "train_speed(iter/s)": 0.032112 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 445.8, |
| "completions/mean_length": 273.2666717529297, |
| "completions/min_length": 157.8, |
| "epoch": 0.3434343434343434, |
| "grad_norm": 1.030614252722568, |
| "kl": 0.0097747802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.0008672773838043213, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166667237877845, |
| "reward_std": 0.28624823689460754, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.28624823689460754, |
| "step": 850, |
| "train_speed(iter/s)": 0.032141 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 331.9, |
| "completions/mean_length": 196.70833892822264, |
| "completions/min_length": 103.7, |
| "epoch": 0.34545454545454546, |
| "grad_norm": 4.600894892489762, |
| "kl": 0.00904083251953125, |
| "learning_rate": 2e-07, |
| "loss": -0.002990037202835083, |
| "memory(GiB)": 113.5, |
| "reward": 0.35000001043081286, |
| "reward_std": 0.2511145621538162, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000001043081286, |
| "rewards/MultiModalAccuracyORM/std": 0.2511145621538162, |
| "step": 855, |
| "train_speed(iter/s)": 0.0322 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 424.7, |
| "completions/mean_length": 280.9416793823242, |
| "completions/min_length": 175.6, |
| "epoch": 0.3474747474747475, |
| "grad_norm": 2.236927092635949, |
| "kl": 0.0068603515625, |
| "learning_rate": 2e-07, |
| "loss": 0.034914878010749814, |
| "memory(GiB)": 113.5, |
| "reward": 0.27500000670552255, |
| "reward_std": 0.28004167079925535, |
| "rewards/MultiModalAccuracyORM/mean": 0.27500000670552255, |
| "rewards/MultiModalAccuracyORM/std": 0.28004167079925535, |
| "step": 860, |
| "train_speed(iter/s)": 0.032218 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 553.4, |
| "completions/mean_length": 340.1083419799805, |
| "completions/min_length": 183.8, |
| "epoch": 0.34949494949494947, |
| "grad_norm": 3.1857406542737943, |
| "kl": 0.00835723876953125, |
| "learning_rate": 2e-07, |
| "loss": 0.019358628988265993, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.25585488975048065, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667088866234, |
| "rewards/MultiModalAccuracyORM/std": 0.25585488975048065, |
| "step": 865, |
| "train_speed(iter/s)": 0.032254 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 570.8, |
| "completions/mean_length": 342.0833381652832, |
| "completions/min_length": 181.3, |
| "epoch": 0.3515151515151515, |
| "grad_norm": 2.5743781015760714, |
| "kl": 0.00620880126953125, |
| "learning_rate": 2e-07, |
| "loss": -0.019692707061767577, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333358168602, |
| "reward_std": 0.23004821836948394, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.23004821836948394, |
| "step": 870, |
| "train_speed(iter/s)": 0.032261 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 527.4, |
| "completions/mean_length": 339.12501220703126, |
| "completions/min_length": 174.0, |
| "epoch": 0.35353535353535354, |
| "grad_norm": 3.184656199614579, |
| "kl": 0.00756072998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.016688653826713563, |
| "memory(GiB)": 113.5, |
| "reward": 0.39166667610406875, |
| "reward_std": 0.37845527231693266, |
| "rewards/MultiModalAccuracyORM/mean": 0.39166667610406875, |
| "rewards/MultiModalAccuracyORM/std": 0.37845527231693266, |
| "step": 875, |
| "train_speed(iter/s)": 0.032289 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.3, |
| "completions/mean_length": 336.61668090820314, |
| "completions/min_length": 199.1, |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.8292091239029376, |
| "kl": 0.006783294677734375, |
| "learning_rate": 2e-07, |
| "loss": -0.0035984992980957033, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.353110259771347, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000521540644, |
| "rewards/MultiModalAccuracyORM/std": 0.353110259771347, |
| "step": 880, |
| "train_speed(iter/s)": 0.032303 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 644.7, |
| "completions/mean_length": 367.0833480834961, |
| "completions/min_length": 201.2, |
| "epoch": 0.3575757575757576, |
| "grad_norm": 2.157154554024042, |
| "kl": 0.0074066162109375, |
| "learning_rate": 2e-07, |
| "loss": -0.012543225288391113, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666693985462, |
| "reward_std": 0.292328941822052, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.292328941822052, |
| "step": 885, |
| "train_speed(iter/s)": 0.032311 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 677.1, |
| "completions/mean_length": 368.5666778564453, |
| "completions/min_length": 197.0, |
| "epoch": 0.3595959595959596, |
| "grad_norm": 1.8591339481325562, |
| "kl": 0.01016082763671875, |
| "learning_rate": 2e-07, |
| "loss": -0.015211772918701173, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000670552253, |
| "reward_std": 0.3802089035511017, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000670552253, |
| "rewards/MultiModalAccuracyORM/std": 0.3802089035511017, |
| "step": 890, |
| "train_speed(iter/s)": 0.032317 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 713.9, |
| "completions/mean_length": 334.3000091552734, |
| "completions/min_length": 169.7, |
| "epoch": 0.3616161616161616, |
| "grad_norm": 1.891661158050905, |
| "kl": 0.00751495361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.057868242263793945, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333373069763, |
| "reward_std": 0.36168283224105835, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.36168283224105835, |
| "step": 895, |
| "train_speed(iter/s)": 0.03232 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 572.6, |
| "completions/mean_length": 367.8083435058594, |
| "completions/min_length": 193.6, |
| "epoch": 0.36363636363636365, |
| "grad_norm": 2.944909454157867, |
| "kl": 0.0079620361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.003379705175757408, |
| "memory(GiB)": 113.5, |
| "reward": 0.17500000670552254, |
| "reward_std": 0.22300148010253906, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000670552254, |
| "rewards/MultiModalAccuracyORM/std": 0.22300148010253906, |
| "step": 900, |
| "train_speed(iter/s)": 0.032324 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 724.4, |
| "completions/mean_length": 414.4750144958496, |
| "completions/min_length": 241.5, |
| "epoch": 0.3656565656565657, |
| "grad_norm": 1.0572142583091821, |
| "kl": 0.00611724853515625, |
| "learning_rate": 2e-07, |
| "loss": 0.02717306911945343, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333417773247, |
| "reward_std": 0.27447034418582916, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.27447034418582916, |
| "step": 905, |
| "train_speed(iter/s)": 0.03233 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 549.5, |
| "completions/mean_length": 329.6416717529297, |
| "completions/min_length": 166.2, |
| "epoch": 0.36767676767676766, |
| "grad_norm": 1.806687314036588, |
| "kl": 0.0089080810546875, |
| "learning_rate": 2e-07, |
| "loss": 0.010141277313232422, |
| "memory(GiB)": 113.5, |
| "reward": 0.391666679084301, |
| "reward_std": 0.40894138514995576, |
| "rewards/MultiModalAccuracyORM/mean": 0.391666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.40894138514995576, |
| "step": 910, |
| "train_speed(iter/s)": 0.032344 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 548.1, |
| "completions/mean_length": 302.3416748046875, |
| "completions/min_length": 159.3, |
| "epoch": 0.3696969696969697, |
| "grad_norm": 2.825820952489286, |
| "kl": 0.0180572509765625, |
| "learning_rate": 2e-07, |
| "loss": 0.011392435431480408, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666731238365, |
| "reward_std": 0.33297434747219085, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.33297434747219085, |
| "step": 915, |
| "train_speed(iter/s)": 0.032356 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 512.4, |
| "completions/mean_length": 327.7416717529297, |
| "completions/min_length": 178.4, |
| "epoch": 0.3717171717171717, |
| "grad_norm": 2.438516683028765, |
| "kl": 0.00719757080078125, |
| "learning_rate": 2e-07, |
| "loss": 0.037606388330459595, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.3543280869722366, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333879709245, |
| "rewards/MultiModalAccuracyORM/std": 0.3543280869722366, |
| "step": 920, |
| "train_speed(iter/s)": 0.032355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 520.9, |
| "completions/mean_length": 351.9500076293945, |
| "completions/min_length": 190.7, |
| "epoch": 0.37373737373737376, |
| "grad_norm": 2.1782598398370943, |
| "kl": 0.006235504150390625, |
| "learning_rate": 2e-07, |
| "loss": -0.007940790057182312, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000096857548, |
| "reward_std": 0.3659113526344299, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.3659113526344299, |
| "step": 925, |
| "train_speed(iter/s)": 0.032383 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 603.8, |
| "completions/mean_length": 379.47501373291016, |
| "completions/min_length": 206.3, |
| "epoch": 0.37575757575757573, |
| "grad_norm": 2.304852196233359, |
| "kl": 0.0072235107421875, |
| "learning_rate": 2e-07, |
| "loss": 0.03286640048027038, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667312383653, |
| "reward_std": 0.44222086369991304, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667312383653, |
| "rewards/MultiModalAccuracyORM/std": 0.44222086369991304, |
| "step": 930, |
| "train_speed(iter/s)": 0.032396 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 512.6, |
| "completions/mean_length": 323.35000762939455, |
| "completions/min_length": 192.7, |
| "epoch": 0.37777777777777777, |
| "grad_norm": 2.9791049041845494, |
| "kl": 0.01037445068359375, |
| "learning_rate": 2e-07, |
| "loss": -0.007777485251426697, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000447034837, |
| "reward_std": 0.35737437903881075, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000447034837, |
| "rewards/MultiModalAccuracyORM/std": 0.35737437903881075, |
| "step": 935, |
| "train_speed(iter/s)": 0.032395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 543.9, |
| "completions/mean_length": 280.97500762939455, |
| "completions/min_length": 138.2, |
| "epoch": 0.3797979797979798, |
| "grad_norm": 1.881006300919645, |
| "kl": 0.013104248046875, |
| "learning_rate": 2e-07, |
| "loss": 0.02690579891204834, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667759418485, |
| "reward_std": 0.337774270772934, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667759418485, |
| "rewards/MultiModalAccuracyORM/std": 0.337774270772934, |
| "step": 940, |
| "train_speed(iter/s)": 0.032434 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 505.1, |
| "completions/mean_length": 295.9500076293945, |
| "completions/min_length": 174.9, |
| "epoch": 0.38181818181818183, |
| "grad_norm": 3.313912407777126, |
| "kl": 0.00870513916015625, |
| "learning_rate": 2e-07, |
| "loss": -0.032750940322875975, |
| "memory(GiB)": 113.5, |
| "reward": 0.40000000819563863, |
| "reward_std": 0.45158345997333527, |
| "rewards/MultiModalAccuracyORM/mean": 0.40000000819563863, |
| "rewards/MultiModalAccuracyORM/std": 0.45158345997333527, |
| "step": 945, |
| "train_speed(iter/s)": 0.032469 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 443.3, |
| "completions/mean_length": 262.3000038146973, |
| "completions/min_length": 133.7, |
| "epoch": 0.3838383838383838, |
| "grad_norm": 2.7566340852478053, |
| "kl": 0.00853729248046875, |
| "learning_rate": 2e-07, |
| "loss": 0.018448495864868165, |
| "memory(GiB)": 113.5, |
| "reward": 0.4083333440124989, |
| "reward_std": 0.2674977511167526, |
| "rewards/MultiModalAccuracyORM/mean": 0.4083333440124989, |
| "rewards/MultiModalAccuracyORM/std": 0.2674977511167526, |
| "step": 950, |
| "train_speed(iter/s)": 0.032489 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 658.0, |
| "completions/mean_length": 363.36668090820314, |
| "completions/min_length": 190.9, |
| "epoch": 0.38585858585858585, |
| "grad_norm": 1.6957648809966595, |
| "kl": 0.0067291259765625, |
| "learning_rate": 2e-07, |
| "loss": -0.02898831069469452, |
| "memory(GiB)": 113.5, |
| "reward": 0.2000000074505806, |
| "reward_std": 0.32902404963970183, |
| "rewards/MultiModalAccuracyORM/mean": 0.2000000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.32902404963970183, |
| "step": 955, |
| "train_speed(iter/s)": 0.032506 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 480.1, |
| "completions/mean_length": 307.16667022705076, |
| "completions/min_length": 173.2, |
| "epoch": 0.3878787878787879, |
| "grad_norm": 2.6324617057971755, |
| "kl": 0.0082183837890625, |
| "learning_rate": 2e-07, |
| "loss": 0.010876613110303879, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666708886623, |
| "reward_std": 0.3953502655029297, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666708886623, |
| "rewards/MultiModalAccuracyORM/std": 0.3953502655029297, |
| "step": 960, |
| "train_speed(iter/s)": 0.032526 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 523.3, |
| "completions/mean_length": 310.78333892822263, |
| "completions/min_length": 176.0, |
| "epoch": 0.3898989898989899, |
| "grad_norm": 0.25204548209314886, |
| "kl": 0.01051025390625, |
| "learning_rate": 2e-07, |
| "loss": 0.05701416730880737, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000029802322, |
| "reward_std": 0.2885732680559158, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000029802322, |
| "rewards/MultiModalAccuracyORM/std": 0.2885732680559158, |
| "step": 965, |
| "train_speed(iter/s)": 0.032526 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 592.1, |
| "completions/mean_length": 354.4916687011719, |
| "completions/min_length": 207.3, |
| "epoch": 0.39191919191919194, |
| "grad_norm": 1.8105174117337208, |
| "kl": 0.00800018310546875, |
| "learning_rate": 2e-07, |
| "loss": 0.008932539820671081, |
| "memory(GiB)": 113.5, |
| "reward": 0.18333333730697632, |
| "reward_std": 0.3538196414709091, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333333730697632, |
| "rewards/MultiModalAccuracyORM/std": 0.3538196414709091, |
| "step": 970, |
| "train_speed(iter/s)": 0.032536 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 615.5, |
| "completions/mean_length": 375.68334503173827, |
| "completions/min_length": 236.0, |
| "epoch": 0.3939393939393939, |
| "grad_norm": 1.4251930411180411, |
| "kl": 0.00720672607421875, |
| "learning_rate": 2e-07, |
| "loss": -0.04558621346950531, |
| "memory(GiB)": 113.5, |
| "reward": 0.10833333507180214, |
| "reward_std": 0.2549058347940445, |
| "rewards/MultiModalAccuracyORM/mean": 0.10833333507180214, |
| "rewards/MultiModalAccuracyORM/std": 0.2549058347940445, |
| "step": 975, |
| "train_speed(iter/s)": 0.032554 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 703.2, |
| "completions/mean_length": 440.5833435058594, |
| "completions/min_length": 218.4, |
| "epoch": 0.39595959595959596, |
| "grad_norm": 1.8329415728640532, |
| "kl": 0.0074310302734375, |
| "learning_rate": 2e-07, |
| "loss": -0.004531031847000122, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.351182359457016, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.351182359457016, |
| "step": 980, |
| "train_speed(iter/s)": 0.032558 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 425.7, |
| "completions/mean_length": 267.5916748046875, |
| "completions/min_length": 165.0, |
| "epoch": 0.397979797979798, |
| "grad_norm": 2.742069878873229, |
| "kl": 0.05179443359375, |
| "learning_rate": 2e-07, |
| "loss": 0.019256360828876495, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.3274982154369354, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333334028720857, |
| "rewards/MultiModalAccuracyORM/std": 0.3274982154369354, |
| "step": 985, |
| "train_speed(iter/s)": 0.032587 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 638.6, |
| "completions/mean_length": 360.90001068115237, |
| "completions/min_length": 205.4, |
| "epoch": 0.4, |
| "grad_norm": 3.049274715544681, |
| "kl": 0.00958404541015625, |
| "learning_rate": 2e-07, |
| "loss": -0.033705079555511476, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667610406873, |
| "reward_std": 0.27122942507267, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667610406873, |
| "rewards/MultiModalAccuracyORM/std": 0.27122942507267, |
| "step": 990, |
| "train_speed(iter/s)": 0.032615 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 526.2, |
| "completions/mean_length": 330.02501220703124, |
| "completions/min_length": 198.8, |
| "epoch": 0.402020202020202, |
| "grad_norm": 2.6515591125640574, |
| "kl": 0.0110382080078125, |
| "learning_rate": 2e-07, |
| "loss": 0.008444187045097352, |
| "memory(GiB)": 113.5, |
| "reward": 0.41666667982935907, |
| "reward_std": 0.4297270834445953, |
| "rewards/MultiModalAccuracyORM/mean": 0.41666667982935907, |
| "rewards/MultiModalAccuracyORM/std": 0.4297270834445953, |
| "step": 995, |
| "train_speed(iter/s)": 0.032638 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 1.6423776292289114, |
| "learning_rate": 2e-07, |
| "loss": -0.0013245075941085815, |
| "memory(GiB)": 113.5, |
| "step": 1000, |
| "train_speed(iter/s)": 0.032641 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 565.38, |
| "eval_completions/mean_length": 346.96667633056643, |
| "eval_completions/min_length": 203.6, |
| "eval_kl": 0.00558807373046875, |
| "eval_loss": 0.016358518972992897, |
| "eval_reward": 0.3083333417773247, |
| "eval_reward_std": 0.3403226917982101, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.3083333417773247, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.3403226917982101, |
| "eval_runtime": 586.662, |
| "eval_samples_per_second": 0.085, |
| "eval_steps_per_second": 0.009, |
| "step": 1000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.004166666666666667, |
| "completions/max_length": 608.15, |
| "completions/mean_length": 370.19167709350586, |
| "completions/min_length": 202.85, |
| "epoch": 0.40606060606060607, |
| "grad_norm": 2.014189773891532, |
| "kl": 0.009693145751953125, |
| "learning_rate": 2e-07, |
| "loss": 0.026693809032440185, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000484287738, |
| "reward_std": 0.2774069786071777, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000484287738, |
| "rewards/MultiModalAccuracyORM/std": 0.2774069786071777, |
| "step": 1005, |
| "train_speed(iter/s)": 0.031849 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 707.9, |
| "completions/mean_length": 427.22500762939455, |
| "completions/min_length": 238.0, |
| "epoch": 0.4080808080808081, |
| "grad_norm": 2.2096007474060633, |
| "kl": 0.00854034423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.01839480996131897, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667014360428, |
| "reward_std": 0.23004821836948394, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667014360428, |
| "rewards/MultiModalAccuracyORM/std": 0.23004821836948394, |
| "step": 1010, |
| "train_speed(iter/s)": 0.031846 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 597.1, |
| "completions/mean_length": 383.8250106811523, |
| "completions/min_length": 206.1, |
| "epoch": 0.4101010101010101, |
| "grad_norm": 2.360953993727072, |
| "kl": 0.00855560302734375, |
| "learning_rate": 2e-07, |
| "loss": -0.03324509263038635, |
| "memory(GiB)": 113.5, |
| "reward": 0.46666667610406876, |
| "reward_std": 0.36664178371429446, |
| "rewards/MultiModalAccuracyORM/mean": 0.46666667610406876, |
| "rewards/MultiModalAccuracyORM/std": 0.36664178371429446, |
| "step": 1015, |
| "train_speed(iter/s)": 0.031859 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 668.4, |
| "completions/mean_length": 396.28334350585936, |
| "completions/min_length": 187.6, |
| "epoch": 0.4121212121212121, |
| "grad_norm": 1.1532109667394932, |
| "kl": 0.00652008056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.012686711549758912, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.3129522502422333, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333805203438, |
| "rewards/MultiModalAccuracyORM/std": 0.3129522502422333, |
| "step": 1020, |
| "train_speed(iter/s)": 0.031867 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 524.3, |
| "completions/mean_length": 314.48334045410155, |
| "completions/min_length": 181.4, |
| "epoch": 0.41414141414141414, |
| "grad_norm": 2.3285330234433017, |
| "kl": 0.01016845703125, |
| "learning_rate": 2e-07, |
| "loss": -0.00456441193819046, |
| "memory(GiB)": 113.5, |
| "reward": 0.35000001043081286, |
| "reward_std": 0.36670138239860534, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000001043081286, |
| "rewards/MultiModalAccuracyORM/std": 0.36670138239860534, |
| "step": 1025, |
| "train_speed(iter/s)": 0.03188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 679.1, |
| "completions/mean_length": 417.8000122070313, |
| "completions/min_length": 228.2, |
| "epoch": 0.4161616161616162, |
| "grad_norm": 4.123945619995185, |
| "kl": 0.0071319580078125, |
| "learning_rate": 2e-07, |
| "loss": -0.015000586211681367, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833334252238276, |
| "reward_std": 0.4016164273023605, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833334252238276, |
| "rewards/MultiModalAccuracyORM/std": 0.4016164273023605, |
| "step": 1030, |
| "train_speed(iter/s)": 0.031877 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 525.2, |
| "completions/mean_length": 334.6833435058594, |
| "completions/min_length": 201.1, |
| "epoch": 0.41818181818181815, |
| "grad_norm": 1.0210308419459193, |
| "kl": 0.00837860107421875, |
| "learning_rate": 2e-07, |
| "loss": -0.008147723227739333, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166667312383652, |
| "reward_std": 0.14815283417701722, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667312383652, |
| "rewards/MultiModalAccuracyORM/std": 0.14815283417701722, |
| "step": 1035, |
| "train_speed(iter/s)": 0.03191 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 454.1, |
| "completions/mean_length": 269.1333374023437, |
| "completions/min_length": 140.9, |
| "epoch": 0.4202020202020202, |
| "grad_norm": 2.4151827408725546, |
| "kl": 0.01279144287109375, |
| "learning_rate": 2e-07, |
| "loss": -0.0017376184463500977, |
| "memory(GiB)": 113.5, |
| "reward": 0.5000000074505806, |
| "reward_std": 0.2591939508914948, |
| "rewards/MultiModalAccuracyORM/mean": 0.5000000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.2591939508914948, |
| "step": 1040, |
| "train_speed(iter/s)": 0.031912 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 693.0, |
| "completions/mean_length": 420.541682434082, |
| "completions/min_length": 252.2, |
| "epoch": 0.4222222222222222, |
| "grad_norm": 1.5651289466382694, |
| "kl": 0.0089202880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.007678426802158356, |
| "memory(GiB)": 113.5, |
| "reward": 0.07500000074505805, |
| "reward_std": 0.17705594301223754, |
| "rewards/MultiModalAccuracyORM/mean": 0.07500000074505805, |
| "rewards/MultiModalAccuracyORM/std": 0.17705594301223754, |
| "step": 1045, |
| "train_speed(iter/s)": 0.031887 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.3, |
| "completions/mean_length": 382.8750061035156, |
| "completions/min_length": 219.2, |
| "epoch": 0.42424242424242425, |
| "grad_norm": 1.6669744297788438, |
| "kl": 0.010504150390625, |
| "learning_rate": 2e-07, |
| "loss": 0.04403962194919586, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.2908295333385468, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.2908295333385468, |
| "step": 1050, |
| "train_speed(iter/s)": 0.031882 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 530.7, |
| "completions/mean_length": 340.3916778564453, |
| "completions/min_length": 203.1, |
| "epoch": 0.4262626262626263, |
| "grad_norm": 0.08753771583148155, |
| "kl": 0.006915283203125, |
| "learning_rate": 2e-07, |
| "loss": -0.00030135512351989744, |
| "memory(GiB)": 113.5, |
| "reward": 0.3250000074505806, |
| "reward_std": 0.31046818792819975, |
| "rewards/MultiModalAccuracyORM/mean": 0.3250000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.31046818792819975, |
| "step": 1055, |
| "train_speed(iter/s)": 0.031895 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 727.3, |
| "completions/mean_length": 407.2916793823242, |
| "completions/min_length": 253.6, |
| "epoch": 0.42828282828282827, |
| "grad_norm": 2.1853625197058877, |
| "kl": 0.01122894287109375, |
| "learning_rate": 2e-07, |
| "loss": -0.009478866308927535, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666679084301, |
| "reward_std": 0.2940108567476273, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.2940108567476273, |
| "step": 1060, |
| "train_speed(iter/s)": 0.03188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 693.8, |
| "completions/mean_length": 379.0666793823242, |
| "completions/min_length": 175.8, |
| "epoch": 0.4303030303030303, |
| "grad_norm": 1.8429441156917366, |
| "kl": 0.0084381103515625, |
| "learning_rate": 2e-07, |
| "loss": 0.008666989207267762, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666731238365, |
| "reward_std": 0.40242100059986113, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.40242100059986113, |
| "step": 1065, |
| "train_speed(iter/s)": 0.031897 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 652.8, |
| "completions/mean_length": 411.7250183105469, |
| "completions/min_length": 222.1, |
| "epoch": 0.43232323232323233, |
| "grad_norm": 1.8025359450969856, |
| "kl": 0.0067352294921875, |
| "learning_rate": 2e-07, |
| "loss": -0.020195412635803222, |
| "memory(GiB)": 113.5, |
| "reward": 0.1166666716337204, |
| "reward_std": 0.1745694547891617, |
| "rewards/MultiModalAccuracyORM/mean": 0.1166666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.1745694547891617, |
| "step": 1070, |
| "train_speed(iter/s)": 0.031919 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 562.0, |
| "completions/mean_length": 334.0916748046875, |
| "completions/min_length": 184.4, |
| "epoch": 0.43434343434343436, |
| "grad_norm": 1.6111066415316333, |
| "kl": 0.00709228515625, |
| "learning_rate": 2e-07, |
| "loss": -0.004982185363769531, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333410322666, |
| "reward_std": 0.27148364782333373, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.27148364782333373, |
| "step": 1075, |
| "train_speed(iter/s)": 0.031909 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 507.8, |
| "completions/mean_length": 305.34167404174804, |
| "completions/min_length": 176.1, |
| "epoch": 0.43636363636363634, |
| "grad_norm": 2.273654506806337, |
| "kl": 0.00942840576171875, |
| "learning_rate": 2e-07, |
| "loss": -0.0076661787927150725, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333417773247, |
| "reward_std": 0.2122136175632477, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.2122136175632477, |
| "step": 1080, |
| "train_speed(iter/s)": 0.031918 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 514.9, |
| "completions/mean_length": 298.60000839233396, |
| "completions/min_length": 156.5, |
| "epoch": 0.4383838383838384, |
| "grad_norm": 2.1394215246213495, |
| "kl": 0.0081207275390625, |
| "learning_rate": 2e-07, |
| "loss": 0.01651126444339752, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666768491268, |
| "reward_std": 0.4186849981546402, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.4186849981546402, |
| "step": 1085, |
| "train_speed(iter/s)": 0.031921 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 578.4, |
| "completions/mean_length": 338.9500137329102, |
| "completions/min_length": 196.0, |
| "epoch": 0.4404040404040404, |
| "grad_norm": 2.545454860927846, |
| "kl": 0.00835113525390625, |
| "learning_rate": 2e-07, |
| "loss": 0.04256980717182159, |
| "memory(GiB)": 113.5, |
| "reward": 0.3250000014901161, |
| "reward_std": 0.2712650209665298, |
| "rewards/MultiModalAccuracyORM/mean": 0.3250000014901161, |
| "rewards/MultiModalAccuracyORM/std": 0.2712650209665298, |
| "step": 1090, |
| "train_speed(iter/s)": 0.031917 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 737.9, |
| "completions/mean_length": 350.0750045776367, |
| "completions/min_length": 177.8, |
| "epoch": 0.44242424242424244, |
| "grad_norm": 1.1515652768332443, |
| "kl": 0.0083038330078125, |
| "learning_rate": 2e-07, |
| "loss": 0.05727236866950989, |
| "memory(GiB)": 113.5, |
| "reward": 0.1333333395421505, |
| "reward_std": 0.22625694572925567, |
| "rewards/MultiModalAccuracyORM/mean": 0.1333333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.22625694572925567, |
| "step": 1095, |
| "train_speed(iter/s)": 0.031885 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 639.9, |
| "completions/mean_length": 404.4166793823242, |
| "completions/min_length": 222.3, |
| "epoch": 0.4444444444444444, |
| "grad_norm": 2.0946897692044906, |
| "kl": 0.0076324462890625, |
| "learning_rate": 2e-07, |
| "loss": 0.03506229817867279, |
| "memory(GiB)": 113.5, |
| "reward": 0.45833334177732465, |
| "reward_std": 0.41185393929481506, |
| "rewards/MultiModalAccuracyORM/mean": 0.45833334177732465, |
| "rewards/MultiModalAccuracyORM/std": 0.41185393929481506, |
| "step": 1100, |
| "train_speed(iter/s)": 0.031869 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 388.2, |
| "completions/mean_length": 237.7166732788086, |
| "completions/min_length": 130.5, |
| "epoch": 0.44646464646464645, |
| "grad_norm": 2.7145244594416837, |
| "kl": 0.0112579345703125, |
| "learning_rate": 2e-07, |
| "loss": -0.004306972026824951, |
| "memory(GiB)": 113.5, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.28853767216205595, |
| "rewards/MultiModalAccuracyORM/mean": 0.27500000447034834, |
| "rewards/MultiModalAccuracyORM/std": 0.28853767216205595, |
| "step": 1105, |
| "train_speed(iter/s)": 0.031861 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.2, |
| "completions/mean_length": 339.60834350585935, |
| "completions/min_length": 189.7, |
| "epoch": 0.4484848484848485, |
| "grad_norm": 1.990851354822169, |
| "kl": 0.04109954833984375, |
| "learning_rate": 2e-07, |
| "loss": 0.01136043295264244, |
| "memory(GiB)": 113.5, |
| "reward": 0.4666666768491268, |
| "reward_std": 0.29859510362148284, |
| "rewards/MultiModalAccuracyORM/mean": 0.4666666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.29859510362148284, |
| "step": 1110, |
| "train_speed(iter/s)": 0.031854 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 524.4, |
| "completions/mean_length": 347.5666748046875, |
| "completions/min_length": 218.3, |
| "epoch": 0.4505050505050505, |
| "grad_norm": 2.9268025842966563, |
| "kl": 0.0082672119140625, |
| "learning_rate": 2e-07, |
| "loss": -0.0031023643910884856, |
| "memory(GiB)": 113.5, |
| "reward": 0.35000001192092894, |
| "reward_std": 0.3800142765045166, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000001192092894, |
| "rewards/MultiModalAccuracyORM/std": 0.3800142765045166, |
| "step": 1115, |
| "train_speed(iter/s)": 0.031867 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 514.2, |
| "completions/mean_length": 336.5333419799805, |
| "completions/min_length": 190.3, |
| "epoch": 0.45252525252525255, |
| "grad_norm": 2.361080053690534, |
| "kl": 0.009368896484375, |
| "learning_rate": 2e-07, |
| "loss": -0.007122965157032013, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000074505806, |
| "reward_std": 0.25241934359073637, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.25241934359073637, |
| "step": 1120, |
| "train_speed(iter/s)": 0.031889 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 627.4, |
| "completions/mean_length": 387.1333465576172, |
| "completions/min_length": 233.6, |
| "epoch": 0.45454545454545453, |
| "grad_norm": 1.323972233543725, |
| "kl": 0.008551025390625, |
| "learning_rate": 2e-07, |
| "loss": 0.03706555962562561, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667014360428, |
| "reward_std": 0.3109443962574005, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667014360428, |
| "rewards/MultiModalAccuracyORM/std": 0.3109443962574005, |
| "step": 1125, |
| "train_speed(iter/s)": 0.031889 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 558.3, |
| "completions/mean_length": 389.6666778564453, |
| "completions/min_length": 238.9, |
| "epoch": 0.45656565656565656, |
| "grad_norm": 0.7314973239006443, |
| "kl": 0.00838775634765625, |
| "learning_rate": 2e-07, |
| "loss": -0.0037152446806430818, |
| "memory(GiB)": 113.5, |
| "reward": 0.32500000223517417, |
| "reward_std": 0.2556006729602814, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500000223517417, |
| "rewards/MultiModalAccuracyORM/std": 0.2556006729602814, |
| "step": 1130, |
| "train_speed(iter/s)": 0.031898 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 594.1, |
| "completions/mean_length": 376.69167175292966, |
| "completions/min_length": 193.9, |
| "epoch": 0.4585858585858586, |
| "grad_norm": 2.6192210055071947, |
| "kl": 0.008709716796875, |
| "learning_rate": 2e-07, |
| "loss": -0.010700675845146179, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000089406967, |
| "reward_std": 0.3663875609636307, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.3663875609636307, |
| "step": 1135, |
| "train_speed(iter/s)": 0.031914 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 606.0, |
| "completions/mean_length": 373.23334350585935, |
| "completions/min_length": 224.3, |
| "epoch": 0.46060606060606063, |
| "grad_norm": 7.217746674191174, |
| "kl": 0.05963134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.005391424894332886, |
| "memory(GiB)": 113.5, |
| "reward": 0.11666666865348815, |
| "reward_std": 0.255160054564476, |
| "rewards/MultiModalAccuracyORM/mean": 0.11666666865348815, |
| "rewards/MultiModalAccuracyORM/std": 0.255160054564476, |
| "step": 1140, |
| "train_speed(iter/s)": 0.031929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 430.7, |
| "completions/mean_length": 264.12500686645507, |
| "completions/min_length": 149.1, |
| "epoch": 0.4626262626262626, |
| "grad_norm": 2.7608756487475525, |
| "kl": 0.014227294921875, |
| "learning_rate": 2e-07, |
| "loss": 0.01821192502975464, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666731238365, |
| "reward_std": 0.29640085995197296, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.29640085995197296, |
| "step": 1145, |
| "train_speed(iter/s)": 0.031947 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 426.1, |
| "completions/mean_length": 274.7416763305664, |
| "completions/min_length": 172.5, |
| "epoch": 0.46464646464646464, |
| "grad_norm": 2.4711978185790886, |
| "kl": 0.1343414306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.017589953541755677, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667237877845, |
| "reward_std": 0.28959646821022034, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.28959646821022034, |
| "step": 1150, |
| "train_speed(iter/s)": 0.031968 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 546.7, |
| "completions/mean_length": 316.32501220703125, |
| "completions/min_length": 170.1, |
| "epoch": 0.4666666666666667, |
| "grad_norm": 2.193137307137183, |
| "kl": 0.00921173095703125, |
| "learning_rate": 2e-07, |
| "loss": 0.03984123468399048, |
| "memory(GiB)": 113.5, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.3578915596008301, |
| "rewards/MultiModalAccuracyORM/mean": 0.25833334103226663, |
| "rewards/MultiModalAccuracyORM/std": 0.3578915596008301, |
| "step": 1155, |
| "train_speed(iter/s)": 0.031978 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 478.9, |
| "completions/mean_length": 312.7916748046875, |
| "completions/min_length": 192.9, |
| "epoch": 0.4686868686868687, |
| "grad_norm": 2.4370225749301886, |
| "kl": 0.0099822998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.04419963359832764, |
| "memory(GiB)": 113.5, |
| "reward": 0.36666667759418486, |
| "reward_std": 0.34560186266899107, |
| "rewards/MultiModalAccuracyORM/mean": 0.36666667759418486, |
| "rewards/MultiModalAccuracyORM/std": 0.34560186266899107, |
| "step": 1160, |
| "train_speed(iter/s)": 0.031974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 661.0, |
| "completions/mean_length": 355.541674041748, |
| "completions/min_length": 156.5, |
| "epoch": 0.4707070707070707, |
| "grad_norm": 3.306493843440885, |
| "kl": 0.01005859375, |
| "learning_rate": 2e-07, |
| "loss": 0.021104392409324647, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667461395265, |
| "reward_std": 0.37450254559516905, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667461395265, |
| "rewards/MultiModalAccuracyORM/std": 0.37450254559516905, |
| "step": 1165, |
| "train_speed(iter/s)": 0.031991 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 452.5, |
| "completions/mean_length": 266.8083435058594, |
| "completions/min_length": 159.0, |
| "epoch": 0.4727272727272727, |
| "grad_norm": 3.2381508792172107, |
| "kl": 0.0092529296875, |
| "learning_rate": 2e-07, |
| "loss": 0.0005752682685852051, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667237877846, |
| "reward_std": 0.29159851372241974, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667237877846, |
| "rewards/MultiModalAccuracyORM/std": 0.29159851372241974, |
| "step": 1170, |
| "train_speed(iter/s)": 0.032017 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 651.1, |
| "completions/mean_length": 376.39167938232424, |
| "completions/min_length": 220.4, |
| "epoch": 0.47474747474747475, |
| "grad_norm": 1.6637816276606223, |
| "kl": 0.00755615234375, |
| "learning_rate": 2e-07, |
| "loss": 0.04589937329292297, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333417773247, |
| "reward_std": 0.37851486802101136, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.37851486802101136, |
| "step": 1175, |
| "train_speed(iter/s)": 0.032023 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 656.9, |
| "completions/mean_length": 448.13334197998046, |
| "completions/min_length": 217.0, |
| "epoch": 0.4767676767676768, |
| "grad_norm": 2.1978602872808075, |
| "kl": 0.0097900390625, |
| "learning_rate": 2e-07, |
| "loss": -0.009159280359745026, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666738688946, |
| "reward_std": 0.28452777564525605, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.28452777564525605, |
| "step": 1180, |
| "train_speed(iter/s)": 0.032033 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 451.5, |
| "completions/mean_length": 289.1250053405762, |
| "completions/min_length": 172.7, |
| "epoch": 0.47878787878787876, |
| "grad_norm": 1.3926480253734976, |
| "kl": 0.0114959716796875, |
| "learning_rate": 2e-07, |
| "loss": 0.009945812821388244, |
| "memory(GiB)": 113.5, |
| "reward": 0.5000000149011612, |
| "reward_std": 0.29630566835403443, |
| "rewards/MultiModalAccuracyORM/mean": 0.5000000149011612, |
| "rewards/MultiModalAccuracyORM/std": 0.29630566835403443, |
| "step": 1185, |
| "train_speed(iter/s)": 0.032058 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 582.5, |
| "completions/mean_length": 330.6666732788086, |
| "completions/min_length": 149.8, |
| "epoch": 0.4808080808080808, |
| "grad_norm": 1.9719497919311402, |
| "kl": 0.0093994140625, |
| "learning_rate": 2e-07, |
| "loss": 0.0009687811136245728, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333410322666, |
| "reward_std": 0.3478672981262207, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.3478672981262207, |
| "step": 1190, |
| "train_speed(iter/s)": 0.032072 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 687.4, |
| "completions/mean_length": 421.73334350585935, |
| "completions/min_length": 236.0, |
| "epoch": 0.48282828282828283, |
| "grad_norm": 1.243534573096356, |
| "kl": 0.0077484130859375, |
| "learning_rate": 2e-07, |
| "loss": -0.003622010350227356, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833333879709243, |
| "reward_std": 0.29815449118614196, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333879709243, |
| "rewards/MultiModalAccuracyORM/std": 0.29815449118614196, |
| "step": 1195, |
| "train_speed(iter/s)": 0.03208 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 576.5, |
| "completions/mean_length": 360.5666748046875, |
| "completions/min_length": 198.9, |
| "epoch": 0.48484848484848486, |
| "grad_norm": 2.916364282012391, |
| "kl": 0.0125274658203125, |
| "learning_rate": 2e-07, |
| "loss": -0.021604710817337038, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000447034835, |
| "reward_std": 0.35037778615951537, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000447034835, |
| "rewards/MultiModalAccuracyORM/std": 0.35037778615951537, |
| "step": 1200, |
| "train_speed(iter/s)": 0.032073 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 488.0, |
| "completions/mean_length": 282.54167556762695, |
| "completions/min_length": 161.2, |
| "epoch": 0.4868686868686869, |
| "grad_norm": 1.8011146439004695, |
| "kl": 0.0113494873046875, |
| "learning_rate": 2e-07, |
| "loss": 0.006717947870492935, |
| "memory(GiB)": 113.5, |
| "reward": 0.3333333387970924, |
| "reward_std": 0.19114727079868316, |
| "rewards/MultiModalAccuracyORM/mean": 0.3333333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.19114727079868316, |
| "step": 1205, |
| "train_speed(iter/s)": 0.032089 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 834.7, |
| "completions/mean_length": 442.2083435058594, |
| "completions/min_length": 237.3, |
| "epoch": 0.4888888888888889, |
| "grad_norm": 1.4349681794675622, |
| "kl": 0.0094146728515625, |
| "learning_rate": 2e-07, |
| "loss": 0.0006179869174957276, |
| "memory(GiB)": 113.5, |
| "reward": 0.2333333410322666, |
| "reward_std": 0.3762586027383804, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.3762586027383804, |
| "step": 1210, |
| "train_speed(iter/s)": 0.032088 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 738.7, |
| "completions/mean_length": 433.8916748046875, |
| "completions/min_length": 249.1, |
| "epoch": 0.4909090909090909, |
| "grad_norm": 2.547575664275865, |
| "kl": 0.00975189208984375, |
| "learning_rate": 2e-07, |
| "loss": -0.026794981956481934, |
| "memory(GiB)": 113.5, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.20118070244789124, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333333656191825, |
| "rewards/MultiModalAccuracyORM/std": 0.20118070244789124, |
| "step": 1215, |
| "train_speed(iter/s)": 0.032076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 627.1, |
| "completions/mean_length": 359.12500915527346, |
| "completions/min_length": 197.8, |
| "epoch": 0.49292929292929294, |
| "grad_norm": 1.645283475386655, |
| "kl": 0.0115875244140625, |
| "learning_rate": 2e-07, |
| "loss": 0.024244531989097595, |
| "memory(GiB)": 113.5, |
| "reward": 0.3333333387970924, |
| "reward_std": 0.2511145621538162, |
| "rewards/MultiModalAccuracyORM/mean": 0.3333333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.2511145621538162, |
| "step": 1220, |
| "train_speed(iter/s)": 0.032088 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 419.3, |
| "completions/mean_length": 267.17501068115234, |
| "completions/min_length": 152.0, |
| "epoch": 0.494949494949495, |
| "grad_norm": 1.2460930349970594, |
| "kl": 0.010992431640625, |
| "learning_rate": 2e-07, |
| "loss": 0.007182718813419342, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333358168602, |
| "reward_std": 0.2536582201719284, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.2536582201719284, |
| "step": 1225, |
| "train_speed(iter/s)": 0.032122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 723.8, |
| "completions/mean_length": 405.7416702270508, |
| "completions/min_length": 233.4, |
| "epoch": 0.49696969696969695, |
| "grad_norm": 3.259598192610936, |
| "kl": 0.0076690673828125, |
| "learning_rate": 2e-07, |
| "loss": 0.0032115459442138674, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333334177732465, |
| "reward_std": 0.3470627248287201, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333334177732465, |
| "rewards/MultiModalAccuracyORM/std": 0.3470627248287201, |
| "step": 1230, |
| "train_speed(iter/s)": 0.032125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 472.1, |
| "completions/mean_length": 306.12501068115233, |
| "completions/min_length": 188.9, |
| "epoch": 0.498989898989899, |
| "grad_norm": 0.7719456506471253, |
| "kl": 0.01029205322265625, |
| "learning_rate": 2e-07, |
| "loss": -0.016546979546546936, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666693985462, |
| "reward_std": 0.2895223259925842, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.2895223259925842, |
| "step": 1235, |
| "train_speed(iter/s)": 0.032159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 534.6, |
| "completions/mean_length": 343.00834503173826, |
| "completions/min_length": 207.8, |
| "epoch": 0.501010101010101, |
| "grad_norm": 1.6531811897726711, |
| "kl": 0.0093170166015625, |
| "learning_rate": 2e-07, |
| "loss": 0.02186596691608429, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667610406876, |
| "reward_std": 0.2448128044605255, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667610406876, |
| "rewards/MultiModalAccuracyORM/std": 0.2448128044605255, |
| "step": 1240, |
| "train_speed(iter/s)": 0.032173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 538.6, |
| "completions/mean_length": 299.2916717529297, |
| "completions/min_length": 172.4, |
| "epoch": 0.503030303030303, |
| "grad_norm": 3.19592384950856, |
| "kl": 0.0099273681640625, |
| "learning_rate": 2e-07, |
| "loss": -0.006601364910602569, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333410322666, |
| "reward_std": 0.3292782694101334, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.3292782694101334, |
| "step": 1245, |
| "train_speed(iter/s)": 0.032193 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 0.912303521551538, |
| "learning_rate": 2e-07, |
| "loss": -0.0002701073884963989, |
| "memory(GiB)": 113.5, |
| "step": 1250, |
| "train_speed(iter/s)": 0.032205 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 570.74, |
| "eval_completions/mean_length": 352.94834228515623, |
| "eval_completions/min_length": 210.42, |
| "eval_kl": 0.00790496826171875, |
| "eval_loss": 0.01708856225013733, |
| "eval_reward": 0.2983333393931389, |
| "eval_reward_std": 0.3327623122930527, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.2983333393931389, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.3327623122930527, |
| "eval_runtime": 568.068, |
| "eval_samples_per_second": 0.088, |
| "eval_steps_per_second": 0.009, |
| "step": 1250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 551.8, |
| "completions/mean_length": 358.1875087738037, |
| "completions/min_length": 223.5, |
| "epoch": 0.5070707070707071, |
| "grad_norm": 1.8888348326711508, |
| "kl": 0.01190643310546875, |
| "learning_rate": 2e-07, |
| "loss": 0.019428746402263643, |
| "memory(GiB)": 113.5, |
| "reward": 0.27916667275130747, |
| "reward_std": 0.38802969008684157, |
| "rewards/MultiModalAccuracyORM/mean": 0.27916667275130747, |
| "rewards/MultiModalAccuracyORM/std": 0.38802969008684157, |
| "step": 1255, |
| "train_speed(iter/s)": 0.031527 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 619.7, |
| "completions/mean_length": 373.8333465576172, |
| "completions/min_length": 225.6, |
| "epoch": 0.509090909090909, |
| "grad_norm": 1.8028043067539863, |
| "kl": 0.0100677490234375, |
| "learning_rate": 2e-07, |
| "loss": 0.027076438069343567, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667386889458, |
| "reward_std": 0.3207202464342117, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667386889458, |
| "rewards/MultiModalAccuracyORM/std": 0.3207202464342117, |
| "step": 1260, |
| "train_speed(iter/s)": 0.031526 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 690.3, |
| "completions/mean_length": 397.9250152587891, |
| "completions/min_length": 204.6, |
| "epoch": 0.5111111111111111, |
| "grad_norm": 2.2225728768142723, |
| "kl": 0.011029052734375, |
| "learning_rate": 2e-07, |
| "loss": -0.04860515892505646, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667684912684, |
| "reward_std": 0.33303394317626955, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667684912684, |
| "rewards/MultiModalAccuracyORM/std": 0.33303394317626955, |
| "step": 1265, |
| "train_speed(iter/s)": 0.031521 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 551.6, |
| "completions/mean_length": 342.73334350585935, |
| "completions/min_length": 191.1, |
| "epoch": 0.5131313131313131, |
| "grad_norm": 1.467354462173463, |
| "kl": 0.0107818603515625, |
| "learning_rate": 2e-07, |
| "loss": 0.03341347873210907, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667982935905, |
| "reward_std": 0.2812868595123291, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667982935905, |
| "rewards/MultiModalAccuracyORM/std": 0.2812868595123291, |
| "step": 1270, |
| "train_speed(iter/s)": 0.031536 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 447.8, |
| "completions/mean_length": 283.72500762939455, |
| "completions/min_length": 163.8, |
| "epoch": 0.5151515151515151, |
| "grad_norm": 3.716342095943599, |
| "kl": 0.014361572265625, |
| "learning_rate": 2e-07, |
| "loss": 0.02838865518569946, |
| "memory(GiB)": 113.5, |
| "reward": 0.433333345502615, |
| "reward_std": 0.3993005663156509, |
| "rewards/MultiModalAccuracyORM/mean": 0.433333345502615, |
| "rewards/MultiModalAccuracyORM/std": 0.3993005663156509, |
| "step": 1275, |
| "train_speed(iter/s)": 0.031554 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 554.3, |
| "completions/mean_length": 302.8583389282227, |
| "completions/min_length": 167.6, |
| "epoch": 0.5171717171717172, |
| "grad_norm": 2.503627797323309, |
| "kl": 0.0103668212890625, |
| "learning_rate": 2e-07, |
| "loss": 0.00705558955669403, |
| "memory(GiB)": 113.5, |
| "reward": 0.6333333551883698, |
| "reward_std": 0.43680969774723055, |
| "rewards/MultiModalAccuracyORM/mean": 0.6333333551883698, |
| "rewards/MultiModalAccuracyORM/std": 0.43680969774723055, |
| "step": 1280, |
| "train_speed(iter/s)": 0.031583 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 650.5, |
| "completions/mean_length": 403.7000076293945, |
| "completions/min_length": 185.7, |
| "epoch": 0.5191919191919192, |
| "grad_norm": 3.2251809838929315, |
| "kl": 0.0101654052734375, |
| "learning_rate": 2e-07, |
| "loss": -0.037446904182434085, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.3978011578321457, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3978011578321457, |
| "step": 1285, |
| "train_speed(iter/s)": 0.031592 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 499.7, |
| "completions/mean_length": 300.60834045410155, |
| "completions/min_length": 158.4, |
| "epoch": 0.5212121212121212, |
| "grad_norm": 1.7359935662818697, |
| "kl": 0.0238189697265625, |
| "learning_rate": 2e-07, |
| "loss": 0.005645626783370971, |
| "memory(GiB)": 113.5, |
| "reward": 0.4916666761040688, |
| "reward_std": 0.37272491455078127, |
| "rewards/MultiModalAccuracyORM/mean": 0.4916666761040688, |
| "rewards/MultiModalAccuracyORM/std": 0.37272491455078127, |
| "step": 1290, |
| "train_speed(iter/s)": 0.031618 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 658.6, |
| "completions/mean_length": 381.3166809082031, |
| "completions/min_length": 189.5, |
| "epoch": 0.5232323232323233, |
| "grad_norm": 2.291063786312688, |
| "kl": 0.0126251220703125, |
| "learning_rate": 2e-07, |
| "loss": 0.03457438945770264, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.3760043799877167, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667312383654, |
| "rewards/MultiModalAccuracyORM/std": 0.3760043799877167, |
| "step": 1295, |
| "train_speed(iter/s)": 0.031626 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 535.2, |
| "completions/mean_length": 334.5833480834961, |
| "completions/min_length": 191.9, |
| "epoch": 0.5252525252525253, |
| "grad_norm": 2.290129258389379, |
| "kl": 0.0095916748046875, |
| "learning_rate": 2e-07, |
| "loss": -0.024787557125091553, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500000894069674, |
| "reward_std": 0.29634126722812654, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500000894069674, |
| "rewards/MultiModalAccuracyORM/std": 0.29634126722812654, |
| "step": 1300, |
| "train_speed(iter/s)": 0.031656 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 576.6, |
| "completions/mean_length": 374.36668395996094, |
| "completions/min_length": 192.7, |
| "epoch": 0.5272727272727272, |
| "grad_norm": 2.0627127342369556, |
| "kl": 0.0099151611328125, |
| "learning_rate": 2e-07, |
| "loss": 0.004585762321949005, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000037252903, |
| "reward_std": 0.40560232698917387, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.40560232698917387, |
| "step": 1305, |
| "train_speed(iter/s)": 0.031669 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 569.9, |
| "completions/mean_length": 281.84167556762696, |
| "completions/min_length": 151.0, |
| "epoch": 0.5292929292929293, |
| "grad_norm": 1.2890377388651022, |
| "kl": 0.012725830078125, |
| "learning_rate": 2e-07, |
| "loss": -0.00015339255332946777, |
| "memory(GiB)": 113.5, |
| "reward": 0.341666679084301, |
| "reward_std": 0.31068681478500365, |
| "rewards/MultiModalAccuracyORM/mean": 0.341666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.31068681478500365, |
| "step": 1310, |
| "train_speed(iter/s)": 0.031675 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 548.4, |
| "completions/mean_length": 307.5916717529297, |
| "completions/min_length": 186.8, |
| "epoch": 0.5313131313131313, |
| "grad_norm": 2.0391373518251648, |
| "kl": 0.0098358154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.06573413610458374, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.351182359457016, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333333805203437, |
| "rewards/MultiModalAccuracyORM/std": 0.351182359457016, |
| "step": 1315, |
| "train_speed(iter/s)": 0.031691 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 479.0, |
| "completions/mean_length": 273.75001068115233, |
| "completions/min_length": 144.0, |
| "epoch": 0.5333333333333333, |
| "grad_norm": 2.059578451448539, |
| "kl": 0.01175537109375, |
| "learning_rate": 2e-07, |
| "loss": 0.04888114631175995, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000096857548, |
| "reward_std": 0.4166352391242981, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.4166352391242981, |
| "step": 1320, |
| "train_speed(iter/s)": 0.031715 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 623.8, |
| "completions/mean_length": 376.00000915527346, |
| "completions/min_length": 193.4, |
| "epoch": 0.5353535353535354, |
| "grad_norm": 2.604163597368088, |
| "kl": 0.014556884765625, |
| "learning_rate": 2e-07, |
| "loss": 0.025493156909942628, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333432674408, |
| "reward_std": 0.33376437425613403, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.33376437425613403, |
| "step": 1325, |
| "train_speed(iter/s)": 0.031732 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 512.3, |
| "completions/mean_length": 319.8833465576172, |
| "completions/min_length": 185.2, |
| "epoch": 0.5373737373737374, |
| "grad_norm": 2.939920293327845, |
| "kl": 0.0071502685546875, |
| "learning_rate": 2e-07, |
| "loss": -0.0020159482955932617, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000037252903, |
| "reward_std": 0.33000870048999786, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.33000870048999786, |
| "step": 1330, |
| "train_speed(iter/s)": 0.031756 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 631.3, |
| "completions/mean_length": 409.9833419799805, |
| "completions/min_length": 273.6, |
| "epoch": 0.5393939393939394, |
| "grad_norm": 1.8600557381971845, |
| "kl": 0.011651611328125, |
| "learning_rate": 2e-07, |
| "loss": 0.014566189050674439, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000029802322, |
| "reward_std": 0.36642315685749055, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000029802322, |
| "rewards/MultiModalAccuracyORM/std": 0.36642315685749055, |
| "step": 1335, |
| "train_speed(iter/s)": 0.031764 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 512.0, |
| "completions/mean_length": 304.6666717529297, |
| "completions/min_length": 186.0, |
| "epoch": 0.5414141414141415, |
| "grad_norm": 2.250726659882682, |
| "kl": 0.0128570556640625, |
| "learning_rate": 2e-07, |
| "loss": 0.0025543123483657837, |
| "memory(GiB)": 113.5, |
| "reward": 0.1416666679084301, |
| "reward_std": 0.24939410090446473, |
| "rewards/MultiModalAccuracyORM/mean": 0.1416666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.24939410090446473, |
| "step": 1340, |
| "train_speed(iter/s)": 0.031764 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 534.8, |
| "completions/mean_length": 320.21667633056643, |
| "completions/min_length": 156.5, |
| "epoch": 0.5434343434343434, |
| "grad_norm": 3.49354443152123, |
| "kl": 0.01195068359375, |
| "learning_rate": 2e-07, |
| "loss": -0.026122617721557616, |
| "memory(GiB)": 113.5, |
| "reward": 0.10833333656191826, |
| "reward_std": 0.2714240521192551, |
| "rewards/MultiModalAccuracyORM/mean": 0.10833333656191826, |
| "rewards/MultiModalAccuracyORM/std": 0.2714240521192551, |
| "step": 1345, |
| "train_speed(iter/s)": 0.031794 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 567.0, |
| "completions/mean_length": 359.27501220703124, |
| "completions/min_length": 227.5, |
| "epoch": 0.5454545454545454, |
| "grad_norm": 1.3573423115932872, |
| "kl": 0.008941650390625, |
| "learning_rate": 2e-07, |
| "loss": 0.02098418176174164, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833334401249884, |
| "reward_std": 0.3207202464342117, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833334401249884, |
| "rewards/MultiModalAccuracyORM/std": 0.3207202464342117, |
| "step": 1350, |
| "train_speed(iter/s)": 0.031817 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 640.3, |
| "completions/mean_length": 368.5750106811523, |
| "completions/min_length": 194.4, |
| "epoch": 0.5474747474747474, |
| "grad_norm": 1.268246321814541, |
| "kl": 0.011553955078125, |
| "learning_rate": 2e-07, |
| "loss": -0.037621939182281496, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333447575569, |
| "reward_std": 0.3823301374912262, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333447575569, |
| "rewards/MultiModalAccuracyORM/std": 0.3823301374912262, |
| "step": 1355, |
| "train_speed(iter/s)": 0.031821 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 668.5, |
| "completions/mean_length": 337.325008392334, |
| "completions/min_length": 166.7, |
| "epoch": 0.5494949494949495, |
| "grad_norm": 1.1405744205796668, |
| "kl": 0.0093414306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.05270506143569946, |
| "memory(GiB)": 113.5, |
| "reward": 0.5333333417773247, |
| "reward_std": 0.30996555387973784, |
| "rewards/MultiModalAccuracyORM/mean": 0.5333333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.30996555387973784, |
| "step": 1360, |
| "train_speed(iter/s)": 0.031838 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 511.6, |
| "completions/mean_length": 287.0166732788086, |
| "completions/min_length": 144.9, |
| "epoch": 0.5515151515151515, |
| "grad_norm": 2.505246091989759, |
| "kl": 0.0113037109375, |
| "learning_rate": 2e-07, |
| "loss": -0.027878284454345703, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667833924294, |
| "reward_std": 0.34710127115249634, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667833924294, |
| "rewards/MultiModalAccuracyORM/std": 0.34710127115249634, |
| "step": 1365, |
| "train_speed(iter/s)": 0.031858 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 603.9, |
| "completions/mean_length": 330.1916793823242, |
| "completions/min_length": 153.1, |
| "epoch": 0.5535353535353535, |
| "grad_norm": 3.131582003300663, |
| "kl": 0.0138336181640625, |
| "learning_rate": 2e-07, |
| "loss": -0.0038233429193496706, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667982935903, |
| "reward_std": 0.37345829904079436, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667982935903, |
| "rewards/MultiModalAccuracyORM/std": 0.37345829904079436, |
| "step": 1370, |
| "train_speed(iter/s)": 0.031869 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 445.4, |
| "completions/mean_length": 286.3666702270508, |
| "completions/min_length": 149.1, |
| "epoch": 0.5555555555555556, |
| "grad_norm": 1.9956984289591713, |
| "kl": 0.0112335205078125, |
| "learning_rate": 2e-07, |
| "loss": -0.012190797924995422, |
| "memory(GiB)": 113.5, |
| "reward": 0.40833333879709244, |
| "reward_std": 0.3855446308851242, |
| "rewards/MultiModalAccuracyORM/mean": 0.40833333879709244, |
| "rewards/MultiModalAccuracyORM/std": 0.3855446308851242, |
| "step": 1375, |
| "train_speed(iter/s)": 0.031897 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 538.7, |
| "completions/mean_length": 324.9166763305664, |
| "completions/min_length": 202.3, |
| "epoch": 0.5575757575757576, |
| "grad_norm": 2.1303502921633335, |
| "kl": 0.00983428955078125, |
| "learning_rate": 2e-07, |
| "loss": 0.02664785385131836, |
| "memory(GiB)": 113.5, |
| "reward": 0.40833333879709244, |
| "reward_std": 0.30971133410930635, |
| "rewards/MultiModalAccuracyORM/mean": 0.40833333879709244, |
| "rewards/MultiModalAccuracyORM/std": 0.30971133410930635, |
| "step": 1380, |
| "train_speed(iter/s)": 0.031915 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 551.6, |
| "completions/mean_length": 356.1666793823242, |
| "completions/min_length": 201.3, |
| "epoch": 0.5595959595959596, |
| "grad_norm": 2.9931509831712524, |
| "kl": 0.012689208984375, |
| "learning_rate": 2e-07, |
| "loss": -0.039350539445877075, |
| "memory(GiB)": 113.5, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.2917931377887726, |
| "rewards/MultiModalAccuracyORM/mean": 0.1666666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.2917931377887726, |
| "step": 1385, |
| "train_speed(iter/s)": 0.031924 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 564.0, |
| "completions/mean_length": 356.6083358764648, |
| "completions/min_length": 193.0, |
| "epoch": 0.5616161616161616, |
| "grad_norm": 2.198573582527943, |
| "kl": 0.008941650390625, |
| "learning_rate": 2e-07, |
| "loss": -0.022810643911361693, |
| "memory(GiB)": 113.5, |
| "reward": 0.35000001415610316, |
| "reward_std": 0.32673218548297883, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000001415610316, |
| "rewards/MultiModalAccuracyORM/std": 0.32673218548297883, |
| "step": 1390, |
| "train_speed(iter/s)": 0.031929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 485.0, |
| "completions/mean_length": 314.82500686645506, |
| "completions/min_length": 182.1, |
| "epoch": 0.5636363636363636, |
| "grad_norm": 2.150533068523157, |
| "kl": 0.0106353759765625, |
| "learning_rate": 2e-07, |
| "loss": -0.013728708028793335, |
| "memory(GiB)": 113.5, |
| "reward": 0.46666667312383653, |
| "reward_std": 0.25897532403469087, |
| "rewards/MultiModalAccuracyORM/mean": 0.46666667312383653, |
| "rewards/MultiModalAccuracyORM/std": 0.25897532403469087, |
| "step": 1395, |
| "train_speed(iter/s)": 0.031954 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 552.3, |
| "completions/mean_length": 353.3833465576172, |
| "completions/min_length": 204.6, |
| "epoch": 0.5656565656565656, |
| "grad_norm": 1.940941493471918, |
| "kl": 0.0095428466796875, |
| "learning_rate": 2e-07, |
| "loss": -0.006394821405410767, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.34550372064113616, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.34550372064113616, |
| "step": 1400, |
| "train_speed(iter/s)": 0.031962 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 705.4, |
| "completions/mean_length": 432.6000045776367, |
| "completions/min_length": 228.0, |
| "epoch": 0.5676767676767677, |
| "grad_norm": 2.4378327864764286, |
| "kl": 0.011553955078125, |
| "learning_rate": 2e-07, |
| "loss": 0.04005226194858551, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666746139526, |
| "reward_std": 0.3370794355869293, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666746139526, |
| "rewards/MultiModalAccuracyORM/std": 0.3370794355869293, |
| "step": 1405, |
| "train_speed(iter/s)": 0.031967 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 689.2, |
| "completions/mean_length": 384.7333465576172, |
| "completions/min_length": 220.2, |
| "epoch": 0.5696969696969697, |
| "grad_norm": 0.780805540568698, |
| "kl": 0.01131591796875, |
| "learning_rate": 2e-07, |
| "loss": 0.03709500730037689, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333730697634, |
| "reward_std": 0.3572298943996429, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333730697634, |
| "rewards/MultiModalAccuracyORM/std": 0.3572298943996429, |
| "step": 1410, |
| "train_speed(iter/s)": 0.031973 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 595.0, |
| "completions/mean_length": 321.6333465576172, |
| "completions/min_length": 164.9, |
| "epoch": 0.5717171717171717, |
| "grad_norm": 1.430362343806847, |
| "kl": 0.0101104736328125, |
| "learning_rate": 2e-07, |
| "loss": 0.013754424452781678, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333387970924, |
| "reward_std": 0.28555097579956057, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.28555097579956057, |
| "step": 1415, |
| "train_speed(iter/s)": 0.031992 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 816.5, |
| "completions/mean_length": 438.3833435058594, |
| "completions/min_length": 264.8, |
| "epoch": 0.5737373737373738, |
| "grad_norm": 1.6263448971015675, |
| "kl": 0.010430908203125, |
| "learning_rate": 2e-07, |
| "loss": -0.0029776930809020997, |
| "memory(GiB)": 113.5, |
| "reward": 0.2333333395421505, |
| "reward_std": 0.3883536756038666, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3883536756038666, |
| "step": 1420, |
| "train_speed(iter/s)": 0.031979 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 766.6, |
| "completions/mean_length": 387.12500915527346, |
| "completions/min_length": 211.4, |
| "epoch": 0.5757575757575758, |
| "grad_norm": 2.1728432922463274, |
| "kl": 0.0098236083984375, |
| "learning_rate": 2e-07, |
| "loss": -0.004918041825294495, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333656191826, |
| "reward_std": 0.10697162449359894, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333656191826, |
| "rewards/MultiModalAccuracyORM/std": 0.10697162449359894, |
| "step": 1425, |
| "train_speed(iter/s)": 0.031977 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 637.8, |
| "completions/mean_length": 388.7416793823242, |
| "completions/min_length": 235.4, |
| "epoch": 0.5777777777777777, |
| "grad_norm": 1.7935893801244052, |
| "kl": 0.0087493896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.04609963297843933, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000134110451, |
| "reward_std": 0.32297651171684266, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000134110451, |
| "rewards/MultiModalAccuracyORM/std": 0.32297651171684266, |
| "step": 1430, |
| "train_speed(iter/s)": 0.031989 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 682.9, |
| "completions/mean_length": 398.00001220703126, |
| "completions/min_length": 208.4, |
| "epoch": 0.5797979797979798, |
| "grad_norm": 2.549829840865519, |
| "kl": 0.010107421875, |
| "learning_rate": 2e-07, |
| "loss": -0.0018973067402839662, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000089406967, |
| "reward_std": 0.3973225235939026, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.3973225235939026, |
| "step": 1435, |
| "train_speed(iter/s)": 0.031994 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 586.2, |
| "completions/mean_length": 325.5333450317383, |
| "completions/min_length": 163.1, |
| "epoch": 0.5818181818181818, |
| "grad_norm": 2.800120485549645, |
| "kl": 0.0125640869140625, |
| "learning_rate": 2e-07, |
| "loss": -0.016949039697647095, |
| "memory(GiB)": 113.5, |
| "reward": 0.4166666716337204, |
| "reward_std": 0.34232239723205565, |
| "rewards/MultiModalAccuracyORM/mean": 0.4166666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.34232239723205565, |
| "step": 1440, |
| "train_speed(iter/s)": 0.032007 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 611.5, |
| "completions/mean_length": 359.07500915527345, |
| "completions/min_length": 200.2, |
| "epoch": 0.5838383838383838, |
| "grad_norm": 2.2400645386442526, |
| "kl": 0.0367034912109375, |
| "learning_rate": 2e-07, |
| "loss": 0.027681028842926024, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.29815449118614196, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667312383654, |
| "rewards/MultiModalAccuracyORM/std": 0.29815449118614196, |
| "step": 1445, |
| "train_speed(iter/s)": 0.032019 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 663.8, |
| "completions/mean_length": 411.00001068115233, |
| "completions/min_length": 244.1, |
| "epoch": 0.5858585858585859, |
| "grad_norm": 2.864884904580614, |
| "kl": 0.009783935546875, |
| "learning_rate": 2e-07, |
| "loss": 0.00823460817337036, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666768491268, |
| "reward_std": 0.3438218057155609, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.3438218057155609, |
| "step": 1450, |
| "train_speed(iter/s)": 0.032023 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 554.1, |
| "completions/mean_length": 335.3583465576172, |
| "completions/min_length": 205.1, |
| "epoch": 0.5878787878787879, |
| "grad_norm": 1.4688157931726233, |
| "kl": 0.0092010498046875, |
| "learning_rate": 2e-07, |
| "loss": 0.01696823239326477, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500000968575475, |
| "reward_std": 0.35413345992565154, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500000968575475, |
| "rewards/MultiModalAccuracyORM/std": 0.35413345992565154, |
| "step": 1455, |
| "train_speed(iter/s)": 0.03204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 827.3, |
| "completions/mean_length": 473.9416748046875, |
| "completions/min_length": 249.7, |
| "epoch": 0.5898989898989899, |
| "grad_norm": 1.1646459187041633, |
| "kl": 0.0099945068359375, |
| "learning_rate": 2e-07, |
| "loss": 0.014775393903255463, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.30333785712718964, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.30333785712718964, |
| "step": 1460, |
| "train_speed(iter/s)": 0.032026 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 701.1, |
| "completions/mean_length": 384.5083404541016, |
| "completions/min_length": 204.1, |
| "epoch": 0.591919191919192, |
| "grad_norm": 0.04302173761513684, |
| "kl": 0.012548828125, |
| "learning_rate": 2e-07, |
| "loss": -0.001154869794845581, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000141561031, |
| "reward_std": 0.3127244532108307, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000141561031, |
| "rewards/MultiModalAccuracyORM/std": 0.3127244532108307, |
| "step": 1465, |
| "train_speed(iter/s)": 0.032029 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 546.3, |
| "completions/mean_length": 352.4416763305664, |
| "completions/min_length": 195.1, |
| "epoch": 0.593939393939394, |
| "grad_norm": 2.051161125641378, |
| "kl": 0.014813232421875, |
| "learning_rate": 2e-07, |
| "loss": 0.0119085431098938, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.34488060176372526, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.34488060176372526, |
| "step": 1470, |
| "train_speed(iter/s)": 0.03204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 772.7, |
| "completions/mean_length": 466.6416809082031, |
| "completions/min_length": 251.3, |
| "epoch": 0.5959595959595959, |
| "grad_norm": 1.842366669706851, |
| "kl": 0.01016082763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.015132546424865723, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500001043081283, |
| "reward_std": 0.3044206529855728, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500001043081283, |
| "rewards/MultiModalAccuracyORM/std": 0.3044206529855728, |
| "step": 1475, |
| "train_speed(iter/s)": 0.032046 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 568.7, |
| "completions/mean_length": 344.0333480834961, |
| "completions/min_length": 205.0, |
| "epoch": 0.597979797979798, |
| "grad_norm": 0.07710895823869458, |
| "kl": 0.01250762939453125, |
| "learning_rate": 2e-07, |
| "loss": 0.02509859800338745, |
| "memory(GiB)": 113.5, |
| "reward": 0.47500001192092894, |
| "reward_std": 0.2752393215894699, |
| "rewards/MultiModalAccuracyORM/mean": 0.47500001192092894, |
| "rewards/MultiModalAccuracyORM/std": 0.2752393215894699, |
| "step": 1480, |
| "train_speed(iter/s)": 0.032062 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 540.5, |
| "completions/mean_length": 306.80834197998047, |
| "completions/min_length": 174.4, |
| "epoch": 0.6, |
| "grad_norm": 0.084452934933302, |
| "kl": 0.0158172607421875, |
| "learning_rate": 2e-07, |
| "loss": -0.027300435304641723, |
| "memory(GiB)": 113.5, |
| "reward": 0.17500000521540643, |
| "reward_std": 0.24105713069438933, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000521540643, |
| "rewards/MultiModalAccuracyORM/std": 0.24105713069438933, |
| "step": 1485, |
| "train_speed(iter/s)": 0.032084 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 501.7, |
| "completions/mean_length": 324.4833389282227, |
| "completions/min_length": 169.9, |
| "epoch": 0.602020202020202, |
| "grad_norm": 1.3165133966084028, |
| "kl": 0.0114501953125, |
| "learning_rate": 2e-07, |
| "loss": 0.004012265801429748, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.31046818792819975, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.31046818792819975, |
| "step": 1490, |
| "train_speed(iter/s)": 0.032103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 588.7, |
| "completions/mean_length": 336.21668243408203, |
| "completions/min_length": 202.5, |
| "epoch": 0.604040404040404, |
| "grad_norm": 3.938520632284254, |
| "kl": 0.0132232666015625, |
| "learning_rate": 2e-07, |
| "loss": -0.02633047103881836, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666708886623, |
| "reward_std": 0.3149157464504242, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666708886623, |
| "rewards/MultiModalAccuracyORM/std": 0.3149157464504242, |
| "step": 1495, |
| "train_speed(iter/s)": 0.032103 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 2.7010910619752164, |
| "learning_rate": 2e-07, |
| "loss": 0.023089283704757692, |
| "memory(GiB)": 113.5, |
| "step": 1500, |
| "train_speed(iter/s)": 0.032112 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 598.76, |
| "eval_completions/mean_length": 375.5383447265625, |
| "eval_completions/min_length": 218.18, |
| "eval_kl": 0.00917266845703125, |
| "eval_loss": -0.012349152937531471, |
| "eval_reward": 0.32000000730156897, |
| "eval_reward_std": 0.3092414766550064, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.32000000730156897, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.3092414766550064, |
| "eval_runtime": 601.161, |
| "eval_samples_per_second": 0.083, |
| "eval_steps_per_second": 0.008, |
| "step": 1500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 618.6, |
| "completions/mean_length": 392.15001182556153, |
| "completions/min_length": 216.0, |
| "epoch": 0.6080808080808081, |
| "grad_norm": 1.4655160488310728, |
| "kl": 0.010688018798828126, |
| "learning_rate": 2e-07, |
| "loss": 0.00576329231262207, |
| "memory(GiB)": 113.5, |
| "reward": 0.40416667349636554, |
| "reward_std": 0.31379757523536683, |
| "rewards/MultiModalAccuracyORM/mean": 0.40416667349636554, |
| "rewards/MultiModalAccuracyORM/std": 0.31379757523536683, |
| "step": 1505, |
| "train_speed(iter/s)": 0.031582 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 642.6, |
| "completions/mean_length": 392.9166763305664, |
| "completions/min_length": 185.9, |
| "epoch": 0.6101010101010101, |
| "grad_norm": 2.300152870135833, |
| "kl": 0.0118072509765625, |
| "learning_rate": 2e-07, |
| "loss": 0.01058935523033142, |
| "memory(GiB)": 113.5, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.27622397541999816, |
| "rewards/MultiModalAccuracyORM/mean": 0.15833333656191825, |
| "rewards/MultiModalAccuracyORM/std": 0.27622397541999816, |
| "step": 1510, |
| "train_speed(iter/s)": 0.031594 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 668.5, |
| "completions/mean_length": 401.3166809082031, |
| "completions/min_length": 227.3, |
| "epoch": 0.6121212121212121, |
| "grad_norm": 2.0573660536714256, |
| "kl": 0.01282958984375, |
| "learning_rate": 2e-07, |
| "loss": 0.028659382462501527, |
| "memory(GiB)": 113.5, |
| "reward": 0.27500000819563863, |
| "reward_std": 0.3438218057155609, |
| "rewards/MultiModalAccuracyORM/mean": 0.27500000819563863, |
| "rewards/MultiModalAccuracyORM/std": 0.3438218057155609, |
| "step": 1515, |
| "train_speed(iter/s)": 0.031593 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 645.0, |
| "completions/mean_length": 390.6333465576172, |
| "completions/min_length": 240.3, |
| "epoch": 0.6141414141414141, |
| "grad_norm": 1.4644802229965364, |
| "kl": 0.0115325927734375, |
| "learning_rate": 2e-07, |
| "loss": 0.009964641928672791, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833334624767302, |
| "reward_std": 0.25113856196403506, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833334624767302, |
| "rewards/MultiModalAccuracyORM/std": 0.25113856196403506, |
| "step": 1520, |
| "train_speed(iter/s)": 0.031607 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 525.1, |
| "completions/mean_length": 338.3666763305664, |
| "completions/min_length": 187.9, |
| "epoch": 0.6161616161616161, |
| "grad_norm": 2.312953380739967, |
| "kl": 0.011322021484375, |
| "learning_rate": 2e-07, |
| "loss": 0.0045973040163516995, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.22224704921245575, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000521540642, |
| "rewards/MultiModalAccuracyORM/std": 0.22224704921245575, |
| "step": 1525, |
| "train_speed(iter/s)": 0.03163 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 639.6, |
| "completions/mean_length": 368.7583465576172, |
| "completions/min_length": 193.4, |
| "epoch": 0.6181818181818182, |
| "grad_norm": 3.0723153433233095, |
| "kl": 0.0133697509765625, |
| "learning_rate": 2e-07, |
| "loss": -0.030410391092300416, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000819563867, |
| "reward_std": 0.35340302884578706, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000819563867, |
| "rewards/MultiModalAccuracyORM/std": 0.35340302884578706, |
| "step": 1530, |
| "train_speed(iter/s)": 0.03164 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 520.6, |
| "completions/mean_length": 351.21667633056643, |
| "completions/min_length": 227.9, |
| "epoch": 0.6202020202020202, |
| "grad_norm": 1.4538179467280616, |
| "kl": 0.01126708984375, |
| "learning_rate": 2e-07, |
| "loss": 0.0038071274757385254, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667610406873, |
| "reward_std": 0.27749558687210085, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667610406873, |
| "rewards/MultiModalAccuracyORM/std": 0.27749558687210085, |
| "step": 1535, |
| "train_speed(iter/s)": 0.031636 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 564.2, |
| "completions/mean_length": 362.9000076293945, |
| "completions/min_length": 196.3, |
| "epoch": 0.6222222222222222, |
| "grad_norm": 2.3834408729545817, |
| "kl": 0.011865234375, |
| "learning_rate": 2e-07, |
| "loss": -0.007588768005371093, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000029802322, |
| "reward_std": 0.2885732680559158, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000029802322, |
| "rewards/MultiModalAccuracyORM/std": 0.2885732680559158, |
| "step": 1540, |
| "train_speed(iter/s)": 0.031648 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 635.7, |
| "completions/mean_length": 418.4583480834961, |
| "completions/min_length": 246.6, |
| "epoch": 0.6242424242424243, |
| "grad_norm": 1.498638277562189, |
| "kl": 0.0112030029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.00476650595664978, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333387970924, |
| "reward_std": 0.3297544777393341, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.3297544777393341, |
| "step": 1545, |
| "train_speed(iter/s)": 0.031653 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 536.1, |
| "completions/mean_length": 338.0750076293945, |
| "completions/min_length": 180.2, |
| "epoch": 0.6262626262626263, |
| "grad_norm": 1.3673556260797224, |
| "kl": 0.012890625, |
| "learning_rate": 2e-07, |
| "loss": 0.011944988369941711, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666731238365, |
| "reward_std": 0.36717758774757386, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.36717758774757386, |
| "step": 1550, |
| "train_speed(iter/s)": 0.031667 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 601.0, |
| "completions/mean_length": 389.5916778564453, |
| "completions/min_length": 242.8, |
| "epoch": 0.6282828282828283, |
| "grad_norm": 2.327729044871898, |
| "kl": 0.014337158203125, |
| "learning_rate": 2e-07, |
| "loss": -0.015535221993923187, |
| "memory(GiB)": 113.5, |
| "reward": 0.17500000298023224, |
| "reward_std": 0.3498097449541092, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000298023224, |
| "rewards/MultiModalAccuracyORM/std": 0.3498097449541092, |
| "step": 1555, |
| "train_speed(iter/s)": 0.031684 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 617.0, |
| "completions/mean_length": 392.79168090820315, |
| "completions/min_length": 227.0, |
| "epoch": 0.6303030303030303, |
| "grad_norm": 0.053806194925700226, |
| "kl": 0.0107635498046875, |
| "learning_rate": 2e-07, |
| "loss": 0.017643353343009947, |
| "memory(GiB)": 113.5, |
| "reward": 0.19166667014360428, |
| "reward_std": 0.3011411875486374, |
| "rewards/MultiModalAccuracyORM/mean": 0.19166667014360428, |
| "rewards/MultiModalAccuracyORM/std": 0.3011411875486374, |
| "step": 1560, |
| "train_speed(iter/s)": 0.031689 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 519.9, |
| "completions/mean_length": 332.08333587646484, |
| "completions/min_length": 183.7, |
| "epoch": 0.6323232323232323, |
| "grad_norm": 0.570186834834556, |
| "kl": 0.016534423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.02576545476913452, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000089406967, |
| "reward_std": 0.3503421902656555, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.3503421902656555, |
| "step": 1565, |
| "train_speed(iter/s)": 0.031696 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 687.6, |
| "completions/mean_length": 380.7333465576172, |
| "completions/min_length": 206.4, |
| "epoch": 0.6343434343434343, |
| "grad_norm": 2.2565482508451735, |
| "kl": 0.0091888427734375, |
| "learning_rate": 2e-07, |
| "loss": 0.01603304147720337, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333440124989, |
| "reward_std": 0.3637146830558777, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333440124989, |
| "rewards/MultiModalAccuracyORM/std": 0.3637146830558777, |
| "step": 1570, |
| "train_speed(iter/s)": 0.031705 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.3, |
| "completions/mean_length": 343.33334197998045, |
| "completions/min_length": 181.6, |
| "epoch": 0.6363636363636364, |
| "grad_norm": 1.578397296268303, |
| "kl": 0.013397216796875, |
| "learning_rate": 2e-07, |
| "loss": 0.04952932298183441, |
| "memory(GiB)": 113.5, |
| "reward": 0.4666666738688946, |
| "reward_std": 0.37498117983341217, |
| "rewards/MultiModalAccuracyORM/mean": 0.4666666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.37498117983341217, |
| "step": 1575, |
| "train_speed(iter/s)": 0.031725 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 713.8, |
| "completions/mean_length": 377.916682434082, |
| "completions/min_length": 199.8, |
| "epoch": 0.6383838383838384, |
| "grad_norm": 1.458403513622403, |
| "kl": 0.0122344970703125, |
| "learning_rate": 2e-07, |
| "loss": 0.016104981303215027, |
| "memory(GiB)": 113.5, |
| "reward": 0.4833333417773247, |
| "reward_std": 0.3252659499645233, |
| "rewards/MultiModalAccuracyORM/mean": 0.4833333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.3252659499645233, |
| "step": 1580, |
| "train_speed(iter/s)": 0.031726 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 678.8, |
| "completions/mean_length": 418.26668243408204, |
| "completions/min_length": 202.3, |
| "epoch": 0.6404040404040404, |
| "grad_norm": 1.8686390380230793, |
| "kl": 0.013201904296875, |
| "learning_rate": 2e-07, |
| "loss": 0.011665409803390503, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000096857548, |
| "reward_std": 0.2652414858341217, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.2652414858341217, |
| "step": 1585, |
| "train_speed(iter/s)": 0.031728 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 723.2, |
| "completions/mean_length": 388.46668395996096, |
| "completions/min_length": 206.4, |
| "epoch": 0.6424242424242425, |
| "grad_norm": 1.992138254292841, |
| "kl": 0.011810302734375, |
| "learning_rate": 2e-07, |
| "loss": 0.08419913649559022, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666746139526, |
| "reward_std": 0.4093579977750778, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666746139526, |
| "rewards/MultiModalAccuracyORM/std": 0.4093579977750778, |
| "step": 1590, |
| "train_speed(iter/s)": 0.031721 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 478.3, |
| "completions/mean_length": 303.75001220703126, |
| "completions/min_length": 176.1, |
| "epoch": 0.6444444444444445, |
| "grad_norm": 1.636979804109864, |
| "kl": 0.0161895751953125, |
| "learning_rate": 2e-07, |
| "loss": 0.011809319257736206, |
| "memory(GiB)": 113.5, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.25897532403469087, |
| "rewards/MultiModalAccuracyORM/mean": 0.3333333425223827, |
| "rewards/MultiModalAccuracyORM/std": 0.25897532403469087, |
| "step": 1595, |
| "train_speed(iter/s)": 0.031748 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.9, |
| "completions/mean_length": 333.1666763305664, |
| "completions/min_length": 160.1, |
| "epoch": 0.6464646464646465, |
| "grad_norm": 2.2869229330092393, |
| "kl": 0.01339111328125, |
| "learning_rate": 2e-07, |
| "loss": 0.005678671598434448, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666753590107, |
| "reward_std": 0.3189666152000427, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.3189666152000427, |
| "step": 1600, |
| "train_speed(iter/s)": 0.031761 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 457.7, |
| "completions/mean_length": 279.3166778564453, |
| "completions/min_length": 174.7, |
| "epoch": 0.6484848484848484, |
| "grad_norm": 1.3720767401777028, |
| "kl": 0.014947509765625, |
| "learning_rate": 2e-07, |
| "loss": 0.0007772698998451232, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666753590107, |
| "reward_std": 0.29786467254161836, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.29786467254161836, |
| "step": 1605, |
| "train_speed(iter/s)": 0.031778 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 467.7, |
| "completions/mean_length": 286.30834197998047, |
| "completions/min_length": 167.6, |
| "epoch": 0.6505050505050505, |
| "grad_norm": 2.272498565917859, |
| "kl": 0.0147857666015625, |
| "learning_rate": 2e-07, |
| "loss": 0.03825833797454834, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833333656191825, |
| "reward_std": 0.3430673748254776, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833333656191825, |
| "rewards/MultiModalAccuracyORM/std": 0.3430673748254776, |
| "step": 1610, |
| "train_speed(iter/s)": 0.031791 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 478.7, |
| "completions/mean_length": 305.6833435058594, |
| "completions/min_length": 176.9, |
| "epoch": 0.6525252525252525, |
| "grad_norm": 2.101651221741828, |
| "kl": 0.01689453125, |
| "learning_rate": 2e-07, |
| "loss": -0.010073482990264893, |
| "memory(GiB)": 113.5, |
| "reward": 0.40833334550261496, |
| "reward_std": 0.3845028102397919, |
| "rewards/MultiModalAccuracyORM/mean": 0.40833334550261496, |
| "rewards/MultiModalAccuracyORM/std": 0.3845028102397919, |
| "step": 1615, |
| "train_speed(iter/s)": 0.031812 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 573.2, |
| "completions/mean_length": 355.3666732788086, |
| "completions/min_length": 217.0, |
| "epoch": 0.6545454545454545, |
| "grad_norm": 1.7437833008639363, |
| "kl": 0.014605712890625, |
| "learning_rate": 2e-07, |
| "loss": 0.03341163992881775, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333432674408, |
| "reward_std": 0.3104085922241211, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.3104085922241211, |
| "step": 1620, |
| "train_speed(iter/s)": 0.031815 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 498.7, |
| "completions/mean_length": 320.88334197998046, |
| "completions/min_length": 176.8, |
| "epoch": 0.6565656565656566, |
| "grad_norm": 2.214426657751653, |
| "kl": 0.012939453125, |
| "learning_rate": 2e-07, |
| "loss": 0.0038519926369190217, |
| "memory(GiB)": 113.5, |
| "reward": 0.4500000111758709, |
| "reward_std": 0.3840597689151764, |
| "rewards/MultiModalAccuracyORM/mean": 0.4500000111758709, |
| "rewards/MultiModalAccuracyORM/std": 0.3840597689151764, |
| "step": 1625, |
| "train_speed(iter/s)": 0.031834 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 475.5, |
| "completions/mean_length": 307.9750091552734, |
| "completions/min_length": 179.6, |
| "epoch": 0.6585858585858586, |
| "grad_norm": 2.3559044349874965, |
| "kl": 0.011468505859375, |
| "learning_rate": 2e-07, |
| "loss": -0.007926353812217712, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.21594529151916503, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.21594529151916503, |
| "step": 1630, |
| "train_speed(iter/s)": 0.031861 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 464.0, |
| "completions/mean_length": 272.0000061035156, |
| "completions/min_length": 140.0, |
| "epoch": 0.6606060606060606, |
| "grad_norm": 2.3218216739931163, |
| "kl": 0.01510009765625, |
| "learning_rate": 2e-07, |
| "loss": -0.017690959572792053, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.20416739881038665, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333333805203437, |
| "rewards/MultiModalAccuracyORM/std": 0.20416739881038665, |
| "step": 1635, |
| "train_speed(iter/s)": 0.03188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 571.0, |
| "completions/mean_length": 346.666682434082, |
| "completions/min_length": 213.6, |
| "epoch": 0.6626262626262627, |
| "grad_norm": 1.7521312796960462, |
| "kl": 0.011456298828125, |
| "learning_rate": 2e-07, |
| "loss": -0.01213396042585373, |
| "memory(GiB)": 113.5, |
| "reward": 0.34166667237877846, |
| "reward_std": 0.2464074045419693, |
| "rewards/MultiModalAccuracyORM/mean": 0.34166667237877846, |
| "rewards/MultiModalAccuracyORM/std": 0.2464074045419693, |
| "step": 1640, |
| "train_speed(iter/s)": 0.031896 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 373.0, |
| "completions/mean_length": 235.60834426879882, |
| "completions/min_length": 120.5, |
| "epoch": 0.6646464646464646, |
| "grad_norm": 2.846675522202014, |
| "kl": 0.0129638671875, |
| "learning_rate": 2e-07, |
| "loss": -0.01681770384311676, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000089406967, |
| "reward_std": 0.364131298661232, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.364131298661232, |
| "step": 1645, |
| "train_speed(iter/s)": 0.031917 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 670.8, |
| "completions/mean_length": 355.72500762939455, |
| "completions/min_length": 171.8, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 2.5063086447109546, |
| "kl": 0.01486053466796875, |
| "learning_rate": 2e-07, |
| "loss": 0.005304119735956192, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000134110451, |
| "reward_std": 0.41141627728939056, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000134110451, |
| "rewards/MultiModalAccuracyORM/std": 0.41141627728939056, |
| "step": 1650, |
| "train_speed(iter/s)": 0.031924 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 733.8, |
| "completions/mean_length": 412.1666793823242, |
| "completions/min_length": 210.8, |
| "epoch": 0.6686868686868687, |
| "grad_norm": 2.9814971352286297, |
| "kl": 0.018035888671875, |
| "learning_rate": 2e-07, |
| "loss": 0.0013743340969085693, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333373069763, |
| "reward_std": 0.28402756750583646, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.28402756750583646, |
| "step": 1655, |
| "train_speed(iter/s)": 0.031912 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 504.7, |
| "completions/mean_length": 315.45000915527345, |
| "completions/min_length": 164.2, |
| "epoch": 0.6707070707070707, |
| "grad_norm": 2.420560710043236, |
| "kl": 0.013409423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.0018982872366905212, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000521540642, |
| "reward_std": 0.25270916223526, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000521540642, |
| "rewards/MultiModalAccuracyORM/std": 0.25270916223526, |
| "step": 1660, |
| "train_speed(iter/s)": 0.031926 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 637.3, |
| "completions/mean_length": 373.15834045410156, |
| "completions/min_length": 187.0, |
| "epoch": 0.6727272727272727, |
| "grad_norm": 2.099245716082938, |
| "kl": 0.0146942138671875, |
| "learning_rate": 2e-07, |
| "loss": 0.0194022536277771, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667237877845, |
| "reward_std": 0.29383077621459963, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667237877845, |
| "rewards/MultiModalAccuracyORM/std": 0.29383077621459963, |
| "step": 1665, |
| "train_speed(iter/s)": 0.031939 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 639.1, |
| "completions/mean_length": 409.43335113525393, |
| "completions/min_length": 252.2, |
| "epoch": 0.6747474747474748, |
| "grad_norm": 0.8827203782530715, |
| "kl": 0.01336669921875, |
| "learning_rate": 2e-07, |
| "loss": 0.022216227650642396, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333410322666, |
| "reward_std": 0.23704480826854707, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.23704480826854707, |
| "step": 1670, |
| "train_speed(iter/s)": 0.031942 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 583.7, |
| "completions/mean_length": 372.35001068115236, |
| "completions/min_length": 223.6, |
| "epoch": 0.6767676767676768, |
| "grad_norm": 2.67307804927538, |
| "kl": 0.012176513671875, |
| "learning_rate": 2e-07, |
| "loss": -0.025462892651557923, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333410322666, |
| "reward_std": 0.30489686131477356, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333410322666, |
| "rewards/MultiModalAccuracyORM/std": 0.30489686131477356, |
| "step": 1675, |
| "train_speed(iter/s)": 0.031947 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 509.9, |
| "completions/mean_length": 252.71667709350587, |
| "completions/min_length": 134.5, |
| "epoch": 0.6787878787878788, |
| "grad_norm": 2.948416033259282, |
| "kl": 0.013824462890625, |
| "learning_rate": 2e-07, |
| "loss": 0.007326580584049225, |
| "memory(GiB)": 113.5, |
| "reward": 0.5083333514630795, |
| "reward_std": 0.3945842385292053, |
| "rewards/MultiModalAccuracyORM/mean": 0.5083333514630795, |
| "rewards/MultiModalAccuracyORM/std": 0.3945842385292053, |
| "step": 1680, |
| "train_speed(iter/s)": 0.031971 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 576.6, |
| "completions/mean_length": 367.6166732788086, |
| "completions/min_length": 225.0, |
| "epoch": 0.6808080808080809, |
| "grad_norm": 0.07197046485321759, |
| "kl": 0.0118194580078125, |
| "learning_rate": 2e-07, |
| "loss": 0.03796108365058899, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.20995735228061677, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333879709245, |
| "rewards/MultiModalAccuracyORM/std": 0.20995735228061677, |
| "step": 1685, |
| "train_speed(iter/s)": 0.031972 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 558.0, |
| "completions/mean_length": 348.7500061035156, |
| "completions/min_length": 212.9, |
| "epoch": 0.6828282828282828, |
| "grad_norm": 1.5442560082143544, |
| "kl": 0.01568603515625, |
| "learning_rate": 2e-07, |
| "loss": 0.017047417163848878, |
| "memory(GiB)": 113.5, |
| "reward": 0.358333345502615, |
| "reward_std": 0.4405413746833801, |
| "rewards/MultiModalAccuracyORM/mean": 0.358333345502615, |
| "rewards/MultiModalAccuracyORM/std": 0.4405413746833801, |
| "step": 1690, |
| "train_speed(iter/s)": 0.031977 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 568.5, |
| "completions/mean_length": 318.30000228881835, |
| "completions/min_length": 162.4, |
| "epoch": 0.6848484848484848, |
| "grad_norm": 2.822151558666746, |
| "kl": 0.013775634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.03140446245670318, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333469927311, |
| "reward_std": 0.399324569106102, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333469927311, |
| "rewards/MultiModalAccuracyORM/std": 0.399324569106102, |
| "step": 1695, |
| "train_speed(iter/s)": 0.031999 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 743.9, |
| "completions/mean_length": 485.1916870117187, |
| "completions/min_length": 305.5, |
| "epoch": 0.6868686868686869, |
| "grad_norm": 0.9869928398468556, |
| "kl": 0.0103057861328125, |
| "learning_rate": 2e-07, |
| "loss": 0.018257686495780946, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333417773247, |
| "reward_std": 0.29035089910030365, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333417773247, |
| "rewards/MultiModalAccuracyORM/std": 0.29035089910030365, |
| "step": 1700, |
| "train_speed(iter/s)": 0.031993 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 553.9, |
| "completions/mean_length": 329.5166725158691, |
| "completions/min_length": 196.6, |
| "epoch": 0.6888888888888889, |
| "grad_norm": 2.390331116834798, |
| "kl": 0.0239990234375, |
| "learning_rate": 2e-07, |
| "loss": -0.02088260054588318, |
| "memory(GiB)": 113.5, |
| "reward": 0.4750000089406967, |
| "reward_std": 0.27753118276596067, |
| "rewards/MultiModalAccuracyORM/mean": 0.4750000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.27753118276596067, |
| "step": 1705, |
| "train_speed(iter/s)": 0.032012 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 486.8, |
| "completions/mean_length": 286.5166694641113, |
| "completions/min_length": 153.0, |
| "epoch": 0.6909090909090909, |
| "grad_norm": 3.070912031712293, |
| "kl": 0.0171630859375, |
| "learning_rate": 2e-07, |
| "loss": 0.00493430495262146, |
| "memory(GiB)": 113.5, |
| "reward": 0.45833334028720857, |
| "reward_std": 0.31192905008792876, |
| "rewards/MultiModalAccuracyORM/mean": 0.45833334028720857, |
| "rewards/MultiModalAccuracyORM/std": 0.31192905008792876, |
| "step": 1710, |
| "train_speed(iter/s)": 0.032022 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 416.9, |
| "completions/mean_length": 243.5666763305664, |
| "completions/min_length": 117.3, |
| "epoch": 0.692929292929293, |
| "grad_norm": 2.7698758058054214, |
| "kl": 0.0126708984375, |
| "learning_rate": 2e-07, |
| "loss": -0.0016166016459465027, |
| "memory(GiB)": 113.5, |
| "reward": 0.42500000819563866, |
| "reward_std": 0.25512445867061617, |
| "rewards/MultiModalAccuracyORM/mean": 0.42500000819563866, |
| "rewards/MultiModalAccuracyORM/std": 0.25512445867061617, |
| "step": 1715, |
| "train_speed(iter/s)": 0.032047 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 366.2, |
| "completions/mean_length": 255.48334503173828, |
| "completions/min_length": 158.4, |
| "epoch": 0.694949494949495, |
| "grad_norm": 2.744690041316947, |
| "kl": 0.015838623046875, |
| "learning_rate": 2e-07, |
| "loss": -0.019546210765838623, |
| "memory(GiB)": 113.5, |
| "reward": 0.26666667833924296, |
| "reward_std": 0.2754935443401337, |
| "rewards/MultiModalAccuracyORM/mean": 0.26666667833924296, |
| "rewards/MultiModalAccuracyORM/std": 0.2754935443401337, |
| "step": 1720, |
| "train_speed(iter/s)": 0.032071 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 612.9, |
| "completions/mean_length": 379.2666839599609, |
| "completions/min_length": 221.8, |
| "epoch": 0.696969696969697, |
| "grad_norm": 1.777431935717832, |
| "kl": 0.012689208984375, |
| "learning_rate": 2e-07, |
| "loss": 0.009233607351779938, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.28128685653209684, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.28128685653209684, |
| "step": 1725, |
| "train_speed(iter/s)": 0.032079 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 641.0, |
| "completions/mean_length": 354.1666770935059, |
| "completions/min_length": 171.8, |
| "epoch": 0.6989898989898989, |
| "grad_norm": 3.0383086202130616, |
| "kl": 0.01773681640625, |
| "learning_rate": 2e-07, |
| "loss": 0.03813132643699646, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666666865348814, |
| "reward_std": 0.27938000559806825, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666666865348814, |
| "rewards/MultiModalAccuracyORM/std": 0.27938000559806825, |
| "step": 1730, |
| "train_speed(iter/s)": 0.032094 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 615.8, |
| "completions/mean_length": 351.7666717529297, |
| "completions/min_length": 157.5, |
| "epoch": 0.701010101010101, |
| "grad_norm": 1.8778050454869868, |
| "kl": 0.014495849609375, |
| "learning_rate": 2e-07, |
| "loss": 0.00038725733757019045, |
| "memory(GiB)": 113.5, |
| "reward": 0.4833333432674408, |
| "reward_std": 0.33153211176395414, |
| "rewards/MultiModalAccuracyORM/mean": 0.4833333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.33153211176395414, |
| "step": 1735, |
| "train_speed(iter/s)": 0.032098 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 565.2, |
| "completions/mean_length": 350.0083404541016, |
| "completions/min_length": 189.6, |
| "epoch": 0.703030303030303, |
| "grad_norm": 2.1562095065119053, |
| "kl": 0.0187957763671875, |
| "learning_rate": 2e-07, |
| "loss": -0.02958904504776001, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333333879709244, |
| "reward_std": 0.3487591862678528, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333333879709244, |
| "rewards/MultiModalAccuracyORM/std": 0.3487591862678528, |
| "step": 1740, |
| "train_speed(iter/s)": 0.032101 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 524.2, |
| "completions/mean_length": 310.36668090820314, |
| "completions/min_length": 187.0, |
| "epoch": 0.705050505050505, |
| "grad_norm": 2.8598050443718797, |
| "kl": 0.013079833984375, |
| "learning_rate": 2e-07, |
| "loss": -0.007939225435256958, |
| "memory(GiB)": 113.5, |
| "reward": 0.35000000819563865, |
| "reward_std": 0.3908045649528503, |
| "rewards/MultiModalAccuracyORM/mean": 0.35000000819563865, |
| "rewards/MultiModalAccuracyORM/std": 0.3908045649528503, |
| "step": 1745, |
| "train_speed(iter/s)": 0.032116 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 2.0976510908729256, |
| "learning_rate": 2e-07, |
| "loss": 0.05007731318473816, |
| "memory(GiB)": 113.5, |
| "step": 1750, |
| "train_speed(iter/s)": 0.03212 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0016666666666666666, |
| "eval_completions/max_length": 587.9, |
| "eval_completions/mean_length": 354.56501251220703, |
| "eval_completions/min_length": 214.28, |
| "eval_kl": 0.01150848388671875, |
| "eval_loss": 0.0095694400370121, |
| "eval_reward": 0.3250000074505806, |
| "eval_reward_std": 0.32090782165527343, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.3250000074505806, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.32090782165527343, |
| "eval_runtime": 581.3868, |
| "eval_samples_per_second": 0.086, |
| "eval_steps_per_second": 0.009, |
| "step": 1750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 585.3, |
| "completions/mean_length": 361.1250114440918, |
| "completions/min_length": 205.05, |
| "epoch": 0.7090909090909091, |
| "grad_norm": 1.92993215123609, |
| "kl": 0.01456298828125, |
| "learning_rate": 2e-07, |
| "loss": 0.013172458112239837, |
| "memory(GiB)": 113.5, |
| "reward": 0.3041666720062494, |
| "reward_std": 0.3517512962222099, |
| "rewards/MultiModalAccuracyORM/mean": 0.3041666720062494, |
| "rewards/MultiModalAccuracyORM/std": 0.3517512962222099, |
| "step": 1755, |
| "train_speed(iter/s)": 0.031659 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 877.0, |
| "completions/mean_length": 508.4500274658203, |
| "completions/min_length": 241.1, |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.960501031799004, |
| "kl": 0.01246490478515625, |
| "learning_rate": 2e-07, |
| "loss": -0.01800227165222168, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667461395264, |
| "reward_std": 0.40063177347183226, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667461395264, |
| "rewards/MultiModalAccuracyORM/std": 0.40063177347183226, |
| "step": 1760, |
| "train_speed(iter/s)": 0.031653 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 679.9, |
| "completions/mean_length": 389.05833740234374, |
| "completions/min_length": 194.6, |
| "epoch": 0.7131313131313132, |
| "grad_norm": 0.06856184885436768, |
| "kl": 0.0163360595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.05879574418067932, |
| "memory(GiB)": 113.5, |
| "reward": 0.45000000223517417, |
| "reward_std": 0.26600751280784607, |
| "rewards/MultiModalAccuracyORM/mean": 0.45000000223517417, |
| "rewards/MultiModalAccuracyORM/std": 0.26600751280784607, |
| "step": 1765, |
| "train_speed(iter/s)": 0.031655 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 542.8, |
| "completions/mean_length": 364.608349609375, |
| "completions/min_length": 206.9, |
| "epoch": 0.7151515151515152, |
| "grad_norm": 2.5267727464559195, |
| "kl": 0.013262939453125, |
| "learning_rate": 2e-07, |
| "loss": -0.05543935298919678, |
| "memory(GiB)": 113.5, |
| "reward": 0.4583333507180214, |
| "reward_std": 0.349611759185791, |
| "rewards/MultiModalAccuracyORM/mean": 0.4583333507180214, |
| "rewards/MultiModalAccuracyORM/std": 0.349611759185791, |
| "step": 1770, |
| "train_speed(iter/s)": 0.031665 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 620.2, |
| "completions/mean_length": 349.9750091552734, |
| "completions/min_length": 182.8, |
| "epoch": 0.7171717171717171, |
| "grad_norm": 1.8053530203955317, |
| "kl": 0.016015625, |
| "learning_rate": 2e-07, |
| "loss": 0.003249824047088623, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667535901069, |
| "reward_std": 0.36190145611763, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667535901069, |
| "rewards/MultiModalAccuracyORM/std": 0.36190145611763, |
| "step": 1775, |
| "train_speed(iter/s)": 0.031667 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 411.5, |
| "completions/mean_length": 257.55000534057615, |
| "completions/min_length": 140.9, |
| "epoch": 0.7191919191919192, |
| "grad_norm": 2.81422482443103, |
| "kl": 0.0193359375, |
| "learning_rate": 2e-07, |
| "loss": -0.02224818170070648, |
| "memory(GiB)": 113.5, |
| "reward": 0.30000000819563866, |
| "reward_std": 0.3563897281885147, |
| "rewards/MultiModalAccuracyORM/mean": 0.30000000819563866, |
| "rewards/MultiModalAccuracyORM/std": 0.3563897281885147, |
| "step": 1780, |
| "train_speed(iter/s)": 0.031687 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 481.3, |
| "completions/mean_length": 291.2166748046875, |
| "completions/min_length": 164.3, |
| "epoch": 0.7212121212121212, |
| "grad_norm": 2.8650400951164525, |
| "kl": 0.0154205322265625, |
| "learning_rate": 2e-07, |
| "loss": -0.02759958803653717, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.2892681032419205, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667014360427, |
| "rewards/MultiModalAccuracyORM/std": 0.2892681032419205, |
| "step": 1785, |
| "train_speed(iter/s)": 0.031703 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 651.8, |
| "completions/mean_length": 382.2583404541016, |
| "completions/min_length": 222.3, |
| "epoch": 0.7232323232323232, |
| "grad_norm": 0.04924378999535635, |
| "kl": 0.0165863037109375, |
| "learning_rate": 2e-07, |
| "loss": 0.002944570779800415, |
| "memory(GiB)": 113.5, |
| "reward": 0.1833333395421505, |
| "reward_std": 0.3059200614690781, |
| "rewards/MultiModalAccuracyORM/mean": 0.1833333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3059200614690781, |
| "step": 1790, |
| "train_speed(iter/s)": 0.031714 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 550.8, |
| "completions/mean_length": 361.8166778564453, |
| "completions/min_length": 215.4, |
| "epoch": 0.7252525252525253, |
| "grad_norm": 1.0400490856507523, |
| "kl": 0.015380859375, |
| "learning_rate": 2e-07, |
| "loss": 0.0032314777374267576, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667312383653, |
| "reward_std": 0.3141998678445816, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667312383653, |
| "rewards/MultiModalAccuracyORM/std": 0.3141998678445816, |
| "step": 1795, |
| "train_speed(iter/s)": 0.031727 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 648.5, |
| "completions/mean_length": 366.5666793823242, |
| "completions/min_length": 172.7, |
| "epoch": 0.7272727272727273, |
| "grad_norm": 1.579739169824459, |
| "kl": 0.015093994140625, |
| "learning_rate": 2e-07, |
| "loss": -0.01905302405357361, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.320466023683548, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.320466023683548, |
| "step": 1800, |
| "train_speed(iter/s)": 0.031739 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 538.7, |
| "completions/mean_length": 319.3916732788086, |
| "completions/min_length": 186.6, |
| "epoch": 0.7292929292929293, |
| "grad_norm": 3.0687917767271022, |
| "kl": 0.0137054443359375, |
| "learning_rate": 2e-07, |
| "loss": 0.02089669108390808, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666693985462, |
| "reward_std": 0.47085520029067995, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.47085520029067995, |
| "step": 1805, |
| "train_speed(iter/s)": 0.031757 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 628.7, |
| "completions/mean_length": 375.0416778564453, |
| "completions/min_length": 215.7, |
| "epoch": 0.7313131313131314, |
| "grad_norm": 1.8933797302813846, |
| "kl": 0.0150848388671875, |
| "learning_rate": 2e-07, |
| "loss": 0.03909637928009033, |
| "memory(GiB)": 113.5, |
| "reward": 0.39166667610406875, |
| "reward_std": 0.34688264429569243, |
| "rewards/MultiModalAccuracyORM/mean": 0.39166667610406875, |
| "rewards/MultiModalAccuracyORM/std": 0.34688264429569243, |
| "step": 1810, |
| "train_speed(iter/s)": 0.031762 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 555.9, |
| "completions/mean_length": 305.2583404541016, |
| "completions/min_length": 157.4, |
| "epoch": 0.7333333333333333, |
| "grad_norm": 1.8206918881783931, |
| "kl": 0.0149169921875, |
| "learning_rate": 2e-07, |
| "loss": -0.016247293353080748, |
| "memory(GiB)": 113.5, |
| "reward": 0.2666666701436043, |
| "reward_std": 0.32451151609420775, |
| "rewards/MultiModalAccuracyORM/mean": 0.2666666701436043, |
| "rewards/MultiModalAccuracyORM/std": 0.32451151609420775, |
| "step": 1815, |
| "train_speed(iter/s)": 0.031772 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 502.8, |
| "completions/mean_length": 322.85000762939455, |
| "completions/min_length": 183.5, |
| "epoch": 0.7353535353535353, |
| "grad_norm": 1.918825325754338, |
| "kl": 0.01243896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.010172617435455323, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000223517417, |
| "reward_std": 0.21999078392982482, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000223517417, |
| "rewards/MultiModalAccuracyORM/std": 0.21999078392982482, |
| "step": 1820, |
| "train_speed(iter/s)": 0.031781 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 442.5, |
| "completions/mean_length": 253.28334197998046, |
| "completions/min_length": 136.0, |
| "epoch": 0.7373737373737373, |
| "grad_norm": 2.5646468814628482, |
| "kl": 0.01630859375, |
| "learning_rate": 2e-07, |
| "loss": 0.08878597021102905, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000089406967, |
| "reward_std": 0.3767348140478134, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.3767348140478134, |
| "step": 1825, |
| "train_speed(iter/s)": 0.031805 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 786.5, |
| "completions/mean_length": 391.63334197998046, |
| "completions/min_length": 175.8, |
| "epoch": 0.7393939393939394, |
| "grad_norm": 2.279597838394587, |
| "kl": 0.016058349609375, |
| "learning_rate": 2e-07, |
| "loss": -0.01255677342414856, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333656191827, |
| "reward_std": 0.30187161862850187, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333656191827, |
| "rewards/MultiModalAccuracyORM/std": 0.30187161862850187, |
| "step": 1830, |
| "train_speed(iter/s)": 0.031799 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 601.0, |
| "completions/mean_length": 351.30834045410154, |
| "completions/min_length": 184.7, |
| "epoch": 0.7414141414141414, |
| "grad_norm": 1.220249950163537, |
| "kl": 0.014288330078125, |
| "learning_rate": 2e-07, |
| "loss": -0.03182802200317383, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000067055225, |
| "reward_std": 0.40566191971302035, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.40566191971302035, |
| "step": 1835, |
| "train_speed(iter/s)": 0.031813 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 500.3, |
| "completions/mean_length": 289.9583404541016, |
| "completions/min_length": 139.6, |
| "epoch": 0.7434343434343434, |
| "grad_norm": 3.7311094209711153, |
| "kl": 0.019146728515625, |
| "learning_rate": 2e-07, |
| "loss": -0.02434406876564026, |
| "memory(GiB)": 113.5, |
| "reward": 0.4333333432674408, |
| "reward_std": 0.3922538310289383, |
| "rewards/MultiModalAccuracyORM/mean": 0.4333333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.3922538310289383, |
| "step": 1840, |
| "train_speed(iter/s)": 0.031824 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 493.3, |
| "completions/mean_length": 292.6333404541016, |
| "completions/min_length": 165.6, |
| "epoch": 0.7454545454545455, |
| "grad_norm": 2.2491131974096503, |
| "kl": 0.018963623046875, |
| "learning_rate": 2e-07, |
| "loss": -0.029304242134094237, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000096857548, |
| "reward_std": 0.3370794355869293, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.3370794355869293, |
| "step": 1845, |
| "train_speed(iter/s)": 0.031836 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 587.7, |
| "completions/mean_length": 369.1416778564453, |
| "completions/min_length": 209.3, |
| "epoch": 0.7474747474747475, |
| "grad_norm": 1.8956894287229566, |
| "kl": 0.015484619140625, |
| "learning_rate": 2e-07, |
| "loss": 0.015110939741134644, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.4038462698459625, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.4038462698459625, |
| "step": 1850, |
| "train_speed(iter/s)": 0.031843 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 591.0, |
| "completions/mean_length": 357.9083419799805, |
| "completions/min_length": 162.4, |
| "epoch": 0.7494949494949495, |
| "grad_norm": 2.5484409209581504, |
| "kl": 0.0146087646484375, |
| "learning_rate": 2e-07, |
| "loss": -0.023239874839782716, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000894069672, |
| "reward_std": 0.31517534554004667, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000894069672, |
| "rewards/MultiModalAccuracyORM/std": 0.31517534554004667, |
| "step": 1855, |
| "train_speed(iter/s)": 0.031855 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 534.3, |
| "completions/mean_length": 354.6916748046875, |
| "completions/min_length": 208.8, |
| "epoch": 0.7515151515151515, |
| "grad_norm": 2.0846151526365655, |
| "kl": 0.0114715576171875, |
| "learning_rate": 2e-07, |
| "loss": 0.0073637284338474275, |
| "memory(GiB)": 113.5, |
| "reward": 0.18333333879709243, |
| "reward_std": 0.2907939374446869, |
| "rewards/MultiModalAccuracyORM/mean": 0.18333333879709243, |
| "rewards/MultiModalAccuracyORM/std": 0.2907939374446869, |
| "step": 1860, |
| "train_speed(iter/s)": 0.031861 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 490.8, |
| "completions/mean_length": 304.43334045410154, |
| "completions/min_length": 155.7, |
| "epoch": 0.7535353535353535, |
| "grad_norm": 2.0624318263809047, |
| "kl": 0.01436767578125, |
| "learning_rate": 2e-07, |
| "loss": 0.040461289882659915, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000059604645, |
| "reward_std": 0.28959646821022034, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000059604645, |
| "rewards/MultiModalAccuracyORM/std": 0.28959646821022034, |
| "step": 1865, |
| "train_speed(iter/s)": 0.031882 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 615.0, |
| "completions/mean_length": 348.55001678466795, |
| "completions/min_length": 185.6, |
| "epoch": 0.7555555555555555, |
| "grad_norm": 0.07620984486729401, |
| "kl": 0.02213134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.014231646060943603, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.23860623836517333, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.23860623836517333, |
| "step": 1870, |
| "train_speed(iter/s)": 0.031881 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 763.2, |
| "completions/mean_length": 389.03334045410156, |
| "completions/min_length": 191.9, |
| "epoch": 0.7575757575757576, |
| "grad_norm": 2.525300571346317, |
| "kl": 0.02110443115234375, |
| "learning_rate": 2e-07, |
| "loss": 0.0036004871129989625, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.41791602075099943, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.41791602075099943, |
| "step": 1875, |
| "train_speed(iter/s)": 0.03188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 509.2, |
| "completions/mean_length": 309.6250030517578, |
| "completions/min_length": 170.7, |
| "epoch": 0.7595959595959596, |
| "grad_norm": 1.8476207975789374, |
| "kl": 0.0132843017578125, |
| "learning_rate": 2e-07, |
| "loss": 0.01698073446750641, |
| "memory(GiB)": 113.5, |
| "reward": 0.1416666679084301, |
| "reward_std": 0.24939410090446473, |
| "rewards/MultiModalAccuracyORM/mean": 0.1416666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.24939410090446473, |
| "step": 1880, |
| "train_speed(iter/s)": 0.031898 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 651.4, |
| "completions/mean_length": 324.4583480834961, |
| "completions/min_length": 173.3, |
| "epoch": 0.7616161616161616, |
| "grad_norm": 2.8918258139669333, |
| "kl": 0.0152374267578125, |
| "learning_rate": 2e-07, |
| "loss": -0.01050989031791687, |
| "memory(GiB)": 113.5, |
| "reward": 0.3916666731238365, |
| "reward_std": 0.3340185970067978, |
| "rewards/MultiModalAccuracyORM/mean": 0.3916666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.3340185970067978, |
| "step": 1885, |
| "train_speed(iter/s)": 0.031901 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 763.0, |
| "completions/mean_length": 448.85834350585935, |
| "completions/min_length": 173.1, |
| "epoch": 0.7636363636363637, |
| "grad_norm": 1.4150104336871425, |
| "kl": 0.0143951416015625, |
| "learning_rate": 2e-07, |
| "loss": 0.013275668025016785, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000104308128, |
| "reward_std": 0.33704383969306945, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000104308128, |
| "rewards/MultiModalAccuracyORM/std": 0.33704383969306945, |
| "step": 1890, |
| "train_speed(iter/s)": 0.031898 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 557.5, |
| "completions/mean_length": 351.9833450317383, |
| "completions/min_length": 186.2, |
| "epoch": 0.7656565656565657, |
| "grad_norm": 1.8175161534038837, |
| "kl": 0.014886474609375, |
| "learning_rate": 2e-07, |
| "loss": -0.021983048319816588, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000089406967, |
| "reward_std": 0.30795769989490507, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000089406967, |
| "rewards/MultiModalAccuracyORM/std": 0.30795769989490507, |
| "step": 1895, |
| "train_speed(iter/s)": 0.031911 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 515.1, |
| "completions/mean_length": 327.0833404541016, |
| "completions/min_length": 144.5, |
| "epoch": 0.7676767676767676, |
| "grad_norm": 2.5158628276606025, |
| "kl": 0.0138824462890625, |
| "learning_rate": 2e-07, |
| "loss": 0.03910906314849853, |
| "memory(GiB)": 113.5, |
| "reward": 0.30833333656191825, |
| "reward_std": 0.3422983974218369, |
| "rewards/MultiModalAccuracyORM/mean": 0.30833333656191825, |
| "rewards/MultiModalAccuracyORM/std": 0.3422983974218369, |
| "step": 1900, |
| "train_speed(iter/s)": 0.031926 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 696.3, |
| "completions/mean_length": 446.5250076293945, |
| "completions/min_length": 241.6, |
| "epoch": 0.7696969696969697, |
| "grad_norm": 2.2012277452389415, |
| "kl": 0.026849365234375, |
| "learning_rate": 2e-07, |
| "loss": 0.0031028717756271364, |
| "memory(GiB)": 113.5, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.2526139706373215, |
| "rewards/MultiModalAccuracyORM/mean": 0.1916666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.2526139706373215, |
| "step": 1905, |
| "train_speed(iter/s)": 0.031932 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 578.5, |
| "completions/mean_length": 332.84167633056643, |
| "completions/min_length": 201.7, |
| "epoch": 0.7717171717171717, |
| "grad_norm": 1.9126113362129455, |
| "kl": 0.0396942138671875, |
| "learning_rate": 2e-07, |
| "loss": -0.03872146010398865, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333334252238274, |
| "reward_std": 0.22631654143333435, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333334252238274, |
| "rewards/MultiModalAccuracyORM/std": 0.22631654143333435, |
| "step": 1910, |
| "train_speed(iter/s)": 0.031948 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 675.2, |
| "completions/mean_length": 324.3500061035156, |
| "completions/min_length": 168.6, |
| "epoch": 0.7737373737373737, |
| "grad_norm": 3.0923030780646883, |
| "kl": 0.0193115234375, |
| "learning_rate": 2e-07, |
| "loss": -0.00021869316697120667, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667312383653, |
| "reward_std": 0.3495877593755722, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667312383653, |
| "rewards/MultiModalAccuracyORM/std": 0.3495877593755722, |
| "step": 1915, |
| "train_speed(iter/s)": 0.031953 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 851.2, |
| "completions/mean_length": 429.6666732788086, |
| "completions/min_length": 246.7, |
| "epoch": 0.7757575757575758, |
| "grad_norm": 1.4230705183827115, |
| "kl": 0.0119659423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.007732442766427994, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333334177732467, |
| "reward_std": 0.3922538310289383, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333334177732467, |
| "rewards/MultiModalAccuracyORM/std": 0.3922538310289383, |
| "step": 1920, |
| "train_speed(iter/s)": 0.03195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 578.1, |
| "completions/mean_length": 327.52501220703124, |
| "completions/min_length": 145.3, |
| "epoch": 0.7777777777777778, |
| "grad_norm": 2.492778960496138, |
| "kl": 0.0209716796875, |
| "learning_rate": 2e-07, |
| "loss": 0.058314287662506105, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666746139526, |
| "reward_std": 0.3370794355869293, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666746139526, |
| "rewards/MultiModalAccuracyORM/std": 0.3370794355869293, |
| "step": 1925, |
| "train_speed(iter/s)": 0.031957 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 559.2, |
| "completions/mean_length": 314.4666748046875, |
| "completions/min_length": 159.8, |
| "epoch": 0.7797979797979798, |
| "grad_norm": 1.3216256644694324, |
| "kl": 0.019390869140625, |
| "learning_rate": 2e-07, |
| "loss": 0.003662779927253723, |
| "memory(GiB)": 113.5, |
| "reward": 0.40000000447034834, |
| "reward_std": 0.19031869769096374, |
| "rewards/MultiModalAccuracyORM/mean": 0.40000000447034834, |
| "rewards/MultiModalAccuracyORM/std": 0.19031869769096374, |
| "step": 1930, |
| "train_speed(iter/s)": 0.031969 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 614.5, |
| "completions/mean_length": 340.2666778564453, |
| "completions/min_length": 194.2, |
| "epoch": 0.7818181818181819, |
| "grad_norm": 0.16139191599066427, |
| "kl": 0.0214599609375, |
| "learning_rate": 2e-07, |
| "loss": -0.047375884652137754, |
| "memory(GiB)": 113.5, |
| "reward": 0.5166666835546494, |
| "reward_std": 0.33453335165977477, |
| "rewards/MultiModalAccuracyORM/mean": 0.5166666835546494, |
| "rewards/MultiModalAccuracyORM/std": 0.33453335165977477, |
| "step": 1935, |
| "train_speed(iter/s)": 0.031969 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 647.0, |
| "completions/mean_length": 385.9500137329102, |
| "completions/min_length": 210.7, |
| "epoch": 0.7838383838383839, |
| "grad_norm": 1.6715004236392428, |
| "kl": 0.0131805419921875, |
| "learning_rate": 2e-07, |
| "loss": -0.010814064741134643, |
| "memory(GiB)": 113.5, |
| "reward": 0.2250000037252903, |
| "reward_std": 0.2325587034225464, |
| "rewards/MultiModalAccuracyORM/mean": 0.2250000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.2325587034225464, |
| "step": 1940, |
| "train_speed(iter/s)": 0.031974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 803.8, |
| "completions/mean_length": 441.1000091552734, |
| "completions/min_length": 249.1, |
| "epoch": 0.7858585858585858, |
| "grad_norm": 1.1825903834954647, |
| "kl": 0.0154388427734375, |
| "learning_rate": 2e-07, |
| "loss": 0.0033442020416259766, |
| "memory(GiB)": 113.5, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.2938903748989105, |
| "rewards/MultiModalAccuracyORM/mean": 0.20833333656191827, |
| "rewards/MultiModalAccuracyORM/std": 0.2938903748989105, |
| "step": 1945, |
| "train_speed(iter/s)": 0.031969 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 559.7, |
| "completions/mean_length": 325.9666748046875, |
| "completions/min_length": 175.2, |
| "epoch": 0.7878787878787878, |
| "grad_norm": 1.0389835461828303, |
| "kl": 0.029302978515625, |
| "learning_rate": 2e-07, |
| "loss": 0.0020487613976001738, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667461395265, |
| "reward_std": 0.2074468642473221, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667461395265, |
| "rewards/MultiModalAccuracyORM/std": 0.2074468642473221, |
| "step": 1950, |
| "train_speed(iter/s)": 0.031978 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 779.1, |
| "completions/mean_length": 420.0750106811523, |
| "completions/min_length": 233.8, |
| "epoch": 0.7898989898989899, |
| "grad_norm": 2.259966046846081, |
| "kl": 0.0141693115234375, |
| "learning_rate": 2e-07, |
| "loss": 0.0017102479934692383, |
| "memory(GiB)": 113.5, |
| "reward": 0.09166666939854622, |
| "reward_std": 0.18332210481166838, |
| "rewards/MultiModalAccuracyORM/mean": 0.09166666939854622, |
| "rewards/MultiModalAccuracyORM/std": 0.18332210481166838, |
| "step": 1955, |
| "train_speed(iter/s)": 0.031981 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 633.8, |
| "completions/mean_length": 396.0333450317383, |
| "completions/min_length": 206.8, |
| "epoch": 0.7919191919191919, |
| "grad_norm": 2.0736200850407713, |
| "kl": 0.0152496337890625, |
| "learning_rate": 2e-07, |
| "loss": -0.005099079012870789, |
| "memory(GiB)": 113.5, |
| "reward": 0.5583333425223828, |
| "reward_std": 0.28784283697605134, |
| "rewards/MultiModalAccuracyORM/mean": 0.5583333425223828, |
| "rewards/MultiModalAccuracyORM/std": 0.28784283697605134, |
| "step": 1960, |
| "train_speed(iter/s)": 0.031988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 954.2, |
| "completions/mean_length": 473.5583465576172, |
| "completions/min_length": 247.3, |
| "epoch": 0.793939393939394, |
| "grad_norm": 2.157212917514597, |
| "kl": 0.0150390625, |
| "learning_rate": 2e-07, |
| "loss": 0.024318861961364745, |
| "memory(GiB)": 113.5, |
| "reward": 0.21666667535901069, |
| "reward_std": 0.36190145611763, |
| "rewards/MultiModalAccuracyORM/mean": 0.21666667535901069, |
| "rewards/MultiModalAccuracyORM/std": 0.36190145611763, |
| "step": 1965, |
| "train_speed(iter/s)": 0.031976 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 702.4, |
| "completions/mean_length": 456.8000152587891, |
| "completions/min_length": 269.1, |
| "epoch": 0.795959595959596, |
| "grad_norm": 2.346804141928421, |
| "kl": 0.0154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.011195459961891174, |
| "memory(GiB)": 113.5, |
| "reward": 0.1500000037252903, |
| "reward_std": 0.25897532403469087, |
| "rewards/MultiModalAccuracyORM/mean": 0.1500000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.25897532403469087, |
| "step": 1970, |
| "train_speed(iter/s)": 0.031975 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 649.7, |
| "completions/mean_length": 426.95000610351565, |
| "completions/min_length": 288.6, |
| "epoch": 0.797979797979798, |
| "grad_norm": 1.937444109706918, |
| "kl": 0.012738037109375, |
| "learning_rate": 2e-07, |
| "loss": 0.050849252939224245, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333334401249887, |
| "reward_std": 0.35569489002227783, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333334401249887, |
| "rewards/MultiModalAccuracyORM/std": 0.35569489002227783, |
| "step": 1975, |
| "train_speed(iter/s)": 0.031976 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 537.9, |
| "completions/mean_length": 308.3166732788086, |
| "completions/min_length": 159.5, |
| "epoch": 0.8, |
| "grad_norm": 1.2310292555448101, |
| "kl": 0.01746826171875, |
| "learning_rate": 2e-07, |
| "loss": 0.021820831298828124, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000000149011614, |
| "reward_std": 0.34010172784328463, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000000149011614, |
| "rewards/MultiModalAccuracyORM/std": 0.34010172784328463, |
| "step": 1980, |
| "train_speed(iter/s)": 0.031988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 607.3, |
| "completions/mean_length": 397.74167861938474, |
| "completions/min_length": 229.1, |
| "epoch": 0.802020202020202, |
| "grad_norm": 1.2006546705713226, |
| "kl": 0.012060546875, |
| "learning_rate": 2e-07, |
| "loss": -0.00946882963180542, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666768491268, |
| "reward_std": 0.21775851845741273, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.21775851845741273, |
| "step": 1985, |
| "train_speed(iter/s)": 0.031999 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 689.0, |
| "completions/mean_length": 455.6916870117187, |
| "completions/min_length": 271.2, |
| "epoch": 0.804040404040404, |
| "grad_norm": 1.7247663724146078, |
| "kl": 0.0120758056640625, |
| "learning_rate": 2e-07, |
| "loss": -0.013834655284881592, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666746139526, |
| "reward_std": 0.34933353662490846, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666746139526, |
| "rewards/MultiModalAccuracyORM/std": 0.34933353662490846, |
| "step": 1990, |
| "train_speed(iter/s)": 0.032003 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 614.3, |
| "completions/mean_length": 388.1000122070312, |
| "completions/min_length": 220.5, |
| "epoch": 0.806060606060606, |
| "grad_norm": 1.2751536443809328, |
| "kl": 0.0211517333984375, |
| "learning_rate": 2e-07, |
| "loss": 0.026651501655578613, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000096857548, |
| "reward_std": 0.29452561140060424, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.29452561140060424, |
| "step": 1995, |
| "train_speed(iter/s)": 0.032003 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 2.53993588975996, |
| "learning_rate": 2e-07, |
| "loss": 0.008918963372707367, |
| "memory(GiB)": 113.5, |
| "step": 2000, |
| "train_speed(iter/s)": 0.032019 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0, |
| "eval_completions/max_length": 640.5, |
| "eval_completions/mean_length": 393.71500930786135, |
| "eval_completions/min_length": 218.08, |
| "eval_kl": 0.01480712890625, |
| "eval_loss": 0.023003682494163513, |
| "eval_reward": 0.30333334133028983, |
| "eval_reward_std": 0.2836029249429703, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.30333334133028983, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.2836029249429703, |
| "eval_runtime": 625.7559, |
| "eval_samples_per_second": 0.08, |
| "eval_steps_per_second": 0.008, |
| "step": 2000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 671.1, |
| "completions/mean_length": 439.12501831054686, |
| "completions/min_length": 245.5, |
| "epoch": 0.8101010101010101, |
| "grad_norm": 1.574060720208308, |
| "kl": 0.01459503173828125, |
| "learning_rate": 2e-07, |
| "loss": -0.005982875823974609, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333334103226664, |
| "reward_std": 0.3096754729747772, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333334103226664, |
| "rewards/MultiModalAccuracyORM/std": 0.3096754729747772, |
| "step": 2005, |
| "train_speed(iter/s)": 0.031605 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 514.3, |
| "completions/mean_length": 312.4916702270508, |
| "completions/min_length": 184.5, |
| "epoch": 0.8121212121212121, |
| "grad_norm": 1.8875313816028536, |
| "kl": 0.01746826171875, |
| "learning_rate": 2e-07, |
| "loss": 0.04548422992229462, |
| "memory(GiB)": 113.5, |
| "reward": 0.4666666842997074, |
| "reward_std": 0.4252053827047348, |
| "rewards/MultiModalAccuracyORM/mean": 0.4666666842997074, |
| "rewards/MultiModalAccuracyORM/std": 0.4252053827047348, |
| "step": 2010, |
| "train_speed(iter/s)": 0.031626 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 527.2, |
| "completions/mean_length": 329.9166778564453, |
| "completions/min_length": 202.0, |
| "epoch": 0.8141414141414142, |
| "grad_norm": 1.858641750452265, |
| "kl": 0.0157196044921875, |
| "learning_rate": 2e-07, |
| "loss": 0.023762321472167967, |
| "memory(GiB)": 113.5, |
| "reward": 0.40000001043081285, |
| "reward_std": 0.3144780844449997, |
| "rewards/MultiModalAccuracyORM/mean": 0.40000001043081285, |
| "rewards/MultiModalAccuracyORM/std": 0.3144780844449997, |
| "step": 2015, |
| "train_speed(iter/s)": 0.031642 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 684.0, |
| "completions/mean_length": 422.71667633056643, |
| "completions/min_length": 200.3, |
| "epoch": 0.8161616161616162, |
| "grad_norm": 3.0722357868631334, |
| "kl": 0.0186279296875, |
| "learning_rate": 2e-07, |
| "loss": -0.03257267475128174, |
| "memory(GiB)": 113.5, |
| "reward": 0.32500000968575476, |
| "reward_std": 0.4204265087842941, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500000968575476, |
| "rewards/MultiModalAccuracyORM/std": 0.4204265087842941, |
| "step": 2020, |
| "train_speed(iter/s)": 0.031653 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 590.7, |
| "completions/mean_length": 355.5083511352539, |
| "completions/min_length": 214.7, |
| "epoch": 0.8181818181818182, |
| "grad_norm": 2.729236730716231, |
| "kl": 0.022601318359375, |
| "learning_rate": 2e-07, |
| "loss": -0.003387349843978882, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000074505806, |
| "reward_std": 0.45008404850959777, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.45008404850959777, |
| "step": 2025, |
| "train_speed(iter/s)": 0.031658 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 653.6, |
| "completions/mean_length": 421.4000183105469, |
| "completions/min_length": 253.0, |
| "epoch": 0.8202020202020202, |
| "grad_norm": 1.744543874583184, |
| "kl": 0.0112030029296875, |
| "learning_rate": 2e-07, |
| "loss": -0.013242574036121368, |
| "memory(GiB)": 113.5, |
| "reward": 0.1083333358168602, |
| "reward_std": 0.29628167152404783, |
| "rewards/MultiModalAccuracyORM/mean": 0.1083333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.29628167152404783, |
| "step": 2030, |
| "train_speed(iter/s)": 0.031663 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 570.7, |
| "completions/mean_length": 348.20834197998045, |
| "completions/min_length": 219.4, |
| "epoch": 0.8222222222222222, |
| "grad_norm": 1.3474739820299675, |
| "kl": 0.018927001953125, |
| "learning_rate": 2e-07, |
| "loss": 0.04633485376834869, |
| "memory(GiB)": 113.5, |
| "reward": 0.37500000596046446, |
| "reward_std": 0.27622397541999816, |
| "rewards/MultiModalAccuracyORM/mean": 0.37500000596046446, |
| "rewards/MultiModalAccuracyORM/std": 0.27622397541999816, |
| "step": 2035, |
| "train_speed(iter/s)": 0.031672 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 589.4, |
| "completions/mean_length": 350.50000915527346, |
| "completions/min_length": 204.4, |
| "epoch": 0.8242424242424242, |
| "grad_norm": 1.5018646106657063, |
| "kl": 0.019403076171875, |
| "learning_rate": 2e-07, |
| "loss": 0.030666446685791014, |
| "memory(GiB)": 113.5, |
| "reward": 0.49166667014360427, |
| "reward_std": 0.32050161957740786, |
| "rewards/MultiModalAccuracyORM/mean": 0.49166667014360427, |
| "rewards/MultiModalAccuracyORM/std": 0.32050161957740786, |
| "step": 2040, |
| "train_speed(iter/s)": 0.031681 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 580.7, |
| "completions/mean_length": 343.3500045776367, |
| "completions/min_length": 214.3, |
| "epoch": 0.8262626262626263, |
| "grad_norm": 1.212062767454231, |
| "kl": 0.0136077880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.00010424554347991944, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.3485885590314865, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333402872086, |
| "rewards/MultiModalAccuracyORM/std": 0.3485885590314865, |
| "step": 2045, |
| "train_speed(iter/s)": 0.031692 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 597.0, |
| "completions/mean_length": 329.8250106811523, |
| "completions/min_length": 163.3, |
| "epoch": 0.8282828282828283, |
| "grad_norm": 1.4699358421537125, |
| "kl": 0.015545654296875, |
| "learning_rate": 2e-07, |
| "loss": 0.02045893669128418, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333656191826, |
| "reward_std": 0.21999078392982482, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333656191826, |
| "rewards/MultiModalAccuracyORM/std": 0.21999078392982482, |
| "step": 2050, |
| "train_speed(iter/s)": 0.031697 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 470.3, |
| "completions/mean_length": 291.33334197998045, |
| "completions/min_length": 162.8, |
| "epoch": 0.8303030303030303, |
| "grad_norm": 1.4524213577819918, |
| "kl": 0.0140533447265625, |
| "learning_rate": 2e-07, |
| "loss": 0.008110976219177246, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666738688946, |
| "reward_std": 0.20369119048118592, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.20369119048118592, |
| "step": 2055, |
| "train_speed(iter/s)": 0.031712 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 486.6, |
| "completions/mean_length": 289.6416732788086, |
| "completions/min_length": 159.3, |
| "epoch": 0.8323232323232324, |
| "grad_norm": 4.225291158056462, |
| "kl": 0.0178955078125, |
| "learning_rate": 2e-07, |
| "loss": 0.025725898146629334, |
| "memory(GiB)": 113.5, |
| "reward": 0.20000000447034835, |
| "reward_std": 0.29414459466934206, |
| "rewards/MultiModalAccuracyORM/mean": 0.20000000447034835, |
| "rewards/MultiModalAccuracyORM/std": 0.29414459466934206, |
| "step": 2060, |
| "train_speed(iter/s)": 0.031722 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 478.6, |
| "completions/mean_length": 311.5416793823242, |
| "completions/min_length": 190.1, |
| "epoch": 0.8343434343434344, |
| "grad_norm": 2.6613790818964134, |
| "kl": 0.021978759765625, |
| "learning_rate": 2e-07, |
| "loss": 0.006576963514089584, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666753590107, |
| "reward_std": 0.40155683159828187, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.40155683159828187, |
| "step": 2065, |
| "train_speed(iter/s)": 0.03173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.6, |
| "completions/mean_length": 315.6750038146973, |
| "completions/min_length": 170.0, |
| "epoch": 0.8363636363636363, |
| "grad_norm": 1.5051205676406512, |
| "kl": 0.0233642578125, |
| "learning_rate": 2e-07, |
| "loss": 0.09363476037979127, |
| "memory(GiB)": 113.5, |
| "reward": 0.508333345502615, |
| "reward_std": 0.2822715103626251, |
| "rewards/MultiModalAccuracyORM/mean": 0.508333345502615, |
| "rewards/MultiModalAccuracyORM/std": 0.2822715103626251, |
| "step": 2070, |
| "train_speed(iter/s)": 0.03174 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 633.5, |
| "completions/mean_length": 375.75000762939453, |
| "completions/min_length": 224.4, |
| "epoch": 0.8383838383838383, |
| "grad_norm": 1.8057057513107744, |
| "kl": 0.018963623046875, |
| "learning_rate": 2e-07, |
| "loss": -0.023636098206043243, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000111758709, |
| "reward_std": 0.33306954205036166, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000111758709, |
| "rewards/MultiModalAccuracyORM/std": 0.33306954205036166, |
| "step": 2075, |
| "train_speed(iter/s)": 0.031751 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 704.7, |
| "completions/mean_length": 414.0416793823242, |
| "completions/min_length": 194.4, |
| "epoch": 0.8404040404040404, |
| "grad_norm": 1.7580953588231485, |
| "kl": 0.015985107421875, |
| "learning_rate": 2e-07, |
| "loss": 0.004860112071037292, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.30333785712718964, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333334028720857, |
| "rewards/MultiModalAccuracyORM/std": 0.30333785712718964, |
| "step": 2080, |
| "train_speed(iter/s)": 0.031761 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 591.9, |
| "completions/mean_length": 372.9250122070313, |
| "completions/min_length": 192.9, |
| "epoch": 0.8424242424242424, |
| "grad_norm": 1.6888227745633726, |
| "kl": 0.018841552734375, |
| "learning_rate": 2e-07, |
| "loss": 0.0038746654987335204, |
| "memory(GiB)": 113.5, |
| "reward": 0.40000001564621923, |
| "reward_std": 0.3948384612798691, |
| "rewards/MultiModalAccuracyORM/mean": 0.40000001564621923, |
| "rewards/MultiModalAccuracyORM/std": 0.3948384612798691, |
| "step": 2085, |
| "train_speed(iter/s)": 0.031778 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 547.5, |
| "completions/mean_length": 359.8833480834961, |
| "completions/min_length": 211.2, |
| "epoch": 0.8444444444444444, |
| "grad_norm": 2.0473894442291605, |
| "kl": 0.0135040283203125, |
| "learning_rate": 2e-07, |
| "loss": -0.005132901668548584, |
| "memory(GiB)": 113.5, |
| "reward": 0.4833333469927311, |
| "reward_std": 0.38904850780963895, |
| "rewards/MultiModalAccuracyORM/mean": 0.4833333469927311, |
| "rewards/MultiModalAccuracyORM/std": 0.38904850780963895, |
| "step": 2090, |
| "train_speed(iter/s)": 0.031799 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 740.2, |
| "completions/mean_length": 453.96668243408203, |
| "completions/min_length": 251.3, |
| "epoch": 0.8464646464646465, |
| "grad_norm": 1.9528281428716412, |
| "kl": 0.015765380859375, |
| "learning_rate": 2e-07, |
| "loss": -0.00459083616733551, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666783392429, |
| "reward_std": 0.4211809396743774, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666783392429, |
| "rewards/MultiModalAccuracyORM/std": 0.4211809396743774, |
| "step": 2095, |
| "train_speed(iter/s)": 0.031796 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 640.1, |
| "completions/mean_length": 360.11667785644534, |
| "completions/min_length": 181.9, |
| "epoch": 0.8484848484848485, |
| "grad_norm": 1.6610621083165809, |
| "kl": 0.0178131103515625, |
| "learning_rate": 2e-07, |
| "loss": 0.0021423667669296263, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.33000870048999786, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667237877844, |
| "rewards/MultiModalAccuracyORM/std": 0.33000870048999786, |
| "step": 2100, |
| "train_speed(iter/s)": 0.031806 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 480.3, |
| "completions/mean_length": 264.67501068115234, |
| "completions/min_length": 136.9, |
| "epoch": 0.8505050505050505, |
| "grad_norm": 2.0696467764003192, |
| "kl": 0.163067626953125, |
| "learning_rate": 2e-07, |
| "loss": 0.0025389432907104493, |
| "memory(GiB)": 113.5, |
| "reward": 0.5000000096857548, |
| "reward_std": 0.22625694572925567, |
| "rewards/MultiModalAccuracyORM/mean": 0.5000000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.22625694572925567, |
| "step": 2105, |
| "train_speed(iter/s)": 0.03182 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 529.7, |
| "completions/mean_length": 341.6750091552734, |
| "completions/min_length": 188.4, |
| "epoch": 0.8525252525252526, |
| "grad_norm": 1.7692403149903426, |
| "kl": 0.0196319580078125, |
| "learning_rate": 2e-07, |
| "loss": 0.016690313816070557, |
| "memory(GiB)": 113.5, |
| "reward": 0.3583333343267441, |
| "reward_std": 0.21292004883289337, |
| "rewards/MultiModalAccuracyORM/mean": 0.3583333343267441, |
| "rewards/MultiModalAccuracyORM/std": 0.21292004883289337, |
| "step": 2110, |
| "train_speed(iter/s)": 0.031829 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 717.9, |
| "completions/mean_length": 410.6416839599609, |
| "completions/min_length": 215.5, |
| "epoch": 0.8545454545454545, |
| "grad_norm": 1.0155490841614827, |
| "kl": 0.0239013671875, |
| "learning_rate": 2e-07, |
| "loss": 0.06116962432861328, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000067055225, |
| "reward_std": 0.3330695390701294, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.3330695390701294, |
| "step": 2115, |
| "train_speed(iter/s)": 0.031834 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 756.7, |
| "completions/mean_length": 427.03334350585936, |
| "completions/min_length": 240.2, |
| "epoch": 0.8565656565656565, |
| "grad_norm": 1.5010329222185153, |
| "kl": 0.0179931640625, |
| "learning_rate": 2e-07, |
| "loss": 0.008207672834396362, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000067055225, |
| "reward_std": 0.26822818219661715, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.26822818219661715, |
| "step": 2120, |
| "train_speed(iter/s)": 0.031834 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 474.0, |
| "completions/mean_length": 278.35834350585935, |
| "completions/min_length": 133.5, |
| "epoch": 0.8585858585858586, |
| "grad_norm": 2.4751168878714296, |
| "kl": 0.0186279296875, |
| "learning_rate": 2e-07, |
| "loss": 0.002880534529685974, |
| "memory(GiB)": 113.5, |
| "reward": 0.44166667833924295, |
| "reward_std": 0.26897315979003905, |
| "rewards/MultiModalAccuracyORM/mean": 0.44166667833924295, |
| "rewards/MultiModalAccuracyORM/std": 0.26897315979003905, |
| "step": 2125, |
| "train_speed(iter/s)": 0.03185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 778.6, |
| "completions/mean_length": 397.366682434082, |
| "completions/min_length": 238.8, |
| "epoch": 0.8606060606060606, |
| "grad_norm": 2.5358484406016406, |
| "kl": 0.024908447265625, |
| "learning_rate": 2e-07, |
| "loss": -0.008894717693328858, |
| "memory(GiB)": 113.5, |
| "reward": 0.4166666753590107, |
| "reward_std": 0.39010730385780334, |
| "rewards/MultiModalAccuracyORM/mean": 0.4166666753590107, |
| "rewards/MultiModalAccuracyORM/std": 0.39010730385780334, |
| "step": 2130, |
| "train_speed(iter/s)": 0.031849 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 571.6, |
| "completions/mean_length": 347.92500762939454, |
| "completions/min_length": 210.0, |
| "epoch": 0.8626262626262626, |
| "grad_norm": 1.4480874521635712, |
| "kl": 0.013836669921875, |
| "learning_rate": 2e-07, |
| "loss": -0.02624996304512024, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166666865348815, |
| "reward_std": 0.2815766751766205, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166666865348815, |
| "rewards/MultiModalAccuracyORM/std": 0.2815766751766205, |
| "step": 2135, |
| "train_speed(iter/s)": 0.031857 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 792.0, |
| "completions/mean_length": 399.3750129699707, |
| "completions/min_length": 233.7, |
| "epoch": 0.8646464646464647, |
| "grad_norm": 2.3120304434709595, |
| "kl": 0.01986083984375, |
| "learning_rate": 2e-07, |
| "loss": -0.004719728231430053, |
| "memory(GiB)": 113.5, |
| "reward": 0.22500000149011612, |
| "reward_std": 0.22384164929389955, |
| "rewards/MultiModalAccuracyORM/mean": 0.22500000149011612, |
| "rewards/MultiModalAccuracyORM/std": 0.22384164929389955, |
| "step": 2140, |
| "train_speed(iter/s)": 0.03185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 787.2, |
| "completions/mean_length": 441.508349609375, |
| "completions/min_length": 236.5, |
| "epoch": 0.8666666666666667, |
| "grad_norm": 2.17037271282662, |
| "kl": 0.01793212890625, |
| "learning_rate": 2e-07, |
| "loss": -0.012784427404403687, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333333432674408, |
| "reward_std": 0.2581467509269714, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.2581467509269714, |
| "step": 2145, |
| "train_speed(iter/s)": 0.03185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 453.1, |
| "completions/mean_length": 304.8583419799805, |
| "completions/min_length": 197.2, |
| "epoch": 0.8686868686868687, |
| "grad_norm": 2.4684483286798313, |
| "kl": 0.018658447265625, |
| "learning_rate": 2e-07, |
| "loss": -0.013285607099533081, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000067055225, |
| "reward_std": 0.3696640759706497, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.3696640759706497, |
| "step": 2150, |
| "train_speed(iter/s)": 0.031865 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 579.6, |
| "completions/mean_length": 357.44168243408205, |
| "completions/min_length": 206.1, |
| "epoch": 0.8707070707070707, |
| "grad_norm": 2.4866065792724794, |
| "kl": 0.01856689453125, |
| "learning_rate": 2e-07, |
| "loss": -0.014015734195709229, |
| "memory(GiB)": 113.5, |
| "reward": 0.23333334177732468, |
| "reward_std": 0.24436976611614228, |
| "rewards/MultiModalAccuracyORM/mean": 0.23333334177732468, |
| "rewards/MultiModalAccuracyORM/std": 0.24436976611614228, |
| "step": 2155, |
| "train_speed(iter/s)": 0.03187 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 542.2, |
| "completions/mean_length": 316.7916778564453, |
| "completions/min_length": 156.8, |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.10342060356020474, |
| "kl": 0.019561767578125, |
| "learning_rate": 2e-07, |
| "loss": 0.015072919428348541, |
| "memory(GiB)": 113.5, |
| "reward": 0.5250000067055225, |
| "reward_std": 0.23303491175174712, |
| "rewards/MultiModalAccuracyORM/mean": 0.5250000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.23303491175174712, |
| "step": 2160, |
| "train_speed(iter/s)": 0.031885 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 591.5, |
| "completions/mean_length": 359.40834350585936, |
| "completions/min_length": 213.1, |
| "epoch": 0.8747474747474747, |
| "grad_norm": 2.001531798018373, |
| "kl": 0.02108154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.03580483496189117, |
| "memory(GiB)": 113.5, |
| "reward": 0.2916666708886623, |
| "reward_std": 0.37593023777008056, |
| "rewards/MultiModalAccuracyORM/mean": 0.2916666708886623, |
| "rewards/MultiModalAccuracyORM/std": 0.37593023777008056, |
| "step": 2165, |
| "train_speed(iter/s)": 0.03189 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 609.1, |
| "completions/mean_length": 376.5666778564453, |
| "completions/min_length": 215.1, |
| "epoch": 0.8767676767676768, |
| "grad_norm": 1.7513379048306494, |
| "kl": 0.01739501953125, |
| "learning_rate": 2e-07, |
| "loss": -0.001603315770626068, |
| "memory(GiB)": 113.5, |
| "reward": 0.35833333656191824, |
| "reward_std": 0.27927026748657224, |
| "rewards/MultiModalAccuracyORM/mean": 0.35833333656191824, |
| "rewards/MultiModalAccuracyORM/std": 0.27927026748657224, |
| "step": 2170, |
| "train_speed(iter/s)": 0.031901 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 529.4, |
| "completions/mean_length": 326.4833419799805, |
| "completions/min_length": 178.7, |
| "epoch": 0.8787878787878788, |
| "grad_norm": 1.8257626566364757, |
| "kl": 0.01832275390625, |
| "learning_rate": 2e-07, |
| "loss": -0.0064360305666923525, |
| "memory(GiB)": 113.5, |
| "reward": 0.4000000096857548, |
| "reward_std": 0.2528681933879852, |
| "rewards/MultiModalAccuracyORM/mean": 0.4000000096857548, |
| "rewards/MultiModalAccuracyORM/std": 0.2528681933879852, |
| "step": 2175, |
| "train_speed(iter/s)": 0.031922 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 654.1, |
| "completions/mean_length": 443.8500183105469, |
| "completions/min_length": 252.4, |
| "epoch": 0.8808080808080808, |
| "grad_norm": 2.0080312898238777, |
| "kl": 0.0168212890625, |
| "learning_rate": 2e-07, |
| "loss": 0.003071814775466919, |
| "memory(GiB)": 113.5, |
| "reward": 0.25000001341104505, |
| "reward_std": 0.27749558687210085, |
| "rewards/MultiModalAccuracyORM/mean": 0.25000001341104505, |
| "rewards/MultiModalAccuracyORM/std": 0.27749558687210085, |
| "step": 2180, |
| "train_speed(iter/s)": 0.031939 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 546.9, |
| "completions/mean_length": 286.4000068664551, |
| "completions/min_length": 137.1, |
| "epoch": 0.8828282828282829, |
| "grad_norm": 2.6881230452510176, |
| "kl": 0.0223968505859375, |
| "learning_rate": 2e-07, |
| "loss": 0.0006526708602905273, |
| "memory(GiB)": 113.5, |
| "reward": 0.5333333387970924, |
| "reward_std": 0.20369119048118592, |
| "rewards/MultiModalAccuracyORM/mean": 0.5333333387970924, |
| "rewards/MultiModalAccuracyORM/std": 0.20369119048118592, |
| "step": 2185, |
| "train_speed(iter/s)": 0.031947 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 610.5, |
| "completions/mean_length": 402.3166793823242, |
| "completions/min_length": 210.6, |
| "epoch": 0.8848484848484849, |
| "grad_norm": 2.1762586962994126, |
| "kl": 0.02080078125, |
| "learning_rate": 2e-07, |
| "loss": 0.01941031664609909, |
| "memory(GiB)": 113.5, |
| "reward": 0.13333333805203437, |
| "reward_std": 0.28399197161197665, |
| "rewards/MultiModalAccuracyORM/mean": 0.13333333805203437, |
| "rewards/MultiModalAccuracyORM/std": 0.28399197161197665, |
| "step": 2190, |
| "train_speed(iter/s)": 0.031947 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 578.7, |
| "completions/mean_length": 376.9000160217285, |
| "completions/min_length": 233.4, |
| "epoch": 0.8868686868686869, |
| "grad_norm": 1.5099724310943463, |
| "kl": 0.013751220703125, |
| "learning_rate": 2e-07, |
| "loss": 0.018771827220916748, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.22406027615070342, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.22406027615070342, |
| "step": 2195, |
| "train_speed(iter/s)": 0.031954 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 564.0, |
| "completions/mean_length": 270.4416732788086, |
| "completions/min_length": 142.9, |
| "epoch": 0.8888888888888888, |
| "grad_norm": 2.2743682671690997, |
| "kl": 0.023577880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.025069376826286315, |
| "memory(GiB)": 113.5, |
| "reward": 0.41666666939854624, |
| "reward_std": 0.34936913251876833, |
| "rewards/MultiModalAccuracyORM/mean": 0.41666666939854624, |
| "rewards/MultiModalAccuracyORM/std": 0.34936913251876833, |
| "step": 2200, |
| "train_speed(iter/s)": 0.031957 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 508.7, |
| "completions/mean_length": 299.5250076293945, |
| "completions/min_length": 173.3, |
| "epoch": 0.8909090909090909, |
| "grad_norm": 1.7147767163429606, |
| "kl": 0.015533447265625, |
| "learning_rate": 2e-07, |
| "loss": -0.01650981158018112, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666761040688, |
| "reward_std": 0.26142621636390684, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666761040688, |
| "rewards/MultiModalAccuracyORM/std": 0.26142621636390684, |
| "step": 2205, |
| "train_speed(iter/s)": 0.031967 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 678.7, |
| "completions/mean_length": 303.3583396911621, |
| "completions/min_length": 148.9, |
| "epoch": 0.8929292929292929, |
| "grad_norm": 0.07977564640032515, |
| "kl": 0.022882080078125, |
| "learning_rate": 2e-07, |
| "loss": -0.015148724615573882, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000052154064, |
| "reward_std": 0.2333131343126297, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000052154064, |
| "rewards/MultiModalAccuracyORM/std": 0.2333131343126297, |
| "step": 2210, |
| "train_speed(iter/s)": 0.031965 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 739.5, |
| "completions/mean_length": 388.18333740234374, |
| "completions/min_length": 198.5, |
| "epoch": 0.8949494949494949, |
| "grad_norm": 0.8099900753838608, |
| "kl": 0.0284912109375, |
| "learning_rate": 2e-07, |
| "loss": 0.00753181129693985, |
| "memory(GiB)": 113.5, |
| "reward": 0.416666679084301, |
| "reward_std": 0.34156554043292997, |
| "rewards/MultiModalAccuracyORM/mean": 0.416666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.34156554043292997, |
| "step": 2215, |
| "train_speed(iter/s)": 0.03197 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 779.3, |
| "completions/mean_length": 450.6333526611328, |
| "completions/min_length": 239.0, |
| "epoch": 0.896969696969697, |
| "grad_norm": 1.7310208765669708, |
| "kl": 0.0198486328125, |
| "learning_rate": 2e-07, |
| "loss": -0.004081086814403534, |
| "memory(GiB)": 113.5, |
| "reward": 0.2500000074505806, |
| "reward_std": 0.3800142765045166, |
| "rewards/MultiModalAccuracyORM/mean": 0.2500000074505806, |
| "rewards/MultiModalAccuracyORM/std": 0.3800142765045166, |
| "step": 2220, |
| "train_speed(iter/s)": 0.03197 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 676.4, |
| "completions/mean_length": 360.75001220703126, |
| "completions/min_length": 217.2, |
| "epoch": 0.898989898989899, |
| "grad_norm": 2.1020973545612702, |
| "kl": 0.01806640625, |
| "learning_rate": 2e-07, |
| "loss": 0.03712728023529053, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000134110451, |
| "reward_std": 0.32673218548297883, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000134110451, |
| "rewards/MultiModalAccuracyORM/std": 0.32673218548297883, |
| "step": 2225, |
| "train_speed(iter/s)": 0.031975 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 820.2, |
| "completions/mean_length": 446.9583396911621, |
| "completions/min_length": 246.3, |
| "epoch": 0.901010101010101, |
| "grad_norm": 1.2524422904219505, |
| "kl": 0.016302490234375, |
| "learning_rate": 2e-07, |
| "loss": -0.02771589457988739, |
| "memory(GiB)": 113.5, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.31088480055332185, |
| "rewards/MultiModalAccuracyORM/mean": 0.32500000596046447, |
| "rewards/MultiModalAccuracyORM/std": 0.31088480055332185, |
| "step": 2230, |
| "train_speed(iter/s)": 0.031963 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 511.1, |
| "completions/mean_length": 314.425008392334, |
| "completions/min_length": 211.4, |
| "epoch": 0.9030303030303031, |
| "grad_norm": 1.941568088508899, |
| "kl": 0.0159149169921875, |
| "learning_rate": 2e-07, |
| "loss": 0.03777821063995361, |
| "memory(GiB)": 113.5, |
| "reward": 0.450000011920929, |
| "reward_std": 0.391499400138855, |
| "rewards/MultiModalAccuracyORM/mean": 0.450000011920929, |
| "rewards/MultiModalAccuracyORM/std": 0.391499400138855, |
| "step": 2235, |
| "train_speed(iter/s)": 0.031975 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 520.2, |
| "completions/mean_length": 344.6333435058594, |
| "completions/min_length": 198.5, |
| "epoch": 0.9050505050505051, |
| "grad_norm": 2.0763848655087673, |
| "kl": 0.019061279296875, |
| "learning_rate": 2e-07, |
| "loss": -0.0011584073305130004, |
| "memory(GiB)": 113.5, |
| "reward": 0.40000000670552255, |
| "reward_std": 0.34407602846622465, |
| "rewards/MultiModalAccuracyORM/mean": 0.40000000670552255, |
| "rewards/MultiModalAccuracyORM/std": 0.34407602846622465, |
| "step": 2240, |
| "train_speed(iter/s)": 0.031993 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 573.4, |
| "completions/mean_length": 319.63334197998046, |
| "completions/min_length": 160.3, |
| "epoch": 0.907070707070707, |
| "grad_norm": 2.59722388457303, |
| "kl": 0.022515869140625, |
| "learning_rate": 2e-07, |
| "loss": -0.017506715655326844, |
| "memory(GiB)": 113.5, |
| "reward": 0.27500001043081285, |
| "reward_std": 0.3227818846702576, |
| "rewards/MultiModalAccuracyORM/mean": 0.27500001043081285, |
| "rewards/MultiModalAccuracyORM/std": 0.3227818846702576, |
| "step": 2245, |
| "train_speed(iter/s)": 0.031998 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 1.3781323461012882, |
| "learning_rate": 2e-07, |
| "loss": 0.01341366171836853, |
| "memory(GiB)": 113.5, |
| "step": 2250, |
| "train_speed(iter/s)": 0.032003 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.0016666666666666666, |
| "eval_completions/max_length": 642.72, |
| "eval_completions/mean_length": 376.58501220703124, |
| "eval_completions/min_length": 201.48, |
| "eval_kl": 0.01755615234375, |
| "eval_loss": 0.022878510877490044, |
| "eval_reward": 0.3366666728258133, |
| "eval_reward_std": 0.29963068544864657, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.3366666728258133, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.29963068544864657, |
| "eval_runtime": 620.6156, |
| "eval_samples_per_second": 0.081, |
| "eval_steps_per_second": 0.008, |
| "step": 2250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 717.55, |
| "completions/mean_length": 408.7458442687988, |
| "completions/min_length": 214.2, |
| "epoch": 0.9111111111111111, |
| "grad_norm": 0.0911724129495613, |
| "kl": 0.01767578125, |
| "learning_rate": 2e-07, |
| "loss": 0.05687015056610108, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666738688946, |
| "reward_std": 0.32789033353328706, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666738688946, |
| "rewards/MultiModalAccuracyORM/std": 0.32789033353328706, |
| "step": 2255, |
| "train_speed(iter/s)": 0.031634 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 646.3, |
| "completions/mean_length": 310.35000762939455, |
| "completions/min_length": 147.8, |
| "epoch": 0.9131313131313131, |
| "grad_norm": 2.3215328543725837, |
| "kl": 0.02152099609375, |
| "learning_rate": 2e-07, |
| "loss": -0.02131924331188202, |
| "memory(GiB)": 113.5, |
| "reward": 0.3750000037252903, |
| "reward_std": 0.2659719169139862, |
| "rewards/MultiModalAccuracyORM/mean": 0.3750000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.2659719169139862, |
| "step": 2260, |
| "train_speed(iter/s)": 0.03164 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 628.3, |
| "completions/mean_length": 371.5416793823242, |
| "completions/min_length": 220.7, |
| "epoch": 0.9151515151515152, |
| "grad_norm": 2.126621344773754, |
| "kl": 0.018896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.024756547808647156, |
| "memory(GiB)": 113.5, |
| "reward": 0.2750000052154064, |
| "reward_std": 0.2619264245033264, |
| "rewards/MultiModalAccuracyORM/mean": 0.2750000052154064, |
| "rewards/MultiModalAccuracyORM/std": 0.2619264245033264, |
| "step": 2265, |
| "train_speed(iter/s)": 0.031637 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 638.4, |
| "completions/mean_length": 415.3916778564453, |
| "completions/min_length": 254.9, |
| "epoch": 0.9171717171717172, |
| "grad_norm": 2.9790243495572137, |
| "kl": 0.0215087890625, |
| "learning_rate": 2e-07, |
| "loss": -0.012356171011924743, |
| "memory(GiB)": 113.5, |
| "reward": 0.1666666679084301, |
| "reward_std": 0.27520077526569364, |
| "rewards/MultiModalAccuracyORM/mean": 0.1666666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.27520077526569364, |
| "step": 2270, |
| "train_speed(iter/s)": 0.031641 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 771.9, |
| "completions/mean_length": 425.84167633056643, |
| "completions/min_length": 242.2, |
| "epoch": 0.9191919191919192, |
| "grad_norm": 1.0861715717638791, |
| "kl": 0.0269012451171875, |
| "learning_rate": 2e-07, |
| "loss": 0.010645134747028351, |
| "memory(GiB)": 113.5, |
| "reward": 0.2583333425223827, |
| "reward_std": 0.30260742604732516, |
| "rewards/MultiModalAccuracyORM/mean": 0.2583333425223827, |
| "rewards/MultiModalAccuracyORM/std": 0.30260742604732516, |
| "step": 2275, |
| "train_speed(iter/s)": 0.031636 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 393.9, |
| "completions/mean_length": 271.05000762939454, |
| "completions/min_length": 149.0, |
| "epoch": 0.9212121212121213, |
| "grad_norm": 0.05924941442962051, |
| "kl": 0.0225830078125, |
| "learning_rate": 2e-07, |
| "loss": 0.031935521960258485, |
| "memory(GiB)": 113.5, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.304396653175354, |
| "rewards/MultiModalAccuracyORM/mean": 0.28333333805203437, |
| "rewards/MultiModalAccuracyORM/std": 0.304396653175354, |
| "step": 2280, |
| "train_speed(iter/s)": 0.031652 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 711.7, |
| "completions/mean_length": 391.0416763305664, |
| "completions/min_length": 203.9, |
| "epoch": 0.9232323232323232, |
| "grad_norm": 2.8587723324821566, |
| "kl": 0.024896240234375, |
| "learning_rate": 2e-07, |
| "loss": 0.017455708980560303, |
| "memory(GiB)": 113.5, |
| "reward": 0.33333333805203436, |
| "reward_std": 0.29177859127521516, |
| "rewards/MultiModalAccuracyORM/mean": 0.33333333805203436, |
| "rewards/MultiModalAccuracyORM/std": 0.29177859127521516, |
| "step": 2285, |
| "train_speed(iter/s)": 0.031656 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 544.5, |
| "completions/mean_length": 356.05834197998047, |
| "completions/min_length": 179.4, |
| "epoch": 0.9252525252525252, |
| "grad_norm": 3.219718675307709, |
| "kl": 0.0145965576171875, |
| "learning_rate": 2e-07, |
| "loss": -0.032944440841674805, |
| "memory(GiB)": 113.5, |
| "reward": 0.3000000067055225, |
| "reward_std": 0.2840515673160553, |
| "rewards/MultiModalAccuracyORM/mean": 0.3000000067055225, |
| "rewards/MultiModalAccuracyORM/std": 0.2840515673160553, |
| "step": 2290, |
| "train_speed(iter/s)": 0.031663 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 663.9, |
| "completions/mean_length": 448.50000762939453, |
| "completions/min_length": 247.4, |
| "epoch": 0.9272727272727272, |
| "grad_norm": 0.8757317301258869, |
| "kl": 0.0155517578125, |
| "learning_rate": 2e-07, |
| "loss": -0.008566761016845703, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333432674408, |
| "reward_std": 0.27596975266933443, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.27596975266933443, |
| "step": 2295, |
| "train_speed(iter/s)": 0.031671 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 786.8, |
| "completions/mean_length": 507.6833526611328, |
| "completions/min_length": 320.1, |
| "epoch": 0.9292929292929293, |
| "grad_norm": 0.9362166291898165, |
| "kl": 0.02156982421875, |
| "learning_rate": 2e-07, |
| "loss": 0.018462255597114563, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333373069763, |
| "reward_std": 0.2464074045419693, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.2464074045419693, |
| "step": 2300, |
| "train_speed(iter/s)": 0.03166 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 479.9, |
| "completions/mean_length": 257.65000381469724, |
| "completions/min_length": 128.1, |
| "epoch": 0.9313131313131313, |
| "grad_norm": 2.658818675138077, |
| "kl": 0.029730224609375, |
| "learning_rate": 2e-07, |
| "loss": 0.023678554594516753, |
| "memory(GiB)": 113.5, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.15821026563644408, |
| "rewards/MultiModalAccuracyORM/mean": 0.30000000521540643, |
| "rewards/MultiModalAccuracyORM/std": 0.15821026563644408, |
| "step": 2305, |
| "train_speed(iter/s)": 0.031671 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 800.1, |
| "completions/mean_length": 461.5750137329102, |
| "completions/min_length": 264.1, |
| "epoch": 0.9333333333333333, |
| "grad_norm": 1.525329758838897, |
| "kl": 0.0238433837890625, |
| "learning_rate": 2e-07, |
| "loss": 0.016385090351104737, |
| "memory(GiB)": 113.5, |
| "reward": 0.1666666693985462, |
| "reward_std": 0.3190022110939026, |
| "rewards/MultiModalAccuracyORM/mean": 0.1666666693985462, |
| "rewards/MultiModalAccuracyORM/std": 0.3190022110939026, |
| "step": 2310, |
| "train_speed(iter/s)": 0.031667 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 619.0, |
| "completions/mean_length": 360.6000061035156, |
| "completions/min_length": 211.2, |
| "epoch": 0.9353535353535354, |
| "grad_norm": 2.555254446139955, |
| "kl": 0.01925048828125, |
| "learning_rate": 2e-07, |
| "loss": -0.025304621458053587, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.309637188911438, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166667088866234, |
| "rewards/MultiModalAccuracyORM/std": 0.309637188911438, |
| "step": 2315, |
| "train_speed(iter/s)": 0.031675 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 662.0, |
| "completions/mean_length": 406.8916778564453, |
| "completions/min_length": 230.6, |
| "epoch": 0.9373737373737374, |
| "grad_norm": 1.8305198392785023, |
| "kl": 0.0230133056640625, |
| "learning_rate": 2e-07, |
| "loss": -0.014680406451225281, |
| "memory(GiB)": 113.5, |
| "reward": 0.3500000037252903, |
| "reward_std": 0.3111986190080643, |
| "rewards/MultiModalAccuracyORM/mean": 0.3500000037252903, |
| "rewards/MultiModalAccuracyORM/std": 0.3111986190080643, |
| "step": 2320, |
| "train_speed(iter/s)": 0.031676 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 467.8, |
| "completions/mean_length": 282.15833892822263, |
| "completions/min_length": 162.0, |
| "epoch": 0.9393939393939394, |
| "grad_norm": 2.852841229555507, |
| "kl": 0.02640380859375, |
| "learning_rate": 2e-07, |
| "loss": 0.0326883852481842, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333432674408, |
| "reward_std": 0.4167425513267517, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333432674408, |
| "rewards/MultiModalAccuracyORM/std": 0.4167425513267517, |
| "step": 2325, |
| "train_speed(iter/s)": 0.031687 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 790.1, |
| "completions/mean_length": 384.29168395996095, |
| "completions/min_length": 177.1, |
| "epoch": 0.9414141414141414, |
| "grad_norm": 2.0178414254144723, |
| "kl": 0.018560791015625, |
| "learning_rate": 2e-07, |
| "loss": 0.008831435441970825, |
| "memory(GiB)": 113.5, |
| "reward": 0.38333334028720856, |
| "reward_std": 0.36893364489078523, |
| "rewards/MultiModalAccuracyORM/mean": 0.38333334028720856, |
| "rewards/MultiModalAccuracyORM/std": 0.36893364489078523, |
| "step": 2330, |
| "train_speed(iter/s)": 0.031685 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 679.0, |
| "completions/mean_length": 368.05834503173827, |
| "completions/min_length": 206.5, |
| "epoch": 0.9434343434343434, |
| "grad_norm": 1.5228784773962754, |
| "kl": 0.015521240234375, |
| "learning_rate": 2e-07, |
| "loss": -0.008360534906387329, |
| "memory(GiB)": 113.5, |
| "reward": 0.3083333373069763, |
| "reward_std": 0.3352662086486816, |
| "rewards/MultiModalAccuracyORM/mean": 0.3083333373069763, |
| "rewards/MultiModalAccuracyORM/std": 0.3352662086486816, |
| "step": 2335, |
| "train_speed(iter/s)": 0.031686 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 1030.2, |
| "completions/mean_length": 481.2666900634766, |
| "completions/min_length": 237.4, |
| "epoch": 0.9454545454545454, |
| "grad_norm": 1.418697346446445, |
| "kl": 0.0329925537109375, |
| "learning_rate": 2e-07, |
| "loss": 0.0726934552192688, |
| "memory(GiB)": 113.5, |
| "reward": 0.2833333395421505, |
| "reward_std": 0.3713845372200012, |
| "rewards/MultiModalAccuracyORM/mean": 0.2833333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.3713845372200012, |
| "step": 2340, |
| "train_speed(iter/s)": 0.031672 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 668.9, |
| "completions/mean_length": 321.70001220703125, |
| "completions/min_length": 150.2, |
| "epoch": 0.9474747474747475, |
| "grad_norm": 2.0538342098414333, |
| "kl": 0.03148193359375, |
| "learning_rate": 2e-07, |
| "loss": 0.035471782088279724, |
| "memory(GiB)": 113.5, |
| "reward": 0.29166667610406877, |
| "reward_std": 0.1973894327878952, |
| "rewards/MultiModalAccuracyORM/mean": 0.29166667610406877, |
| "rewards/MultiModalAccuracyORM/std": 0.1973894327878952, |
| "step": 2345, |
| "train_speed(iter/s)": 0.031674 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 554.4, |
| "completions/mean_length": 358.76668243408204, |
| "completions/min_length": 218.4, |
| "epoch": 0.9494949494949495, |
| "grad_norm": 2.6339218970926903, |
| "kl": 0.0245635986328125, |
| "learning_rate": 2e-07, |
| "loss": 0.004336267709732056, |
| "memory(GiB)": 113.5, |
| "reward": 0.36666667833924294, |
| "reward_std": 0.32297651171684266, |
| "rewards/MultiModalAccuracyORM/mean": 0.36666667833924294, |
| "rewards/MultiModalAccuracyORM/std": 0.32297651171684266, |
| "step": 2350, |
| "train_speed(iter/s)": 0.031691 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 460.4, |
| "completions/mean_length": 282.98334274291994, |
| "completions/min_length": 158.3, |
| "epoch": 0.9515151515151515, |
| "grad_norm": 2.0291656458591145, |
| "kl": 0.020587158203125, |
| "learning_rate": 2e-07, |
| "loss": -0.05831232666969299, |
| "memory(GiB)": 113.5, |
| "reward": 0.5000000081956386, |
| "reward_std": 0.3330099433660507, |
| "rewards/MultiModalAccuracyORM/mean": 0.5000000081956386, |
| "rewards/MultiModalAccuracyORM/std": 0.3330099433660507, |
| "step": 2355, |
| "train_speed(iter/s)": 0.031702 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 455.3, |
| "completions/mean_length": 282.77500610351564, |
| "completions/min_length": 139.0, |
| "epoch": 0.9535353535353536, |
| "grad_norm": 0.11573786538748869, |
| "kl": 0.0304229736328125, |
| "learning_rate": 2e-07, |
| "loss": 0.03489102721214295, |
| "memory(GiB)": 113.5, |
| "reward": 0.24166666939854622, |
| "reward_std": 0.2355453997850418, |
| "rewards/MultiModalAccuracyORM/mean": 0.24166666939854622, |
| "rewards/MultiModalAccuracyORM/std": 0.2355453997850418, |
| "step": 2360, |
| "train_speed(iter/s)": 0.031718 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 628.4, |
| "completions/mean_length": 412.06668243408205, |
| "completions/min_length": 245.3, |
| "epoch": 0.9555555555555556, |
| "grad_norm": 2.17622948866824, |
| "kl": 0.019061279296875, |
| "learning_rate": 2e-07, |
| "loss": 0.005562397837638855, |
| "memory(GiB)": 113.5, |
| "reward": 0.4250000111758709, |
| "reward_std": 0.45383972525596616, |
| "rewards/MultiModalAccuracyORM/mean": 0.4250000111758709, |
| "rewards/MultiModalAccuracyORM/std": 0.45383972525596616, |
| "step": 2365, |
| "train_speed(iter/s)": 0.031722 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 605.1, |
| "completions/mean_length": 371.3166793823242, |
| "completions/min_length": 208.3, |
| "epoch": 0.9575757575757575, |
| "grad_norm": 2.3917395292059282, |
| "kl": 0.031591796875, |
| "learning_rate": 2e-07, |
| "loss": 0.00018071085214614867, |
| "memory(GiB)": 113.5, |
| "reward": 0.291666679084301, |
| "reward_std": 0.26498726308345794, |
| "rewards/MultiModalAccuracyORM/mean": 0.291666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.26498726308345794, |
| "step": 2370, |
| "train_speed(iter/s)": 0.031735 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 455.6, |
| "completions/mean_length": 301.4333435058594, |
| "completions/min_length": 178.5, |
| "epoch": 0.9595959595959596, |
| "grad_norm": 3.5970167327213822, |
| "kl": 0.02054443359375, |
| "learning_rate": 2e-07, |
| "loss": 0.01565767079591751, |
| "memory(GiB)": 113.5, |
| "reward": 0.44166667833924295, |
| "reward_std": 0.26897316575050356, |
| "rewards/MultiModalAccuracyORM/mean": 0.44166667833924295, |
| "rewards/MultiModalAccuracyORM/std": 0.26897316575050356, |
| "step": 2375, |
| "train_speed(iter/s)": 0.031751 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 584.8, |
| "completions/mean_length": 327.98334350585935, |
| "completions/min_length": 184.6, |
| "epoch": 0.9616161616161616, |
| "grad_norm": 2.197976826013823, |
| "kl": 0.021331787109375, |
| "learning_rate": 2e-07, |
| "loss": 0.005569913983345031, |
| "memory(GiB)": 113.5, |
| "reward": 0.43333334028720855, |
| "reward_std": 0.3840597689151764, |
| "rewards/MultiModalAccuracyORM/mean": 0.43333334028720855, |
| "rewards/MultiModalAccuracyORM/std": 0.3840597689151764, |
| "step": 2380, |
| "train_speed(iter/s)": 0.031761 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 708.0, |
| "completions/mean_length": 293.06667556762693, |
| "completions/min_length": 155.5, |
| "epoch": 0.9636363636363636, |
| "grad_norm": 2.126614857423257, |
| "kl": 0.0335205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.0018027305603027343, |
| "memory(GiB)": 113.5, |
| "reward": 0.416666679084301, |
| "reward_std": 0.3855114609003067, |
| "rewards/MultiModalAccuracyORM/mean": 0.416666679084301, |
| "rewards/MultiModalAccuracyORM/std": 0.3855114609003067, |
| "step": 2385, |
| "train_speed(iter/s)": 0.031758 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 627.6, |
| "completions/mean_length": 374.27501220703124, |
| "completions/min_length": 226.1, |
| "epoch": 0.9656565656565657, |
| "grad_norm": 2.285791825740683, |
| "kl": 0.03223876953125, |
| "learning_rate": 2e-07, |
| "loss": -0.007699564099311829, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166667088866233, |
| "reward_std": 0.3000969380140305, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667088866233, |
| "rewards/MultiModalAccuracyORM/std": 0.3000969380140305, |
| "step": 2390, |
| "train_speed(iter/s)": 0.031759 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 601.7, |
| "completions/mean_length": 297.45834197998045, |
| "completions/min_length": 152.0, |
| "epoch": 0.9676767676767677, |
| "grad_norm": 2.926559087104104, |
| "kl": 0.0413818359375, |
| "learning_rate": 2e-07, |
| "loss": 0.04997736811637878, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667610406873, |
| "reward_std": 0.3687034219503403, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667610406873, |
| "rewards/MultiModalAccuracyORM/std": 0.3687034219503403, |
| "step": 2395, |
| "train_speed(iter/s)": 0.031761 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 674.1, |
| "completions/mean_length": 422.52500915527344, |
| "completions/min_length": 260.5, |
| "epoch": 0.9696969696969697, |
| "grad_norm": 1.0391142999786047, |
| "kl": 0.02857666015625, |
| "learning_rate": 2e-07, |
| "loss": -0.008375594019889831, |
| "memory(GiB)": 113.5, |
| "reward": 0.45000000670552254, |
| "reward_std": 0.34407602846622465, |
| "rewards/MultiModalAccuracyORM/mean": 0.45000000670552254, |
| "rewards/MultiModalAccuracyORM/std": 0.34407602846622465, |
| "step": 2400, |
| "train_speed(iter/s)": 0.031765 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 726.7, |
| "completions/mean_length": 369.96667938232423, |
| "completions/min_length": 166.7, |
| "epoch": 0.9717171717171718, |
| "grad_norm": 1.9044448293066447, |
| "kl": 0.034771728515625, |
| "learning_rate": 2e-07, |
| "loss": 0.041448038816452024, |
| "memory(GiB)": 113.5, |
| "reward": 0.3666666761040688, |
| "reward_std": 0.3330695390701294, |
| "rewards/MultiModalAccuracyORM/mean": 0.3666666761040688, |
| "rewards/MultiModalAccuracyORM/std": 0.3330695390701294, |
| "step": 2405, |
| "train_speed(iter/s)": 0.031765 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 860.8, |
| "completions/mean_length": 442.18335113525393, |
| "completions/min_length": 199.9, |
| "epoch": 0.9737373737373738, |
| "grad_norm": 1.1043100849775993, |
| "kl": 0.0294525146484375, |
| "learning_rate": 2e-07, |
| "loss": 0.011988846212625503, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666731238365, |
| "reward_std": 0.383000972867012, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.383000972867012, |
| "step": 2410, |
| "train_speed(iter/s)": 0.031763 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03333333333333333, |
| "completions/max_length": 964.9, |
| "completions/mean_length": 423.55834503173827, |
| "completions/min_length": 182.4, |
| "epoch": 0.9757575757575757, |
| "grad_norm": 3.0380086133675968, |
| "kl": 0.037750244140625, |
| "learning_rate": 2e-07, |
| "loss": 0.02129605710506439, |
| "memory(GiB)": 113.5, |
| "reward": 0.5166666716337204, |
| "reward_std": 0.2104335606098175, |
| "rewards/MultiModalAccuracyORM/mean": 0.5166666716337204, |
| "rewards/MultiModalAccuracyORM/std": 0.2104335606098175, |
| "step": 2415, |
| "train_speed(iter/s)": 0.031748 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 493.0, |
| "completions/mean_length": 316.23334274291994, |
| "completions/min_length": 185.4, |
| "epoch": 0.9777777777777777, |
| "grad_norm": 3.286741279330765, |
| "kl": 0.03631591796875, |
| "learning_rate": 2e-07, |
| "loss": 0.01842118501663208, |
| "memory(GiB)": 113.5, |
| "reward": 0.4916666768491268, |
| "reward_std": 0.3266936391592026, |
| "rewards/MultiModalAccuracyORM/mean": 0.4916666768491268, |
| "rewards/MultiModalAccuracyORM/std": 0.3266936391592026, |
| "step": 2420, |
| "train_speed(iter/s)": 0.031765 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 624.4, |
| "completions/mean_length": 382.6333435058594, |
| "completions/min_length": 208.4, |
| "epoch": 0.9797979797979798, |
| "grad_norm": 3.1686862418479125, |
| "kl": 0.05205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.011619596928358077, |
| "memory(GiB)": 113.5, |
| "reward": 0.3166666731238365, |
| "reward_std": 0.32526837289333344, |
| "rewards/MultiModalAccuracyORM/mean": 0.3166666731238365, |
| "rewards/MultiModalAccuracyORM/std": 0.32526837289333344, |
| "step": 2425, |
| "train_speed(iter/s)": 0.031766 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 588.9, |
| "completions/mean_length": 343.9250144958496, |
| "completions/min_length": 189.7, |
| "epoch": 0.9818181818181818, |
| "grad_norm": 1.5585214145494302, |
| "kl": 0.034375, |
| "learning_rate": 2e-07, |
| "loss": 0.0014587238430976868, |
| "memory(GiB)": 113.5, |
| "reward": 0.17500000596046447, |
| "reward_std": 0.3244759202003479, |
| "rewards/MultiModalAccuracyORM/mean": 0.17500000596046447, |
| "rewards/MultiModalAccuracyORM/std": 0.3244759202003479, |
| "step": 2430, |
| "train_speed(iter/s)": 0.031779 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 608.2, |
| "completions/mean_length": 378.7916778564453, |
| "completions/min_length": 209.2, |
| "epoch": 0.9838383838383838, |
| "grad_norm": 2.73232643290958, |
| "kl": 0.04532470703125, |
| "learning_rate": 2e-07, |
| "loss": 0.062485653162002566, |
| "memory(GiB)": 113.5, |
| "reward": 0.4666666828095913, |
| "reward_std": 0.4470617562532425, |
| "rewards/MultiModalAccuracyORM/mean": 0.4666666828095913, |
| "rewards/MultiModalAccuracyORM/std": 0.4470617562532425, |
| "step": 2435, |
| "train_speed(iter/s)": 0.031786 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 738.5, |
| "completions/mean_length": 341.6166717529297, |
| "completions/min_length": 163.9, |
| "epoch": 0.9858585858585859, |
| "grad_norm": 1.3853546154126857, |
| "kl": 0.0336669921875, |
| "learning_rate": 2e-07, |
| "loss": -0.028276541829109193, |
| "memory(GiB)": 113.5, |
| "reward": 0.14166667088866233, |
| "reward_std": 0.3000969380140305, |
| "rewards/MultiModalAccuracyORM/mean": 0.14166667088866233, |
| "rewards/MultiModalAccuracyORM/std": 0.3000969380140305, |
| "step": 2440, |
| "train_speed(iter/s)": 0.03179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03333333333333333, |
| "completions/max_length": 914.9, |
| "completions/mean_length": 449.3500152587891, |
| "completions/min_length": 241.3, |
| "epoch": 0.9878787878787879, |
| "grad_norm": 2.2456582209413893, |
| "kl": 0.03409423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.0006516605615615844, |
| "memory(GiB)": 113.5, |
| "reward": 0.2083333358168602, |
| "reward_std": 0.32050161957740786, |
| "rewards/MultiModalAccuracyORM/mean": 0.2083333358168602, |
| "rewards/MultiModalAccuracyORM/std": 0.32050161957740786, |
| "step": 2445, |
| "train_speed(iter/s)": 0.031782 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.008333333333333333, |
| "completions/max_length": 626.9, |
| "completions/mean_length": 322.7333419799805, |
| "completions/min_length": 203.0, |
| "epoch": 0.98989898989899, |
| "grad_norm": 2.6524648003013103, |
| "kl": 0.03165283203125, |
| "learning_rate": 2e-07, |
| "loss": -0.008733100444078445, |
| "memory(GiB)": 113.5, |
| "reward": 0.6416666708886624, |
| "reward_std": 0.15824586153030396, |
| "rewards/MultiModalAccuracyORM/mean": 0.6416666708886624, |
| "rewards/MultiModalAccuracyORM/std": 0.15824586153030396, |
| "step": 2450, |
| "train_speed(iter/s)": 0.031782 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 881.4, |
| "completions/mean_length": 330.15001068115237, |
| "completions/min_length": 158.1, |
| "epoch": 0.9919191919191919, |
| "grad_norm": 1.7910216600269697, |
| "kl": 0.05638427734375, |
| "learning_rate": 2e-07, |
| "loss": -0.0065705299377441405, |
| "memory(GiB)": 113.5, |
| "reward": 0.2333333395421505, |
| "reward_std": 0.2815410792827606, |
| "rewards/MultiModalAccuracyORM/mean": 0.2333333395421505, |
| "rewards/MultiModalAccuracyORM/std": 0.2815410792827606, |
| "step": 2455, |
| "train_speed(iter/s)": 0.031777 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.016666666666666666, |
| "completions/max_length": 851.2, |
| "completions/mean_length": 378.46667633056643, |
| "completions/min_length": 185.4, |
| "epoch": 0.9939393939393939, |
| "grad_norm": 2.5045509391063856, |
| "kl": 0.04505615234375, |
| "learning_rate": 2e-07, |
| "loss": -0.008945465087890625, |
| "memory(GiB)": 113.5, |
| "reward": 0.40833334252238274, |
| "reward_std": 0.3794672876596451, |
| "rewards/MultiModalAccuracyORM/mean": 0.40833334252238274, |
| "rewards/MultiModalAccuracyORM/std": 0.3794672876596451, |
| "step": 2460, |
| "train_speed(iter/s)": 0.031779 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03333333333333333, |
| "completions/max_length": 879.8, |
| "completions/mean_length": 371.80834045410154, |
| "completions/min_length": 158.4, |
| "epoch": 0.9959595959595959, |
| "grad_norm": 3.572517250532036, |
| "kl": 0.051068115234375, |
| "learning_rate": 2e-07, |
| "loss": -0.013737475872039795, |
| "memory(GiB)": 113.5, |
| "reward": 0.31666667088866235, |
| "reward_std": 0.29408499896526336, |
| "rewards/MultiModalAccuracyORM/mean": 0.31666667088866235, |
| "rewards/MultiModalAccuracyORM/std": 0.29408499896526336, |
| "step": 2465, |
| "train_speed(iter/s)": 0.031766 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03333333333333333, |
| "completions/max_length": 1149.5, |
| "completions/mean_length": 445.8500122070312, |
| "completions/min_length": 184.4, |
| "epoch": 0.997979797979798, |
| "grad_norm": 2.2904897334575254, |
| "kl": 0.04656982421875, |
| "learning_rate": 2e-07, |
| "loss": -0.032226094603538515, |
| "memory(GiB)": 113.5, |
| "reward": 0.3416666775941849, |
| "reward_std": 0.4094175934791565, |
| "rewards/MultiModalAccuracyORM/mean": 0.3416666775941849, |
| "rewards/MultiModalAccuracyORM/std": 0.4094175934791565, |
| "step": 2470, |
| "train_speed(iter/s)": 0.031754 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4731764005283963, |
| "learning_rate": 2e-07, |
| "loss": 0.061235594749450686, |
| "memory(GiB)": 113.5, |
| "step": 2475, |
| "train_speed(iter/s)": 0.031746 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completions/clipped_ratio": 0.018333333333333333, |
| "eval_completions/max_length": 787.14, |
| "eval_completions/mean_length": 378.51834548950194, |
| "eval_completions/min_length": 186.72, |
| "eval_kl": 0.040185546875, |
| "eval_loss": 0.029814261943101883, |
| "eval_reward": 0.3483333396911621, |
| "eval_reward_std": 0.3004326641559601, |
| "eval_rewards/MultiModalAccuracyORM/mean": 0.3483333396911621, |
| "eval_rewards/MultiModalAccuracyORM/std": 0.3004326641559601, |
| "eval_runtime": 729.694, |
| "eval_samples_per_second": 0.069, |
| "eval_steps_per_second": 0.007, |
| "step": 2475 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2475, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|