| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.008502949460594144, |
| "eval_steps": 500, |
| "global_step": 40, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 216.796875, |
| "epoch": 0.00021257373651485358, |
| "grad_norm": 0.4854881763458252, |
| "kl": 9.614229202270508e-05, |
| "learning_rate": 9.997874149659865e-07, |
| "loss": 0.0, |
| "reward": 2.732285737991333, |
| "reward_std": 0.02619727296405472, |
| "rewards/format_reward_hoi_key": 0.9139583259820938, |
| "rewards/format_reward_hoi_object_label": 0.8222222253680229, |
| "rewards/format_reward_hoi_verb_label": 0.3161458373069763, |
| "rewards/hoi_iou_reward": 0.6799592822790146, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 173.3125, |
| "epoch": 0.00042514747302970716, |
| "grad_norm": 0.6831408739089966, |
| "kl": 1.3329088687896729e-05, |
| "learning_rate": 9.995748299319728e-07, |
| "loss": 0.0, |
| "reward": 2.8274163007736206, |
| "reward_std": 0.03815040903282352, |
| "rewards/format_reward_hoi_key": 0.8166666775941849, |
| "rewards/format_reward_hoi_object_label": 0.7916666567325592, |
| "rewards/format_reward_hoi_verb_label": 0.5974702388048172, |
| "rewards/hoi_iou_reward": 0.6216127127408981, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 166.03125, |
| "epoch": 0.0006377212095445608, |
| "grad_norm": 0.8407193422317505, |
| "kl": 0.00014454126358032227, |
| "learning_rate": 9.99362244897959e-07, |
| "loss": 0.0, |
| "reward": 2.986231029033661, |
| "reward_std": 0.0052611194987548515, |
| "rewards/format_reward_hoi_key": 0.8208333402872086, |
| "rewards/format_reward_hoi_object_label": 0.84375, |
| "rewards/format_reward_hoi_verb_label": 0.6927083432674408, |
| "rewards/hoi_iou_reward": 0.6289393231272697, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 185.703125, |
| "epoch": 0.0008502949460594143, |
| "grad_norm": 2.3168516159057617, |
| "kl": 0.00014531612396240234, |
| "learning_rate": 9.991496598639456e-07, |
| "loss": 0.0, |
| "reward": 2.3956105709075928, |
| "reward_std": 0.045728508091997355, |
| "rewards/format_reward_hoi_key": 0.7395220696926117, |
| "rewards/format_reward_hoi_object_label": 0.59375, |
| "rewards/format_reward_hoi_verb_label": 0.5073784738779068, |
| "rewards/hoi_iou_reward": 0.5549599975347519, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 180.421875, |
| "epoch": 0.001062868682574268, |
| "grad_norm": 0.5881515741348267, |
| "kl": 0.00014841556549072266, |
| "learning_rate": 9.989370748299319e-07, |
| "loss": 0.0, |
| "reward": 2.2462641298770905, |
| "reward_std": 0.14320564700756222, |
| "rewards/format_reward_hoi_key": 0.7350446432828903, |
| "rewards/format_reward_hoi_object_label": 0.4899553433060646, |
| "rewards/format_reward_hoi_verb_label": 0.5563345961272717, |
| "rewards/hoi_iou_reward": 0.46492957696318626, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 208.28125, |
| "epoch": 0.0012754424190891216, |
| "grad_norm": 0.29585161805152893, |
| "kl": 0.0001379847526550293, |
| "learning_rate": 9.987244897959182e-07, |
| "loss": 0.0, |
| "reward": 2.1843446791172028, |
| "reward_std": 0.005820542646688409, |
| "rewards/format_reward_hoi_key": 0.8457291722297668, |
| "rewards/format_reward_hoi_object_label": 0.6000000089406967, |
| "rewards/format_reward_hoi_verb_label": 0.1180555634200573, |
| "rewards/hoi_iou_reward": 0.6205599009990692, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 164.84375, |
| "epoch": 0.0014880161556039752, |
| "grad_norm": 0.5830075144767761, |
| "kl": 0.00010955333709716797, |
| "learning_rate": 9.985119047619047e-07, |
| "loss": -0.0, |
| "reward": 2.5442887246608734, |
| "reward_std": 0.11149050580570474, |
| "rewards/format_reward_hoi_key": 0.7979166656732559, |
| "rewards/format_reward_hoi_object_label": 0.7083333358168602, |
| "rewards/format_reward_hoi_verb_label": 0.4583333358168602, |
| "rewards/hoi_iou_reward": 0.5797053650021553, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 167.234375, |
| "epoch": 0.0017005898921188286, |
| "grad_norm": 0.35756170749664307, |
| "kl": 8.910894393920898e-05, |
| "learning_rate": 9.982993197278912e-07, |
| "loss": 0.0, |
| "reward": 2.5064347982406616, |
| "reward_std": 0.0026310062530683354, |
| "rewards/format_reward_hoi_key": 0.7702381014823914, |
| "rewards/format_reward_hoi_object_label": 0.595362103311345, |
| "rewards/format_reward_hoi_verb_label": 0.5941220238455571, |
| "rewards/hoi_iou_reward": 0.546712551265955, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 188.484375, |
| "epoch": 0.0019131636286336823, |
| "grad_norm": 1.1468232870101929, |
| "kl": 0.00023877620697021484, |
| "learning_rate": 9.980867346938775e-07, |
| "loss": 0.0, |
| "reward": 2.93448406457901, |
| "reward_std": 0.07516021025367081, |
| "rewards/format_reward_hoi_key": 0.90625, |
| "rewards/format_reward_hoi_object_label": 0.79296875, |
| "rewards/format_reward_hoi_verb_label": 0.447916679084301, |
| "rewards/hoi_iou_reward": 0.7873486280441284, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 188.921875, |
| "epoch": 0.002125737365148536, |
| "grad_norm": 0.608834445476532, |
| "kl": 0.0003757476806640625, |
| "learning_rate": 9.97874149659864e-07, |
| "loss": -0.0, |
| "reward": 2.309541165828705, |
| "reward_std": 0.04332686646375805, |
| "rewards/format_reward_hoi_key": 0.7756249904632568, |
| "rewards/format_reward_hoi_object_label": 0.5166666656732559, |
| "rewards/format_reward_hoi_verb_label": 0.46510415710508823, |
| "rewards/hoi_iou_reward": 0.5521453768014908, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 156.34375, |
| "epoch": 0.0023383111016633895, |
| "grad_norm": 1.079801321029663, |
| "kl": 0.0002917051315307617, |
| "learning_rate": 9.976615646258503e-07, |
| "loss": -0.0, |
| "reward": 2.9021179378032684, |
| "reward_std": 0.06573383091017604, |
| "rewards/format_reward_hoi_key": 0.9125000089406967, |
| "rewards/format_reward_hoi_object_label": 0.75, |
| "rewards/format_reward_hoi_verb_label": 0.5, |
| "rewards/hoi_iou_reward": 0.7396180182695389, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 174.46875, |
| "epoch": 0.002550884838178243, |
| "grad_norm": 0.6156663298606873, |
| "kl": 0.0005776882171630859, |
| "learning_rate": 9.974489795918366e-07, |
| "loss": 0.0, |
| "reward": 2.3791774213314056, |
| "reward_std": 0.0850577435339801, |
| "rewards/format_reward_hoi_key": 0.7312500178813934, |
| "rewards/format_reward_hoi_object_label": 0.5208333358168602, |
| "rewards/format_reward_hoi_verb_label": 0.5911458358168602, |
| "rewards/hoi_iou_reward": 0.535948283970356, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 167.578125, |
| "epoch": 0.002763458574693097, |
| "grad_norm": 3.2466673851013184, |
| "kl": 0.0002658367156982422, |
| "learning_rate": 9.972363945578231e-07, |
| "loss": 0.0, |
| "reward": 3.0418315529823303, |
| "reward_std": 0.013055827002972364, |
| "rewards/format_reward_hoi_key": 0.9000000059604645, |
| "rewards/format_reward_hoi_object_label": 0.8125, |
| "rewards/format_reward_hoi_verb_label": 0.625, |
| "rewards/hoi_iou_reward": 0.704331636428833, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 167.4375, |
| "epoch": 0.0029760323112079505, |
| "grad_norm": 0.5669279098510742, |
| "kl": 0.0004019737243652344, |
| "learning_rate": 9.970238095238094e-07, |
| "loss": 0.0, |
| "reward": 2.4804917573928833, |
| "reward_std": 0.08349880830792245, |
| "rewards/format_reward_hoi_key": 0.7427083253860474, |
| "rewards/format_reward_hoi_object_label": 0.697916679084301, |
| "rewards/format_reward_hoi_verb_label": 0.483333345502615, |
| "rewards/hoi_iou_reward": 0.5565334260463715, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 198.625, |
| "epoch": 0.0031886060477228037, |
| "grad_norm": 0.27379515767097473, |
| "kl": 0.00033855438232421875, |
| "learning_rate": 9.968112244897957e-07, |
| "loss": 0.0, |
| "reward": 2.1904609203338623, |
| "reward_std": 0.06255148959462531, |
| "rewards/format_reward_hoi_key": 0.7820312678813934, |
| "rewards/format_reward_hoi_object_label": 0.6083984375, |
| "rewards/format_reward_hoi_verb_label": 0.3639322891831398, |
| "rewards/hoi_iou_reward": 0.4360988959670067, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 242.234375, |
| "epoch": 0.0034011797842376573, |
| "grad_norm": 0.2515924870967865, |
| "kl": 0.0006353855133056641, |
| "learning_rate": 9.965986394557822e-07, |
| "loss": 0.0, |
| "reward": 2.716467797756195, |
| "reward_std": 0.07810639549279585, |
| "rewards/format_reward_hoi_key": 0.7664583474397659, |
| "rewards/format_reward_hoi_object_label": 0.6187500059604645, |
| "rewards/format_reward_hoi_verb_label": 0.6677083224058151, |
| "rewards/hoi_iou_reward": 0.663551077246666, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 226.859375, |
| "epoch": 0.003613753520752511, |
| "grad_norm": 0.5136963725090027, |
| "kl": 0.0003933906555175781, |
| "learning_rate": 9.963860544217688e-07, |
| "loss": 0.0, |
| "reward": 2.071069449186325, |
| "reward_std": 0.06459418445592746, |
| "rewards/format_reward_hoi_key": 0.6252120807766914, |
| "rewards/format_reward_hoi_object_label": 0.5837053582072258, |
| "rewards/format_reward_hoi_verb_label": 0.4394965320825577, |
| "rewards/hoi_iou_reward": 0.4226554408669472, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 277.625, |
| "epoch": 0.0038263272572673646, |
| "grad_norm": 0.6188202500343323, |
| "kl": 0.0002378225326538086, |
| "learning_rate": 9.96173469387755e-07, |
| "loss": 0.0, |
| "reward": 3.0354496240615845, |
| "reward_std": 0.30482952669262886, |
| "rewards/format_reward_hoi_key": 0.8430059552192688, |
| "rewards/format_reward_hoi_object_label": 0.8227306753396988, |
| "rewards/format_reward_hoi_verb_label": 0.5986328125, |
| "rewards/hoi_iou_reward": 0.7710802108049393, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 151.75, |
| "epoch": 0.004038900993782219, |
| "grad_norm": 0.27549490332603455, |
| "kl": 0.0006313323974609375, |
| "learning_rate": 9.959608843537416e-07, |
| "loss": -0.0, |
| "reward": 2.0183950662612915, |
| "reward_std": 0.015180108457570896, |
| "rewards/format_reward_hoi_key": 0.6604166775941849, |
| "rewards/format_reward_hoi_object_label": 0.5416666716337204, |
| "rewards/format_reward_hoi_verb_label": 0.3524305671453476, |
| "rewards/hoi_iou_reward": 0.4638812467455864, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 224.078125, |
| "epoch": 0.004251474730297072, |
| "grad_norm": 0.56353759765625, |
| "kl": 0.0008664131164550781, |
| "learning_rate": 9.957482993197279e-07, |
| "loss": 0.0, |
| "reward": 2.617310881614685, |
| "reward_std": 0.185114907566458, |
| "rewards/format_reward_hoi_key": 0.7604167088866234, |
| "rewards/format_reward_hoi_object_label": 0.6744791641831398, |
| "rewards/format_reward_hoi_verb_label": 0.5677083283662796, |
| "rewards/hoi_iou_reward": 0.6147066801786423, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 174.09375, |
| "epoch": 0.004464048466811925, |
| "grad_norm": 0.31322988867759705, |
| "kl": 0.0007352828979492188, |
| "learning_rate": 9.955357142857142e-07, |
| "loss": 0.0, |
| "reward": 2.9305796921253204, |
| "reward_std": 0.01013911364134401, |
| "rewards/format_reward_hoi_key": 0.8696428686380386, |
| "rewards/format_reward_hoi_object_label": 0.7857142835855484, |
| "rewards/format_reward_hoi_verb_label": 0.552300363779068, |
| "rewards/hoi_iou_reward": 0.7229221612215042, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 162.0, |
| "epoch": 0.004676622203326779, |
| "grad_norm": 0.4496309161186218, |
| "kl": 0.001049041748046875, |
| "learning_rate": 9.953231292517007e-07, |
| "loss": 0.0, |
| "reward": 2.2096868455410004, |
| "reward_std": 0.0113821976701729, |
| "rewards/format_reward_hoi_key": 0.7333928644657135, |
| "rewards/format_reward_hoi_object_label": 0.6169642880558968, |
| "rewards/format_reward_hoi_verb_label": 0.2777777761220932, |
| "rewards/hoi_iou_reward": 0.5815519690513611, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 154.609375, |
| "epoch": 0.004889195939841632, |
| "grad_norm": 0.8822689652442932, |
| "kl": 0.0013833045959472656, |
| "learning_rate": 9.95110544217687e-07, |
| "loss": 0.0, |
| "reward": 3.247895896434784, |
| "reward_std": 0.04373934442992322, |
| "rewards/format_reward_hoi_key": 0.9250000268220901, |
| "rewards/format_reward_hoi_object_label": 0.9583333283662796, |
| "rewards/format_reward_hoi_verb_label": 0.6562499850988388, |
| "rewards/hoi_iou_reward": 0.708312600851059, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.328125, |
| "epoch": 0.005101769676356486, |
| "grad_norm": 0.7035483121871948, |
| "kl": 0.0004572868347167969, |
| "learning_rate": 9.948979591836735e-07, |
| "loss": 0.0, |
| "reward": 2.264761805534363, |
| "reward_std": 0.28715356811881065, |
| "rewards/format_reward_hoi_key": 0.6794504672288895, |
| "rewards/format_reward_hoi_object_label": 0.5326923131942749, |
| "rewards/format_reward_hoi_verb_label": 0.6280448734760284, |
| "rewards/hoi_iou_reward": 0.42457417771220207, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 227.59375, |
| "epoch": 0.0053143434128713396, |
| "grad_norm": 0.31094542145729065, |
| "kl": 0.0009341239929199219, |
| "learning_rate": 9.946853741496598e-07, |
| "loss": 0.0, |
| "reward": 2.356251895427704, |
| "reward_std": 0.003799198704655282, |
| "rewards/format_reward_hoi_key": 0.767708346247673, |
| "rewards/format_reward_hoi_object_label": 0.4895833432674408, |
| "rewards/format_reward_hoi_verb_label": 0.5043560639023781, |
| "rewards/hoi_iou_reward": 0.5946041345596313, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 181.375, |
| "epoch": 0.005526917149386194, |
| "grad_norm": 0.5995525121688843, |
| "kl": 0.00150299072265625, |
| "learning_rate": 9.944727891156463e-07, |
| "loss": 0.0001, |
| "reward": 2.6978970766067505, |
| "reward_std": 0.13544296027976088, |
| "rewards/format_reward_hoi_key": 0.8333333432674408, |
| "rewards/format_reward_hoi_object_label": 0.6158854141831398, |
| "rewards/format_reward_hoi_verb_label": 0.5898437350988388, |
| "rewards/hoi_iou_reward": 0.6588345021009445, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 189.03125, |
| "epoch": 0.005739490885901047, |
| "grad_norm": 0.5540001392364502, |
| "kl": 0.000858306884765625, |
| "learning_rate": 9.942602040816326e-07, |
| "loss": 0.0001, |
| "reward": 3.355882227420807, |
| "reward_std": 0.005877207615412772, |
| "rewards/format_reward_hoi_key": 0.9535714238882065, |
| "rewards/format_reward_hoi_object_label": 0.9017857313156128, |
| "rewards/format_reward_hoi_verb_label": 0.7232142835855484, |
| "rewards/hoi_iou_reward": 0.7773108184337616, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 211.90625, |
| "epoch": 0.005952064622415901, |
| "grad_norm": 2.0975677967071533, |
| "kl": 0.001495361328125, |
| "learning_rate": 9.940476190476191e-07, |
| "loss": 0.0001, |
| "reward": 2.007324628531933, |
| "reward_std": 0.03993106237612665, |
| "rewards/format_reward_hoi_key": 0.5873221457004547, |
| "rewards/format_reward_hoi_object_label": 0.44114159047603607, |
| "rewards/format_reward_hoi_verb_label": 0.5036415904760361, |
| "rewards/hoi_iou_reward": 0.47521928139030933, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 165.1875, |
| "epoch": 0.006164638358930754, |
| "grad_norm": 0.37749311327934265, |
| "kl": 0.0019092559814453125, |
| "learning_rate": 9.938350340136054e-07, |
| "loss": 0.0001, |
| "reward": 2.2582033574581146, |
| "reward_std": 0.08065436300239526, |
| "rewards/format_reward_hoi_key": 0.6932291835546494, |
| "rewards/format_reward_hoi_object_label": 0.59375, |
| "rewards/format_reward_hoi_verb_label": 0.3541666641831398, |
| "rewards/hoi_iou_reward": 0.6170575618743896, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 184.59375, |
| "epoch": 0.006377212095445607, |
| "grad_norm": 0.3330162763595581, |
| "kl": 0.0014448165893554688, |
| "learning_rate": 9.936224489795917e-07, |
| "loss": 0.0, |
| "reward": 2.6335054636001587, |
| "reward_std": 0.0012341497422312386, |
| "rewards/format_reward_hoi_key": 0.8750000149011612, |
| "rewards/format_reward_hoi_object_label": 0.6875, |
| "rewards/format_reward_hoi_verb_label": 0.3880208358168602, |
| "rewards/hoi_iou_reward": 0.6829846650362015, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 172.125, |
| "epoch": 0.006589785831960461, |
| "grad_norm": 0.8369670510292053, |
| "kl": 0.0013284683227539062, |
| "learning_rate": 9.934098639455782e-07, |
| "loss": 0.0001, |
| "reward": 2.4850784838199615, |
| "reward_std": 0.02788396377582103, |
| "rewards/format_reward_hoi_key": 0.8687500208616257, |
| "rewards/format_reward_hoi_object_label": 0.4687500074505806, |
| "rewards/format_reward_hoi_verb_label": 0.5, |
| "rewards/hoi_iou_reward": 0.6475784331560135, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 216.046875, |
| "epoch": 0.006802359568475315, |
| "grad_norm": 0.9224941730499268, |
| "kl": 0.00140380859375, |
| "learning_rate": 9.931972789115645e-07, |
| "loss": 0.0, |
| "reward": 2.751905083656311, |
| "reward_std": 0.08277821098454297, |
| "rewards/format_reward_hoi_key": 0.809895858168602, |
| "rewards/format_reward_hoi_object_label": 0.5078125, |
| "rewards/format_reward_hoi_verb_label": 0.6927083283662796, |
| "rewards/hoi_iou_reward": 0.7414884492754936, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 210.71875, |
| "epoch": 0.007014933304990169, |
| "grad_norm": 0.39392945170402527, |
| "kl": 0.002300262451171875, |
| "learning_rate": 9.92984693877551e-07, |
| "loss": 0.0001, |
| "reward": 2.1440170407295227, |
| "reward_std": 0.02253561234101653, |
| "rewards/format_reward_hoi_key": 0.9121875166893005, |
| "rewards/format_reward_hoi_object_label": 0.3125, |
| "rewards/format_reward_hoi_verb_label": 0.3333333358168602, |
| "rewards/hoi_iou_reward": 0.5859961807727814, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 194.265625, |
| "epoch": 0.007227507041505022, |
| "grad_norm": 0.5018682479858398, |
| "kl": 0.0017986297607421875, |
| "learning_rate": 9.927721088435373e-07, |
| "loss": 0.0001, |
| "reward": 2.5592292845249176, |
| "reward_std": 0.00986732606543228, |
| "rewards/format_reward_hoi_key": 0.7691666930913925, |
| "rewards/format_reward_hoi_object_label": 0.6583333313465118, |
| "rewards/format_reward_hoi_verb_label": 0.5562499985098839, |
| "rewards/hoi_iou_reward": 0.5754793435335159, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 205.03125, |
| "epoch": 0.007440080778019876, |
| "grad_norm": 0.6149845719337463, |
| "kl": 0.0016880035400390625, |
| "learning_rate": 9.925595238095238e-07, |
| "loss": 0.0001, |
| "reward": 2.778216004371643, |
| "reward_std": 0.11917518911650404, |
| "rewards/format_reward_hoi_key": 0.8614583313465118, |
| "rewards/format_reward_hoi_object_label": 0.7333333194255829, |
| "rewards/format_reward_hoi_verb_label": 0.5011574029922485, |
| "rewards/hoi_iou_reward": 0.6822669506072998, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 222.671875, |
| "epoch": 0.007652654514534729, |
| "grad_norm": 0.5013077259063721, |
| "kl": 0.0017242431640625, |
| "learning_rate": 9.923469387755101e-07, |
| "loss": 0.0001, |
| "reward": 2.724997416138649, |
| "reward_std": 0.007125564094167203, |
| "rewards/format_reward_hoi_key": 0.8181547522544861, |
| "rewards/format_reward_hoi_object_label": 0.6875, |
| "rewards/format_reward_hoi_verb_label": 0.625, |
| "rewards/hoi_iou_reward": 0.5943426117300987, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 249.65625, |
| "epoch": 0.007865228251049582, |
| "grad_norm": 0.4427626430988312, |
| "kl": 0.00135040283203125, |
| "learning_rate": 9.921343537414967e-07, |
| "loss": 0.0001, |
| "reward": 2.5683979988098145, |
| "reward_std": 0.05630575024406426, |
| "rewards/format_reward_hoi_key": 0.810416653752327, |
| "rewards/format_reward_hoi_object_label": 0.625, |
| "rewards/format_reward_hoi_verb_label": 0.3906250111758709, |
| "rewards/hoi_iou_reward": 0.7423563152551651, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 270.921875, |
| "epoch": 0.008077801987564437, |
| "grad_norm": 2.8067455291748047, |
| "kl": 0.0019435882568359375, |
| "learning_rate": 9.91921768707483e-07, |
| "loss": 0.0001, |
| "reward": 2.22263365983963, |
| "reward_std": 0.20146464882418513, |
| "rewards/format_reward_hoi_key": 0.6945772171020508, |
| "rewards/format_reward_hoi_object_label": 0.3977022171020508, |
| "rewards/format_reward_hoi_verb_label": 0.5460824370384216, |
| "rewards/hoi_iou_reward": 0.5842718333005905, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 211.40625, |
| "epoch": 0.00829037572407929, |
| "grad_norm": 0.5841067433357239, |
| "kl": 0.00464630126953125, |
| "learning_rate": 9.917091836734693e-07, |
| "loss": 0.0002, |
| "reward": 2.7285755276679993, |
| "reward_std": 0.15019595221383497, |
| "rewards/format_reward_hoi_key": 0.931383952498436, |
| "rewards/format_reward_hoi_object_label": 0.5188244059681892, |
| "rewards/format_reward_hoi_verb_label": 0.5774181559681892, |
| "rewards/hoi_iou_reward": 0.7009490430355072, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 235.15625, |
| "epoch": 0.008502949460594144, |
| "grad_norm": 0.3903954327106476, |
| "kl": 0.0014748573303222656, |
| "learning_rate": 9.914965986394558e-07, |
| "loss": 0.0001, |
| "reward": 2.41436231136322, |
| "reward_std": 0.031614198378520086, |
| "rewards/format_reward_hoi_key": 0.7393315136432648, |
| "rewards/format_reward_hoi_object_label": 0.6360462605953217, |
| "rewards/format_reward_hoi_verb_label": 0.5051649361848831, |
| "rewards/hoi_iou_reward": 0.5338196456432343, |
| "step": 40 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 4704, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|