| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.11074197120708748, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 73.265625, |
| "epoch": 0.0011074197120708748, |
| "grad_norm": 0.5076314806938171, |
| "kl": 0.0, |
| "learning_rate": 9.99375e-07, |
| "loss": -0.018259915290400386, |
| "reward": 2.2648561000823975, |
| "reward_std": 0.32521533221006393, |
| "rewards/GDino": 0.84943026304245, |
| "rewards/GIT": 0.5776679813861847, |
| "rewards/HPSv2": 0.2639656066894531, |
| "rewards/ORM": 0.5737921893596649, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -22.0, |
| "step": 1 |
| }, |
| { |
| "completion_length": 56.0, |
| "epoch": 0.0022148394241417496, |
| "grad_norm": 0.5364330410957336, |
| "kl": 0.001522064208984375, |
| "learning_rate": 9.9875e-07, |
| "loss": 0.00348748016403988, |
| "reward": 1.7680926322937012, |
| "reward_std": 0.41801488399505615, |
| "rewards/GDino": 0.6529064476490021, |
| "rewards/GIT": 0.19494981318712234, |
| "rewards/HPSv2": 0.24983596801757812, |
| "rewards/ORM": 0.6704004406929016, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.0, |
| "step": 2 |
| }, |
| { |
| "completion_length": 55.4375, |
| "epoch": 0.0033222591362126247, |
| "grad_norm": 0.5614722967147827, |
| "kl": 0.001556396484375, |
| "learning_rate": 9.98125e-07, |
| "loss": 0.01565772108733654, |
| "reward": 1.6570448875427246, |
| "reward_std": 0.3965621292591095, |
| "rewards/GDino": 0.6382372081279755, |
| "rewards/GIT": 0.37795570492744446, |
| "rewards/HPSv2": 0.24709796905517578, |
| "rewards/ORM": 0.3937540017068386, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -20.9375, |
| "step": 3 |
| }, |
| { |
| "completion_length": 65.34375, |
| "epoch": 0.004429678848283499, |
| "grad_norm": 2.5736770629882812, |
| "kl": 0.0016021728515625, |
| "learning_rate": 9.975e-07, |
| "loss": -0.0012893765233457088, |
| "reward": 2.061529755592346, |
| "reward_std": 0.4106704443693161, |
| "rewards/GDino": 0.7796730995178223, |
| "rewards/GIT": 0.43717896938323975, |
| "rewards/HPSv2": 0.24744796752929688, |
| "rewards/ORM": 0.5972296595573425, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -22.0, |
| "step": 4 |
| }, |
| { |
| "completion_length": 63.578125, |
| "epoch": 0.005537098560354375, |
| "grad_norm": 0.48238250613212585, |
| "kl": 0.001575469970703125, |
| "learning_rate": 9.968749999999999e-07, |
| "loss": 0.020129199139773846, |
| "reward": 1.5302643775939941, |
| "reward_std": 0.44902199506759644, |
| "rewards/GDino": 0.6246840953826904, |
| "rewards/GIT": 0.23608428239822388, |
| "rewards/HPSv2": 0.2453451156616211, |
| "rewards/ORM": 0.42415088415145874, |
| "self_certainty_semantic": -25.625, |
| "self_certainty_token": -22.1875, |
| "step": 5 |
| }, |
| { |
| "completion_length": 60.65625, |
| "epoch": 0.006644518272425249, |
| "grad_norm": 0.8221905827522278, |
| "kl": 0.001674652099609375, |
| "learning_rate": 9.9625e-07, |
| "loss": 0.0192068200558424, |
| "reward": 2.1602972745895386, |
| "reward_std": 0.23134037852287292, |
| "rewards/GDino": 0.783700168132782, |
| "rewards/GIT": 0.452057421207428, |
| "rewards/HPSv2": 0.274627685546875, |
| "rewards/ORM": 0.6499120593070984, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -22.0, |
| "step": 6 |
| }, |
| { |
| "completion_length": 65.453125, |
| "epoch": 0.007751937984496124, |
| "grad_norm": 0.433403879404068, |
| "kl": 0.0016021728515625, |
| "learning_rate": 9.956249999999999e-07, |
| "loss": 0.028950304724276066, |
| "reward": 1.7097668647766113, |
| "reward_std": 0.5880981385707855, |
| "rewards/GDino": 0.5914062708616257, |
| "rewards/GIT": 0.15753822773694992, |
| "rewards/HPSv2": 0.25023555755615234, |
| "rewards/ORM": 0.7105867862701416, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.75, |
| "step": 7 |
| }, |
| { |
| "completion_length": 74.90625, |
| "epoch": 0.008859357696566999, |
| "grad_norm": 0.41245806217193604, |
| "kl": 0.00152587890625, |
| "learning_rate": 9.95e-07, |
| "loss": -0.016540683340281248, |
| "reward": 1.785366177558899, |
| "reward_std": 0.39637817442417145, |
| "rewards/GDino": 0.7011832594871521, |
| "rewards/GIT": 0.3848375529050827, |
| "rewards/HPSv2": 0.2445659637451172, |
| "rewards/ORM": 0.45477938652038574, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -20.875, |
| "step": 8 |
| }, |
| { |
| "completion_length": 61.828125, |
| "epoch": 0.009966777408637873, |
| "grad_norm": 0.3924250602722168, |
| "kl": 0.001617431640625, |
| "learning_rate": 9.94375e-07, |
| "loss": 0.03069412149488926, |
| "reward": 2.0813064575195312, |
| "reward_std": 0.5435488224029541, |
| "rewards/GDino": 0.736801415681839, |
| "rewards/GIT": 0.32275132089853287, |
| "rewards/HPSv2": 0.26233673095703125, |
| "rewards/ORM": 0.759416937828064, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -21.1875, |
| "step": 9 |
| }, |
| { |
| "completion_length": 62.796875, |
| "epoch": 0.01107419712070875, |
| "grad_norm": 0.5886948704719543, |
| "kl": 0.00164031982421875, |
| "learning_rate": 9.9375e-07, |
| "loss": -0.009089878294616938, |
| "reward": 1.8167259693145752, |
| "reward_std": 0.4427160769701004, |
| "rewards/GDino": 0.6997816860675812, |
| "rewards/GIT": 0.4742187559604645, |
| "rewards/HPSv2": 0.2480792999267578, |
| "rewards/ORM": 0.3946462571620941, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.125, |
| "step": 10 |
| }, |
| { |
| "completion_length": 64.09375, |
| "epoch": 0.012181616832779624, |
| "grad_norm": 0.6388463377952576, |
| "kl": 0.0016326904296875, |
| "learning_rate": 9.93125e-07, |
| "loss": -0.011163983959704638, |
| "reward": 2.250586152076721, |
| "reward_std": 0.29546695202589035, |
| "rewards/GDino": 0.7932291626930237, |
| "rewards/GIT": 0.5437096580862999, |
| "rewards/HPSv2": 0.25614356994628906, |
| "rewards/ORM": 0.657503753900528, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.0625, |
| "step": 11 |
| }, |
| { |
| "completion_length": 73.265625, |
| "epoch": 0.013289036544850499, |
| "grad_norm": 0.37963175773620605, |
| "kl": 0.001583099365234375, |
| "learning_rate": 9.925e-07, |
| "loss": 0.009535952471196651, |
| "reward": 1.8723560571670532, |
| "reward_std": 0.48824670910835266, |
| "rewards/GDino": 0.671429455280304, |
| "rewards/GIT": 0.4155814051628113, |
| "rewards/HPSv2": 0.2387409210205078, |
| "rewards/ORM": 0.5466042459011078, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.625, |
| "step": 12 |
| }, |
| { |
| "completion_length": 55.015625, |
| "epoch": 0.014396456256921373, |
| "grad_norm": 0.5844080448150635, |
| "kl": 0.001674652099609375, |
| "learning_rate": 9.91875e-07, |
| "loss": 0.0034986711107194424, |
| "reward": 1.7595484256744385, |
| "reward_std": 0.3697086051106453, |
| "rewards/GDino": 0.7100214958190918, |
| "rewards/GIT": 0.26869260519742966, |
| "rewards/HPSv2": 0.24958419799804688, |
| "rewards/ORM": 0.53125, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.375, |
| "step": 13 |
| }, |
| { |
| "completion_length": 55.65625, |
| "epoch": 0.015503875968992248, |
| "grad_norm": 0.5192797780036926, |
| "kl": 0.001674652099609375, |
| "learning_rate": 9.912499999999998e-07, |
| "loss": 0.010001872200518847, |
| "reward": 2.201015591621399, |
| "reward_std": 0.4899330288171768, |
| "rewards/GDino": 0.8140625059604645, |
| "rewards/GIT": 0.4328514188528061, |
| "rewards/HPSv2": 0.2431640625, |
| "rewards/ORM": 0.7109375, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -22.0, |
| "step": 14 |
| }, |
| { |
| "completion_length": 64.0, |
| "epoch": 0.016611295681063124, |
| "grad_norm": 0.46844616532325745, |
| "kl": 0.00174713134765625, |
| "learning_rate": 9.90625e-07, |
| "loss": 0.0017675042618066072, |
| "reward": 2.433342456817627, |
| "reward_std": 0.33736473321914673, |
| "rewards/GDino": 0.9153576791286469, |
| "rewards/GIT": 0.5124611556529999, |
| "rewards/HPSv2": 0.2507901191711426, |
| "rewards/ORM": 0.7547334432601929, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.6875, |
| "step": 15 |
| }, |
| { |
| "completion_length": 53.203125, |
| "epoch": 0.017718715393133997, |
| "grad_norm": 0.49579355120658875, |
| "kl": 0.001758575439453125, |
| "learning_rate": 9.9e-07, |
| "loss": 0.003856237977743149, |
| "reward": 1.6368815302848816, |
| "reward_std": 0.42226114869117737, |
| "rewards/GDino": 0.6432631015777588, |
| "rewards/GIT": 0.2906690910458565, |
| "rewards/HPSv2": 0.25169944763183594, |
| "rewards/ORM": 0.45124977827072144, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.8125, |
| "step": 16 |
| }, |
| { |
| "completion_length": 76.28125, |
| "epoch": 0.018826135105204873, |
| "grad_norm": 0.5296036601066589, |
| "kl": 0.001590728759765625, |
| "learning_rate": 9.89375e-07, |
| "loss": -0.003345506265759468, |
| "reward": 1.7861530184745789, |
| "reward_std": 0.5057752877473831, |
| "rewards/GDino": 0.6293700635433197, |
| "rewards/GIT": 0.2197464406490326, |
| "rewards/HPSv2": 0.26516151428222656, |
| "rewards/ORM": 0.671875, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -20.6875, |
| "step": 17 |
| }, |
| { |
| "completion_length": 58.0625, |
| "epoch": 0.019933554817275746, |
| "grad_norm": 0.6577962636947632, |
| "kl": 0.00174713134765625, |
| "learning_rate": 9.8875e-07, |
| "loss": -0.019500677473843098, |
| "reward": 2.303292393684387, |
| "reward_std": 0.2609405145049095, |
| "rewards/GDino": 0.8339102566242218, |
| "rewards/GIT": 0.5853700041770935, |
| "rewards/HPSv2": 0.24338722229003906, |
| "rewards/ORM": 0.640625, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -21.25, |
| "step": 18 |
| }, |
| { |
| "completion_length": 50.859375, |
| "epoch": 0.021040974529346623, |
| "grad_norm": 0.3543226718902588, |
| "kl": 0.00182342529296875, |
| "learning_rate": 9.88125e-07, |
| "loss": -0.00019507110118865967, |
| "reward": 1.6344053149223328, |
| "reward_std": 0.47374215722084045, |
| "rewards/GDino": 0.705148845911026, |
| "rewards/GIT": 0.2559727430343628, |
| "rewards/HPSv2": 0.2541370391845703, |
| "rewards/ORM": 0.41914665699005127, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -22.3125, |
| "step": 19 |
| }, |
| { |
| "completion_length": 65.921875, |
| "epoch": 0.0221483942414175, |
| "grad_norm": 0.5358290672302246, |
| "kl": 0.001781463623046875, |
| "learning_rate": 9.875e-07, |
| "loss": 0.007933363318443298, |
| "reward": 1.9504321217536926, |
| "reward_std": 0.3728322237730026, |
| "rewards/GDino": 0.6606760025024414, |
| "rewards/GIT": 0.48046815395355225, |
| "rewards/HPSv2": 0.24678802490234375, |
| "rewards/ORM": 0.5625, |
| "self_certainty_semantic": -25.125, |
| "self_certainty_token": -21.0625, |
| "step": 20 |
| }, |
| { |
| "completion_length": 59.3125, |
| "epoch": 0.023255813953488372, |
| "grad_norm": 2.0912797451019287, |
| "kl": 0.001811981201171875, |
| "learning_rate": 9.86875e-07, |
| "loss": -0.004398644436150789, |
| "reward": 2.252086877822876, |
| "reward_std": 0.44888848066329956, |
| "rewards/GDino": 0.798213005065918, |
| "rewards/GIT": 0.4853799045085907, |
| "rewards/HPSv2": 0.25956153869628906, |
| "rewards/ORM": 0.7089323997497559, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -22.75, |
| "step": 21 |
| }, |
| { |
| "completion_length": 52.265625, |
| "epoch": 0.024363233665559248, |
| "grad_norm": 0.5790585875511169, |
| "kl": 0.00191497802734375, |
| "learning_rate": 9.862499999999999e-07, |
| "loss": 0.006876260507851839, |
| "reward": 1.9933909177780151, |
| "reward_std": 0.32367050647735596, |
| "rewards/GDino": 0.7134387493133545, |
| "rewards/GIT": 0.41087181866168976, |
| "rewards/HPSv2": 0.2721214294433594, |
| "rewards/ORM": 0.5969589203596115, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -22.375, |
| "step": 22 |
| }, |
| { |
| "completion_length": 59.375, |
| "epoch": 0.02547065337763012, |
| "grad_norm": 0.45692723989486694, |
| "kl": 0.001697540283203125, |
| "learning_rate": 9.85625e-07, |
| "loss": -0.00792664848268032, |
| "reward": 2.015365242958069, |
| "reward_std": 0.48256243765354156, |
| "rewards/GDino": 0.724082350730896, |
| "rewards/GIT": 0.42729710042476654, |
| "rewards/HPSv2": 0.2667560577392578, |
| "rewards/ORM": 0.5972296893596649, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -20.6875, |
| "step": 23 |
| }, |
| { |
| "completion_length": 55.203125, |
| "epoch": 0.026578073089700997, |
| "grad_norm": 0.46439889073371887, |
| "kl": 0.0016937255859375, |
| "learning_rate": 9.849999999999999e-07, |
| "loss": 0.0024933242239058018, |
| "reward": 2.460409939289093, |
| "reward_std": 0.4443647414445877, |
| "rewards/GDino": 0.8454739451408386, |
| "rewards/GIT": 0.6258784532546997, |
| "rewards/HPSv2": 0.2624950408935547, |
| "rewards/ORM": 0.7265625, |
| "self_certainty_semantic": -25.6875, |
| "self_certainty_token": -21.25, |
| "step": 24 |
| }, |
| { |
| "completion_length": 60.15625, |
| "epoch": 0.02768549280177187, |
| "grad_norm": 0.47176027297973633, |
| "kl": 0.001880645751953125, |
| "learning_rate": 9.84375e-07, |
| "loss": 0.005812188144773245, |
| "reward": 2.0174233317375183, |
| "reward_std": 0.40724658966064453, |
| "rewards/GDino": 0.7186038792133331, |
| "rewards/GIT": 0.4156235605478287, |
| "rewards/HPSv2": 0.26485633850097656, |
| "rewards/ORM": 0.6183395236730576, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -22.0, |
| "step": 25 |
| }, |
| { |
| "completion_length": 53.21875, |
| "epoch": 0.028792912513842746, |
| "grad_norm": 0.716375470161438, |
| "kl": 0.00209808349609375, |
| "learning_rate": 9.8375e-07, |
| "loss": 0.02397427149116993, |
| "reward": 2.186239182949066, |
| "reward_std": 0.46710920333862305, |
| "rewards/GDino": 0.7593750059604645, |
| "rewards/GIT": 0.5171153843402863, |
| "rewards/HPSv2": 0.2734565734863281, |
| "rewards/ORM": 0.6362921893596649, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -23.0, |
| "step": 26 |
| }, |
| { |
| "completion_length": 58.421875, |
| "epoch": 0.029900332225913623, |
| "grad_norm": 0.428893119096756, |
| "kl": 0.00171661376953125, |
| "learning_rate": 9.83125e-07, |
| "loss": -0.005866332910954952, |
| "reward": 1.9681838750839233, |
| "reward_std": 0.3645169883966446, |
| "rewards/GDino": 0.7666666209697723, |
| "rewards/GIT": 0.4486802965402603, |
| "rewards/HPSv2": 0.2419452667236328, |
| "rewards/ORM": 0.5108915567398071, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.4375, |
| "step": 27 |
| }, |
| { |
| "completion_length": 63.328125, |
| "epoch": 0.031007751937984496, |
| "grad_norm": 0.5334203243255615, |
| "kl": 0.002010345458984375, |
| "learning_rate": 9.825e-07, |
| "loss": 0.012586410157382488, |
| "reward": 1.4134111404418945, |
| "reward_std": 0.3155324012041092, |
| "rewards/GDino": 0.6005972325801849, |
| "rewards/GIT": 0.11092349141836166, |
| "rewards/HPSv2": 0.2596569061279297, |
| "rewards/ORM": 0.44223344326019287, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.5625, |
| "step": 28 |
| }, |
| { |
| "completion_length": 56.25, |
| "epoch": 0.03211517165005537, |
| "grad_norm": 0.40832045674324036, |
| "kl": 0.001819610595703125, |
| "learning_rate": 9.81875e-07, |
| "loss": 0.010300841182470322, |
| "reward": 2.465680956840515, |
| "reward_std": 0.298002652823925, |
| "rewards/GDino": 0.862500011920929, |
| "rewards/GIT": 0.6107669174671173, |
| "rewards/HPSv2": 0.28375244140625, |
| "rewards/ORM": 0.7086615860462189, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.0625, |
| "step": 29 |
| }, |
| { |
| "completion_length": 54.953125, |
| "epoch": 0.03322259136212625, |
| "grad_norm": 0.4050670266151428, |
| "kl": 0.002025604248046875, |
| "learning_rate": 9.8125e-07, |
| "loss": -0.001845305785536766, |
| "reward": 2.476737856864929, |
| "reward_std": 0.3756887763738632, |
| "rewards/GDino": 0.8967152833938599, |
| "rewards/GIT": 0.551719531416893, |
| "rewards/HPSv2": 0.24522781372070312, |
| "rewards/ORM": 0.7830752730369568, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -21.25, |
| "step": 30 |
| }, |
| { |
| "completion_length": 74.84375, |
| "epoch": 0.03433001107419712, |
| "grad_norm": 0.7089686393737793, |
| "kl": 0.001865386962890625, |
| "learning_rate": 9.806249999999998e-07, |
| "loss": 0.023707949556410313, |
| "reward": 1.831493854522705, |
| "reward_std": 0.37860143184661865, |
| "rewards/GDino": 0.6287499666213989, |
| "rewards/GIT": 0.3833145350217819, |
| "rewards/HPSv2": 0.2413043975830078, |
| "rewards/ORM": 0.578125, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -20.9375, |
| "step": 31 |
| }, |
| { |
| "completion_length": 71.0625, |
| "epoch": 0.035437430786267994, |
| "grad_norm": 0.45204266905784607, |
| "kl": 0.00200653076171875, |
| "learning_rate": 9.8e-07, |
| "loss": 0.014695112593472004, |
| "reward": 1.5279032588005066, |
| "reward_std": 0.5042913109064102, |
| "rewards/GDino": 0.6702238023281097, |
| "rewards/GIT": 0.24817809462547302, |
| "rewards/HPSv2": 0.2356252670288086, |
| "rewards/ORM": 0.37387609481811523, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -22.5625, |
| "step": 32 |
| }, |
| { |
| "completion_length": 59.703125, |
| "epoch": 0.036544850498338874, |
| "grad_norm": 0.4359590411186218, |
| "kl": 0.00201416015625, |
| "learning_rate": 9.79375e-07, |
| "loss": 0.00610552029684186, |
| "reward": 2.3108657598495483, |
| "reward_std": 0.4415571391582489, |
| "rewards/GDino": 0.8515625, |
| "rewards/GIT": 0.6067334115505219, |
| "rewards/HPSv2": 0.22726917266845703, |
| "rewards/ORM": 0.6253007054328918, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.5, |
| "step": 33 |
| }, |
| { |
| "completion_length": 58.046875, |
| "epoch": 0.03765227021040975, |
| "grad_norm": 0.5853399038314819, |
| "kl": 0.002033233642578125, |
| "learning_rate": 9.7875e-07, |
| "loss": 0.023541483096778393, |
| "reward": 2.012690246105194, |
| "reward_std": 0.4660336524248123, |
| "rewards/GDino": 0.6989582777023315, |
| "rewards/GIT": 0.40700431168079376, |
| "rewards/HPSv2": 0.24774932861328125, |
| "rewards/ORM": 0.6589783728122711, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.8125, |
| "step": 34 |
| }, |
| { |
| "completion_length": 56.90625, |
| "epoch": 0.03875968992248062, |
| "grad_norm": 0.3787715435028076, |
| "kl": 0.001888275146484375, |
| "learning_rate": 9.78125e-07, |
| "loss": 0.003942073322832584, |
| "reward": 2.452033281326294, |
| "reward_std": 0.3410096764564514, |
| "rewards/GDino": 0.8359375298023224, |
| "rewards/GIT": 0.567652553319931, |
| "rewards/HPSv2": 0.2418804168701172, |
| "rewards/ORM": 0.806562751531601, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.3125, |
| "step": 35 |
| }, |
| { |
| "completion_length": 66.0, |
| "epoch": 0.03986710963455149, |
| "grad_norm": 0.5305721163749695, |
| "kl": 0.005157470703125, |
| "learning_rate": 9.775e-07, |
| "loss": -0.003781900042667985, |
| "reward": 1.8618011474609375, |
| "reward_std": 0.4120703786611557, |
| "rewards/GDino": 0.6453125476837158, |
| "rewards/GIT": 0.4281370937824249, |
| "rewards/HPSv2": 0.24621009826660156, |
| "rewards/ORM": 0.5421415567398071, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -20.9375, |
| "step": 36 |
| }, |
| { |
| "completion_length": 51.40625, |
| "epoch": 0.04097452934662237, |
| "grad_norm": 0.46515390276908875, |
| "kl": 0.002716064453125, |
| "learning_rate": 9.76875e-07, |
| "loss": 0.006902199704200029, |
| "reward": 1.9485998153686523, |
| "reward_std": 0.42147715389728546, |
| "rewards/GDino": 0.6951449513435364, |
| "rewards/GIT": 0.31057579815387726, |
| "rewards/HPSv2": 0.26158714294433594, |
| "rewards/ORM": 0.681291937828064, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -22.1875, |
| "step": 37 |
| }, |
| { |
| "completion_length": 71.03125, |
| "epoch": 0.042081949058693245, |
| "grad_norm": 0.951810896396637, |
| "kl": 0.00226593017578125, |
| "learning_rate": 9.7625e-07, |
| "loss": 0.03428783547133207, |
| "reward": 1.9112213850021362, |
| "reward_std": 0.30633312463760376, |
| "rewards/GDino": 0.7401995956897736, |
| "rewards/GIT": 0.30288365483283997, |
| "rewards/HPSv2": 0.2552833557128906, |
| "rewards/ORM": 0.6128547042608261, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.125, |
| "step": 38 |
| }, |
| { |
| "completion_length": 67.6875, |
| "epoch": 0.04318936877076412, |
| "grad_norm": 0.6357575058937073, |
| "kl": 0.01482391357421875, |
| "learning_rate": 9.756249999999999e-07, |
| "loss": 0.023865018505603075, |
| "reward": 2.345404624938965, |
| "reward_std": 0.31367097795009613, |
| "rewards/GDino": 0.8703815042972565, |
| "rewards/GIT": 0.4902418553829193, |
| "rewards/HPSv2": 0.26603126525878906, |
| "rewards/ORM": 0.71875, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -21.75, |
| "step": 39 |
| }, |
| { |
| "completion_length": 59.671875, |
| "epoch": 0.044296788482835, |
| "grad_norm": 0.5422465801239014, |
| "kl": 0.00281524658203125, |
| "learning_rate": 9.75e-07, |
| "loss": -0.018710695207118988, |
| "reward": 2.222834825515747, |
| "reward_std": 0.42842796444892883, |
| "rewards/GDino": 0.8634105622768402, |
| "rewards/GIT": 0.40908148139715195, |
| "rewards/HPSv2": 0.27498817443847656, |
| "rewards/ORM": 0.6753546893596649, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.125, |
| "step": 40 |
| }, |
| { |
| "completion_length": 60.9375, |
| "epoch": 0.04540420819490587, |
| "grad_norm": 0.7511593103408813, |
| "kl": 0.00299072265625, |
| "learning_rate": 9.743749999999999e-07, |
| "loss": 0.005782268475741148, |
| "reward": 1.8980144262313843, |
| "reward_std": 0.3208035007119179, |
| "rewards/GDino": 0.6784752607345581, |
| "rewards/GIT": 0.3914954513311386, |
| "rewards/HPSv2": 0.24643898010253906, |
| "rewards/ORM": 0.5816046595573425, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.25, |
| "step": 41 |
| }, |
| { |
| "completion_length": 48.4375, |
| "epoch": 0.046511627906976744, |
| "grad_norm": 0.5177002549171448, |
| "kl": 0.0025177001953125, |
| "learning_rate": 9.7375e-07, |
| "loss": 0.045526545494794846, |
| "reward": 2.269711136817932, |
| "reward_std": 0.48014624416828156, |
| "rewards/GDino": 0.8855312466621399, |
| "rewards/GIT": 0.4437972754240036, |
| "rewards/HPSv2": 0.2572154998779297, |
| "rewards/ORM": 0.6831671893596649, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.5625, |
| "step": 42 |
| }, |
| { |
| "completion_length": 67.875, |
| "epoch": 0.047619047619047616, |
| "grad_norm": 0.5885121822357178, |
| "kl": 0.002044677734375, |
| "learning_rate": 9.73125e-07, |
| "loss": 0.013573684729635715, |
| "reward": 1.6382005214691162, |
| "reward_std": 0.38919302821159363, |
| "rewards/GDino": 0.6114583313465118, |
| "rewards/GIT": 0.3806646466255188, |
| "rewards/HPSv2": 0.23286819458007812, |
| "rewards/ORM": 0.41320937871932983, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.0625, |
| "step": 43 |
| }, |
| { |
| "completion_length": 54.4375, |
| "epoch": 0.048726467331118496, |
| "grad_norm": 0.40727919340133667, |
| "kl": 0.0020751953125, |
| "learning_rate": 9.725e-07, |
| "loss": -0.01244093757122755, |
| "reward": 2.8831005096435547, |
| "reward_std": 0.31665875762701035, |
| "rewards/GDino": 0.9588541388511658, |
| "rewards/GIT": 0.7738310992717743, |
| "rewards/HPSv2": 0.2601909637451172, |
| "rewards/ORM": 0.8902243673801422, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -21.0625, |
| "step": 44 |
| }, |
| { |
| "completion_length": 54.90625, |
| "epoch": 0.04983388704318937, |
| "grad_norm": 0.4928445816040039, |
| "kl": 0.0024566650390625, |
| "learning_rate": 9.71875e-07, |
| "loss": 0.00010553281754255295, |
| "reward": 2.4343937635421753, |
| "reward_std": 0.5984751731157303, |
| "rewards/GDino": 0.862500011920929, |
| "rewards/GIT": 0.5139474421739578, |
| "rewards/HPSv2": 0.26379966735839844, |
| "rewards/ORM": 0.7941466569900513, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -21.25, |
| "step": 45 |
| }, |
| { |
| "completion_length": 56.078125, |
| "epoch": 0.05094130675526024, |
| "grad_norm": 0.37051326036453247, |
| "kl": 0.00231170654296875, |
| "learning_rate": 9.712499999999998e-07, |
| "loss": 0.007893505971878767, |
| "reward": 1.9575175046920776, |
| "reward_std": 0.3945648521184921, |
| "rewards/GDino": 0.5999999940395355, |
| "rewards/GIT": 0.32395021617412567, |
| "rewards/HPSv2": 0.26719093322753906, |
| "rewards/ORM": 0.7663763463497162, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -22.0625, |
| "step": 46 |
| }, |
| { |
| "completion_length": 55.171875, |
| "epoch": 0.05204872646733112, |
| "grad_norm": 0.8945181369781494, |
| "kl": 0.0025634765625, |
| "learning_rate": 9.70625e-07, |
| "loss": -0.0013387980870902538, |
| "reward": 1.836871862411499, |
| "reward_std": 0.23468619585037231, |
| "rewards/GDino": 0.7209739089012146, |
| "rewards/GIT": 0.22856376320123672, |
| "rewards/HPSv2": 0.27921295166015625, |
| "rewards/ORM": 0.608121246099472, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.6875, |
| "step": 47 |
| }, |
| { |
| "completion_length": 57.984375, |
| "epoch": 0.053156146179401995, |
| "grad_norm": 1.6689982414245605, |
| "kl": 0.00267791748046875, |
| "learning_rate": 9.7e-07, |
| "loss": 0.022647732868790627, |
| "reward": 1.454766035079956, |
| "reward_std": 0.40884387493133545, |
| "rewards/GDino": 0.6050891876220703, |
| "rewards/GIT": 0.0, |
| "rewards/HPSv2": 0.2698974609375, |
| "rewards/ORM": 0.5797793865203857, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.625, |
| "step": 48 |
| }, |
| { |
| "completion_length": 58.046875, |
| "epoch": 0.05426356589147287, |
| "grad_norm": 0.4761441648006439, |
| "kl": 0.002048492431640625, |
| "learning_rate": 9.69375e-07, |
| "loss": 0.016307475278154016, |
| "reward": 1.9066129326820374, |
| "reward_std": 0.5319462567567825, |
| "rewards/GDino": 0.7744874656200409, |
| "rewards/GIT": 0.2370736114680767, |
| "rewards/HPSv2": 0.2514495849609375, |
| "rewards/ORM": 0.6436022371053696, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.625, |
| "step": 49 |
| }, |
| { |
| "completion_length": 61.0, |
| "epoch": 0.05537098560354374, |
| "grad_norm": 0.8074173331260681, |
| "kl": 0.0040283203125, |
| "learning_rate": 9.6875e-07, |
| "loss": 0.005913220578804612, |
| "reward": 2.0915883779525757, |
| "reward_std": 0.5395111739635468, |
| "rewards/GDino": 0.7859093248844147, |
| "rewards/GIT": 0.3929952085018158, |
| "rewards/HPSv2": 0.25482940673828125, |
| "rewards/ORM": 0.657854437828064, |
| "self_certainty_semantic": -25.5625, |
| "self_certainty_token": -22.25, |
| "step": 50 |
| }, |
| { |
| "completion_length": 44.359375, |
| "epoch": 0.05647840531561462, |
| "grad_norm": 0.5618427991867065, |
| "kl": 0.002471923828125, |
| "learning_rate": 9.68125e-07, |
| "loss": -0.003945098840631545, |
| "reward": 1.8058671951293945, |
| "reward_std": 0.5712144523859024, |
| "rewards/GDino": 0.7815796732902527, |
| "rewards/GIT": 0.2604931816458702, |
| "rewards/HPSv2": 0.27115440368652344, |
| "rewards/ORM": 0.49263995885849, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -22.625, |
| "step": 51 |
| }, |
| { |
| "completion_length": 48.0, |
| "epoch": 0.05758582502768549, |
| "grad_norm": 107.57159423828125, |
| "kl": 26.37615966796875, |
| "learning_rate": 9.675e-07, |
| "loss": 0.27801212295889854, |
| "reward": 2.4165316820144653, |
| "reward_std": 0.2998274937272072, |
| "rewards/GDino": 0.9244791567325592, |
| "rewards/GIT": 0.6574473828077316, |
| "rewards/HPSv2": 0.2756366729736328, |
| "rewards/ORM": 0.5589684545993805, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.375, |
| "step": 52 |
| }, |
| { |
| "completion_length": 52.140625, |
| "epoch": 0.058693244739756366, |
| "grad_norm": 0.4408358931541443, |
| "kl": 0.00232696533203125, |
| "learning_rate": 9.66875e-07, |
| "loss": 0.013528472045436502, |
| "reward": 1.8899905681610107, |
| "reward_std": 0.4558149725198746, |
| "rewards/GDino": 0.730059951543808, |
| "rewards/GIT": 0.39098620414733887, |
| "rewards/HPSv2": 0.24242782592773438, |
| "rewards/ORM": 0.5265165567398071, |
| "self_certainty_semantic": -25.0, |
| "self_certainty_token": -20.5625, |
| "step": 53 |
| }, |
| { |
| "completion_length": 58.390625, |
| "epoch": 0.059800664451827246, |
| "grad_norm": 0.48384228348731995, |
| "kl": 0.00225067138671875, |
| "learning_rate": 9.6625e-07, |
| "loss": 0.005568797350861132, |
| "reward": 1.638724684715271, |
| "reward_std": 0.41337575018405914, |
| "rewards/GDino": 0.6137361526489258, |
| "rewards/GIT": 0.24863167852163315, |
| "rewards/HPSv2": 0.24831581115722656, |
| "rewards/ORM": 0.5280410945415497, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -20.9375, |
| "step": 54 |
| }, |
| { |
| "completion_length": 50.234375, |
| "epoch": 0.06090808416389812, |
| "grad_norm": 0.46963369846343994, |
| "kl": 0.0026397705078125, |
| "learning_rate": 9.65625e-07, |
| "loss": 0.009267964400351048, |
| "reward": 1.7191376686096191, |
| "reward_std": 0.521537572145462, |
| "rewards/GDino": 0.7086881995201111, |
| "rewards/GIT": 0.3270767852663994, |
| "rewards/HPSv2": 0.2678356170654297, |
| "rewards/ORM": 0.4155370891094208, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.6875, |
| "step": 55 |
| }, |
| { |
| "completion_length": 59.953125, |
| "epoch": 0.06201550387596899, |
| "grad_norm": 0.6913841366767883, |
| "kl": 0.0024261474609375, |
| "learning_rate": 9.649999999999999e-07, |
| "loss": 0.03414425998926163, |
| "reward": 1.9336698055267334, |
| "reward_std": 0.45749759674072266, |
| "rewards/GDino": 0.6963726580142975, |
| "rewards/GIT": 0.38425514101982117, |
| "rewards/HPSv2": 0.2471466064453125, |
| "rewards/ORM": 0.6058953106403351, |
| "self_certainty_semantic": -25.0625, |
| "self_certainty_token": -21.875, |
| "step": 56 |
| }, |
| { |
| "completion_length": 50.765625, |
| "epoch": 0.06312292358803986, |
| "grad_norm": 0.5066769123077393, |
| "kl": 0.002532958984375, |
| "learning_rate": 9.64375e-07, |
| "loss": 0.009842937346547842, |
| "reward": 1.8338811993598938, |
| "reward_std": 0.3951306492090225, |
| "rewards/GDino": 0.7909577786922455, |
| "rewards/GIT": 0.24781160056591034, |
| "rewards/HPSv2": 0.2509651184082031, |
| "rewards/ORM": 0.5441466569900513, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.4375, |
| "step": 57 |
| }, |
| { |
| "completion_length": 52.9375, |
| "epoch": 0.06423034330011074, |
| "grad_norm": 0.37791869044303894, |
| "kl": 0.002685546875, |
| "learning_rate": 9.637499999999999e-07, |
| "loss": 0.024126023054122925, |
| "reward": 1.8852884769439697, |
| "reward_std": 0.46756890416145325, |
| "rewards/GDino": 0.732811689376831, |
| "rewards/GIT": 0.38145140558481216, |
| "rewards/HPSv2": 0.2541465759277344, |
| "rewards/ORM": 0.5168787688016891, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.25, |
| "step": 58 |
| }, |
| { |
| "completion_length": 47.234375, |
| "epoch": 0.06533776301218161, |
| "grad_norm": 0.7410405278205872, |
| "kl": 0.0026092529296875, |
| "learning_rate": 9.63125e-07, |
| "loss": -0.01674468442797661, |
| "reward": 2.3462648391723633, |
| "reward_std": 0.2433818019926548, |
| "rewards/GDino": 0.8425607979297638, |
| "rewards/GIT": 0.46571947634220123, |
| "rewards/HPSv2": 0.2664222717285156, |
| "rewards/ORM": 0.771562248468399, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.25, |
| "step": 59 |
| }, |
| { |
| "completion_length": 45.015625, |
| "epoch": 0.0664451827242525, |
| "grad_norm": 0.5326105952262878, |
| "kl": 0.0026397705078125, |
| "learning_rate": 9.624999999999999e-07, |
| "loss": 0.003804182168096304, |
| "reward": 2.036432147026062, |
| "reward_std": 0.3990803211927414, |
| "rewards/GDino": 0.8798050284385681, |
| "rewards/GIT": 0.4744318723678589, |
| "rewards/HPSv2": 0.238006591796875, |
| "rewards/ORM": 0.44418865442276, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.6875, |
| "step": 60 |
| }, |
| { |
| "completion_length": 65.0, |
| "epoch": 0.06755260243632337, |
| "grad_norm": 0.5713196396827698, |
| "kl": 0.00235748291015625, |
| "learning_rate": 9.61875e-07, |
| "loss": 0.04368375800549984, |
| "reward": 2.1398236751556396, |
| "reward_std": 0.3530130609869957, |
| "rewards/GDino": 0.7138020694255829, |
| "rewards/GIT": 0.644903838634491, |
| "rewards/HPSv2": 0.2529468536376953, |
| "rewards/ORM": 0.5281709432601929, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -20.6875, |
| "step": 61 |
| }, |
| { |
| "completion_length": 54.9375, |
| "epoch": 0.06866002214839424, |
| "grad_norm": 5.612445831298828, |
| "kl": 0.00339508056640625, |
| "learning_rate": 9.6125e-07, |
| "loss": 0.008875304833054543, |
| "reward": 2.497900605201721, |
| "reward_std": 0.41675496101379395, |
| "rewards/GDino": 0.872697502374649, |
| "rewards/GIT": 0.601748138666153, |
| "rewards/HPSv2": 0.2640380859375, |
| "rewards/ORM": 0.759416937828064, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.875, |
| "step": 62 |
| }, |
| { |
| "completion_length": 49.734375, |
| "epoch": 0.06976744186046512, |
| "grad_norm": 0.5861217379570007, |
| "kl": 0.003326416015625, |
| "learning_rate": 9.606249999999998e-07, |
| "loss": 0.01025251136161387, |
| "reward": 2.2640050053596497, |
| "reward_std": 0.48744213581085205, |
| "rewards/GDino": 0.8172852694988251, |
| "rewards/GIT": 0.44742196798324585, |
| "rewards/HPSv2": 0.2430896759033203, |
| "rewards/ORM": 0.7562080323696136, |
| "self_certainty_semantic": -25.125, |
| "self_certainty_token": -22.1875, |
| "step": 63 |
| }, |
| { |
| "completion_length": 64.375, |
| "epoch": 0.07087486157253599, |
| "grad_norm": 0.39266109466552734, |
| "kl": 0.00298309326171875, |
| "learning_rate": 9.6e-07, |
| "loss": -0.005469436291605234, |
| "reward": 1.6910768747329712, |
| "reward_std": 0.2151722088456154, |
| "rewards/GDino": 0.7097718715667725, |
| "rewards/GIT": 0.32366037368774414, |
| "rewards/HPSv2": 0.2576026916503906, |
| "rewards/ORM": 0.40004195272922516, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.9375, |
| "step": 64 |
| }, |
| { |
| "completion_length": 61.515625, |
| "epoch": 0.07198228128460686, |
| "grad_norm": 0.705937922000885, |
| "kl": 0.002685546875, |
| "learning_rate": 9.59375e-07, |
| "loss": 0.010601098649203777, |
| "reward": 2.128853142261505, |
| "reward_std": 0.4351096749305725, |
| "rewards/GDino": 0.7197916805744171, |
| "rewards/GIT": 0.6168824732303619, |
| "rewards/HPSv2": 0.23163414001464844, |
| "rewards/ORM": 0.5605448335409164, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -22.75, |
| "step": 65 |
| }, |
| { |
| "completion_length": 49.0, |
| "epoch": 0.07308970099667775, |
| "grad_norm": 0.4427480101585388, |
| "kl": 0.002899169921875, |
| "learning_rate": 9.5875e-07, |
| "loss": 0.02646360918879509, |
| "reward": 2.1654986143112183, |
| "reward_std": 0.37753987312316895, |
| "rewards/GDino": 0.6895833611488342, |
| "rewards/GIT": 0.48387444019317627, |
| "rewards/HPSv2": 0.2579364776611328, |
| "rewards/ORM": 0.7341042160987854, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.5625, |
| "step": 66 |
| }, |
| { |
| "completion_length": 63.140625, |
| "epoch": 0.07419712070874862, |
| "grad_norm": 0.7619237899780273, |
| "kl": 0.00284576416015625, |
| "learning_rate": 9.58125e-07, |
| "loss": 0.026691121514886618, |
| "reward": 2.3450592160224915, |
| "reward_std": 0.2740027904510498, |
| "rewards/GDino": 0.8025760054588318, |
| "rewards/GIT": 0.5677543580532074, |
| "rewards/HPSv2": 0.2594585418701172, |
| "rewards/ORM": 0.7152703106403351, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.875, |
| "step": 67 |
| }, |
| { |
| "completion_length": 50.171875, |
| "epoch": 0.0753045404208195, |
| "grad_norm": 0.4760603904724121, |
| "kl": 0.0030517578125, |
| "learning_rate": 9.575e-07, |
| "loss": 0.022392848506569862, |
| "reward": 1.6361079216003418, |
| "reward_std": 0.33574268221855164, |
| "rewards/GDino": 0.6061920523643494, |
| "rewards/GIT": 0.31722745299339294, |
| "rewards/HPSv2": 0.2595634460449219, |
| "rewards/ORM": 0.453125, |
| "self_certainty_semantic": -25.0625, |
| "self_certainty_token": -21.5, |
| "step": 68 |
| }, |
| { |
| "completion_length": 55.046875, |
| "epoch": 0.07641196013289037, |
| "grad_norm": 0.5907943248748779, |
| "kl": 0.00336456298828125, |
| "learning_rate": 9.56875e-07, |
| "loss": -0.0030646873638033867, |
| "reward": 2.119426429271698, |
| "reward_std": 0.298831045627594, |
| "rewards/GDino": 0.8028125166893005, |
| "rewards/GIT": 0.3893257826566696, |
| "rewards/HPSv2": 0.2710380554199219, |
| "rewards/ORM": 0.65625, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.4375, |
| "step": 69 |
| }, |
| { |
| "completion_length": 51.140625, |
| "epoch": 0.07751937984496124, |
| "grad_norm": 0.47751373052597046, |
| "kl": 0.00359344482421875, |
| "learning_rate": 9.5625e-07, |
| "loss": -0.011344656813889742, |
| "reward": 1.4646188020706177, |
| "reward_std": 0.5817874372005463, |
| "rewards/GDino": 0.5935695767402649, |
| "rewards/GIT": 0.23897356167435646, |
| "rewards/HPSv2": 0.25234222412109375, |
| "rewards/ORM": 0.37973344326019287, |
| "self_certainty_semantic": -24.875, |
| "self_certainty_token": -20.9375, |
| "step": 70 |
| }, |
| { |
| "completion_length": 55.609375, |
| "epoch": 0.07862679955703211, |
| "grad_norm": 0.5281980633735657, |
| "kl": 0.0030364990234375, |
| "learning_rate": 9.556249999999999e-07, |
| "loss": -0.023217559792101383, |
| "reward": 1.856022596359253, |
| "reward_std": 0.4435942769050598, |
| "rewards/GDino": 0.6947268545627594, |
| "rewards/GIT": 0.28702250868082047, |
| "rewards/HPSv2": 0.26489830017089844, |
| "rewards/ORM": 0.609375, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -20.5, |
| "step": 71 |
| }, |
| { |
| "completion_length": 42.390625, |
| "epoch": 0.07973421926910298, |
| "grad_norm": 0.4538242518901825, |
| "kl": 0.002960205078125, |
| "learning_rate": 9.55e-07, |
| "loss": 0.016265914775431156, |
| "reward": 1.911847174167633, |
| "reward_std": 0.4016146659851074, |
| "rewards/GDino": 0.6731474995613098, |
| "rewards/GIT": 0.46439771354198456, |
| "rewards/HPSv2": 0.2497406005859375, |
| "rewards/ORM": 0.524561420083046, |
| "self_certainty_semantic": -25.0, |
| "self_certainty_token": -20.875, |
| "step": 72 |
| }, |
| { |
| "completion_length": 53.28125, |
| "epoch": 0.08084163898117387, |
| "grad_norm": 0.5773823261260986, |
| "kl": 0.00330352783203125, |
| "learning_rate": 9.543749999999999e-07, |
| "loss": -0.0016377167776226997, |
| "reward": 2.114488363265991, |
| "reward_std": 0.44427454471588135, |
| "rewards/GDino": 0.8240922689437866, |
| "rewards/GIT": 0.4950668513774872, |
| "rewards/HPSv2": 0.24412155151367188, |
| "rewards/ORM": 0.5512077808380127, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.0, |
| "step": 73 |
| }, |
| { |
| "completion_length": 56.296875, |
| "epoch": 0.08194905869324474, |
| "grad_norm": 0.43449509143829346, |
| "kl": 0.0035247802734375, |
| "learning_rate": 9.5375e-07, |
| "loss": 0.03005522396415472, |
| "reward": 2.32301664352417, |
| "reward_std": 0.22542773187160492, |
| "rewards/GDino": 0.864062488079071, |
| "rewards/GIT": 0.5282620340585709, |
| "rewards/HPSv2": 0.25408363342285156, |
| "rewards/ORM": 0.6766084730625153, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -22.25, |
| "step": 74 |
| }, |
| { |
| "completion_length": 67.6875, |
| "epoch": 0.08305647840531562, |
| "grad_norm": 0.4218258857727051, |
| "kl": 0.0028228759765625, |
| "learning_rate": 9.53125e-07, |
| "loss": 0.015081442426890135, |
| "reward": 1.7625158429145813, |
| "reward_std": 0.4334114193916321, |
| "rewards/GDino": 0.6663236618041992, |
| "rewards/GIT": 0.26877461373806, |
| "rewards/HPSv2": 0.2647876739501953, |
| "rewards/ORM": 0.5626298785209656, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -21.125, |
| "step": 75 |
| }, |
| { |
| "completion_length": 62.15625, |
| "epoch": 0.08416389811738649, |
| "grad_norm": 0.45278123021125793, |
| "kl": 0.00312042236328125, |
| "learning_rate": 9.525e-07, |
| "loss": 0.01650754688307643, |
| "reward": 2.2938578128814697, |
| "reward_std": 0.5077499151229858, |
| "rewards/GDino": 0.7734375, |
| "rewards/GIT": 0.6401466727256775, |
| "rewards/HPSv2": 0.2568778991699219, |
| "rewards/ORM": 0.6233955323696136, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -21.125, |
| "step": 76 |
| }, |
| { |
| "completion_length": 50.984375, |
| "epoch": 0.08527131782945736, |
| "grad_norm": 0.5513558387756348, |
| "kl": 0.004730224609375, |
| "learning_rate": 9.51875e-07, |
| "loss": -0.008258584188297391, |
| "reward": 1.6354877948760986, |
| "reward_std": 0.5420883148908615, |
| "rewards/GDino": 0.643737405538559, |
| "rewards/GIT": 0.20579323172569275, |
| "rewards/HPSv2": 0.2405567169189453, |
| "rewards/ORM": 0.5454003810882568, |
| "self_certainty_semantic": -25.0625, |
| "self_certainty_token": -22.1875, |
| "step": 77 |
| }, |
| { |
| "completion_length": 56.390625, |
| "epoch": 0.08637873754152824, |
| "grad_norm": 0.9578920602798462, |
| "kl": 0.00360107421875, |
| "learning_rate": 9.5125e-07, |
| "loss": 0.0016261041164398193, |
| "reward": 2.061507523059845, |
| "reward_std": 0.2758500352501869, |
| "rewards/GDino": 0.7561410367488861, |
| "rewards/GIT": 0.33666322380304337, |
| "rewards/HPSv2": 0.2762489318847656, |
| "rewards/ORM": 0.6924542784690857, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.1875, |
| "step": 78 |
| }, |
| { |
| "completion_length": 57.375, |
| "epoch": 0.08748615725359911, |
| "grad_norm": 0.46459418535232544, |
| "kl": 0.004241943359375, |
| "learning_rate": 9.50625e-07, |
| "loss": -0.019409675151109695, |
| "reward": 2.298323154449463, |
| "reward_std": 0.22066934406757355, |
| "rewards/GDino": 0.8136925399303436, |
| "rewards/GIT": 0.6333461850881577, |
| "rewards/HPSv2": 0.27008056640625, |
| "rewards/ORM": 0.5812040567398071, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -21.875, |
| "step": 79 |
| }, |
| { |
| "completion_length": 60.5625, |
| "epoch": 0.08859357696567, |
| "grad_norm": 0.4274587631225586, |
| "kl": 0.004058837890625, |
| "learning_rate": 9.499999999999999e-07, |
| "loss": 0.013256619684398174, |
| "reward": 1.6786987781524658, |
| "reward_std": 0.3984425514936447, |
| "rewards/GDino": 0.6007516384124756, |
| "rewards/GIT": 0.18326736986637115, |
| "rewards/HPSv2": 0.2720355987548828, |
| "rewards/ORM": 0.6226442158222198, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.3125, |
| "step": 80 |
| }, |
| { |
| "completion_length": 58.03125, |
| "epoch": 0.08970099667774087, |
| "grad_norm": 0.9172859191894531, |
| "kl": 0.00426483154296875, |
| "learning_rate": 9.493749999999999e-07, |
| "loss": 0.003496276680380106, |
| "reward": 2.106017231941223, |
| "reward_std": 0.30050399899482727, |
| "rewards/GDino": 0.7440759837627411, |
| "rewards/GIT": 0.3581302911043167, |
| "rewards/HPSv2": 0.27126121520996094, |
| "rewards/ORM": 0.7325496971607208, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -20.875, |
| "step": 81 |
| }, |
| { |
| "completion_length": 49.5625, |
| "epoch": 0.09080841638981174, |
| "grad_norm": 0.4841405153274536, |
| "kl": 0.00412750244140625, |
| "learning_rate": 9.487499999999999e-07, |
| "loss": 0.025506282225251198, |
| "reward": 1.6879253387451172, |
| "reward_std": 0.42353254556655884, |
| "rewards/GDino": 0.6098452508449554, |
| "rewards/GIT": 0.38033944368362427, |
| "rewards/HPSv2": 0.2658271789550781, |
| "rewards/ORM": 0.43191343545913696, |
| "self_certainty_semantic": -25.4375, |
| "self_certainty_token": -20.9375, |
| "step": 82 |
| }, |
| { |
| "completion_length": 48.328125, |
| "epoch": 0.09191583610188261, |
| "grad_norm": 0.492243230342865, |
| "kl": 0.00345611572265625, |
| "learning_rate": 9.481249999999999e-07, |
| "loss": -0.0034960508346557617, |
| "reward": 2.1111596822738647, |
| "reward_std": 0.41540510952472687, |
| "rewards/GDino": 0.7717877924442291, |
| "rewards/GIT": 0.4860316216945648, |
| "rewards/HPSv2": 0.2670021057128906, |
| "rewards/ORM": 0.5863381326198578, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.4375, |
| "step": 83 |
| }, |
| { |
| "completion_length": 66.3125, |
| "epoch": 0.09302325581395349, |
| "grad_norm": 0.5617808699607849, |
| "kl": 0.004180908203125, |
| "learning_rate": 9.474999999999999e-07, |
| "loss": 0.003248518332839012, |
| "reward": 2.094790816307068, |
| "reward_std": 0.3879907354712486, |
| "rewards/GDino": 0.7973622679710388, |
| "rewards/GIT": 0.632976621389389, |
| "rewards/HPSv2": 0.24137306213378906, |
| "rewards/ORM": 0.4230788052082062, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.1875, |
| "step": 84 |
| }, |
| { |
| "completion_length": 51.40625, |
| "epoch": 0.09413067552602436, |
| "grad_norm": 0.5695884823799133, |
| "kl": 0.003204345703125, |
| "learning_rate": 9.468749999999999e-07, |
| "loss": 0.012543351389467716, |
| "reward": 1.8675293326377869, |
| "reward_std": 0.4282868355512619, |
| "rewards/GDino": 0.6550000011920929, |
| "rewards/GIT": 0.33260630816221237, |
| "rewards/HPSv2": 0.24515533447265625, |
| "rewards/ORM": 0.6347676515579224, |
| "self_certainty_semantic": -25.0625, |
| "self_certainty_token": -21.375, |
| "step": 85 |
| }, |
| { |
| "completion_length": 48.296875, |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.46590158343315125, |
| "kl": 0.00469970703125, |
| "learning_rate": 9.462499999999999e-07, |
| "loss": 0.00347991194576025, |
| "reward": 2.2731298208236694, |
| "reward_std": 0.383390873670578, |
| "rewards/GDino": 0.8246111273765564, |
| "rewards/GIT": 0.33447980135679245, |
| "rewards/HPSv2": 0.29212188720703125, |
| "rewards/ORM": 0.821916937828064, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -22.0, |
| "step": 86 |
| }, |
| { |
| "completion_length": 54.390625, |
| "epoch": 0.09634551495016612, |
| "grad_norm": 0.5397853255271912, |
| "kl": 0.004425048828125, |
| "learning_rate": 9.45625e-07, |
| "loss": 0.008617566898465157, |
| "reward": 2.2459940314292908, |
| "reward_std": 0.4676859378814697, |
| "rewards/GDino": 0.7356771230697632, |
| "rewards/GIT": 0.46453191339969635, |
| "rewards/HPSv2": 0.26766395568847656, |
| "rewards/ORM": 0.7781210243701935, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.0, |
| "step": 87 |
| }, |
| { |
| "completion_length": 42.15625, |
| "epoch": 0.09745293466223699, |
| "grad_norm": 0.48280662298202515, |
| "kl": 0.00406646728515625, |
| "learning_rate": 9.45e-07, |
| "loss": 0.016791983507573605, |
| "reward": 2.1528985500335693, |
| "reward_std": 0.44025059044361115, |
| "rewards/GDino": 0.7985424101352692, |
| "rewards/GIT": 0.47699007391929626, |
| "rewards/HPSv2": 0.2789325714111328, |
| "rewards/ORM": 0.5984334945678711, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -20.875, |
| "step": 88 |
| }, |
| { |
| "completion_length": 48.25, |
| "epoch": 0.09856035437430787, |
| "grad_norm": 0.4512772560119629, |
| "kl": 0.00405120849609375, |
| "learning_rate": 9.44375e-07, |
| "loss": -0.009609811007976532, |
| "reward": 2.155352771282196, |
| "reward_std": 0.3193782642483711, |
| "rewards/GDino": 0.7525902688503265, |
| "rewards/GIT": 0.4481022357940674, |
| "rewards/HPSv2": 0.2619743347167969, |
| "rewards/ORM": 0.6926859021186829, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -21.125, |
| "step": 89 |
| }, |
| { |
| "completion_length": 47.78125, |
| "epoch": 0.09966777408637874, |
| "grad_norm": 0.5204576849937439, |
| "kl": 0.004425048828125, |
| "learning_rate": 9.4375e-07, |
| "loss": -0.017570611089468002, |
| "reward": 2.3318194150924683, |
| "reward_std": 0.3641355484724045, |
| "rewards/GDino": 0.854687511920929, |
| "rewards/GIT": 0.6271218061447144, |
| "rewards/HPSv2": 0.26286888122558594, |
| "rewards/ORM": 0.5871412754058838, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.4375, |
| "step": 90 |
| }, |
| { |
| "completion_length": 54.9375, |
| "epoch": 0.10077519379844961, |
| "grad_norm": 0.7515896558761597, |
| "kl": 0.0042266845703125, |
| "learning_rate": 9.43125e-07, |
| "loss": 0.022024651989340782, |
| "reward": 1.7255874276161194, |
| "reward_std": 0.3924099802970886, |
| "rewards/GDino": 0.6800954043865204, |
| "rewards/GIT": 0.41760827600955963, |
| "rewards/HPSv2": 0.22957611083984375, |
| "rewards/ORM": 0.3983076214790344, |
| "self_certainty_semantic": -24.875, |
| "self_certainty_token": -21.125, |
| "step": 91 |
| }, |
| { |
| "completion_length": 51.90625, |
| "epoch": 0.10188261351052048, |
| "grad_norm": 0.6844750046730042, |
| "kl": 0.004730224609375, |
| "learning_rate": 9.425e-07, |
| "loss": 0.017017286270856857, |
| "reward": 1.7472361326217651, |
| "reward_std": 0.49342362582683563, |
| "rewards/GDino": 0.7615998685359955, |
| "rewards/GIT": 0.3799494504928589, |
| "rewards/HPSv2": 0.2450580596923828, |
| "rewards/ORM": 0.36062875390052795, |
| "self_certainty_semantic": -25.5, |
| "self_certainty_token": -21.0625, |
| "step": 92 |
| }, |
| { |
| "completion_length": 52.5625, |
| "epoch": 0.10299003322259136, |
| "grad_norm": 0.476144403219223, |
| "kl": 0.004547119140625, |
| "learning_rate": 9.41875e-07, |
| "loss": -0.006627652794122696, |
| "reward": 2.3529324531555176, |
| "reward_std": 0.38789400458335876, |
| "rewards/GDino": 0.8122400343418121, |
| "rewards/GIT": 0.40920257568359375, |
| "rewards/HPSv2": 0.25894737243652344, |
| "rewards/ORM": 0.8725424408912659, |
| "self_certainty_semantic": -25.1875, |
| "self_certainty_token": -20.75, |
| "step": 93 |
| }, |
| { |
| "completion_length": 45.234375, |
| "epoch": 0.10409745293466224, |
| "grad_norm": 0.4303518235683441, |
| "kl": 0.00390625, |
| "learning_rate": 9.4125e-07, |
| "loss": 0.002329372800886631, |
| "reward": 2.063507556915283, |
| "reward_std": 0.4875355362892151, |
| "rewards/GDino": 0.8157378733158112, |
| "rewards/GIT": 0.2162991166114807, |
| "rewards/HPSv2": 0.2860240936279297, |
| "rewards/ORM": 0.7454463839530945, |
| "self_certainty_semantic": -25.3125, |
| "self_certainty_token": -22.25, |
| "step": 94 |
| }, |
| { |
| "completion_length": 54.21875, |
| "epoch": 0.10520487264673312, |
| "grad_norm": 0.9745371341705322, |
| "kl": 0.004852294921875, |
| "learning_rate": 9.40625e-07, |
| "loss": 0.015892890747636557, |
| "reward": 2.4900766611099243, |
| "reward_std": 0.33158986270427704, |
| "rewards/GDino": 0.9456690549850464, |
| "rewards/GIT": 0.7110534906387329, |
| "rewards/HPSv2": 0.2568836212158203, |
| "rewards/ORM": 0.5764705836772919, |
| "self_certainty_semantic": -25.625, |
| "self_certainty_token": -21.3125, |
| "step": 95 |
| }, |
| { |
| "completion_length": 62.953125, |
| "epoch": 0.10631229235880399, |
| "grad_norm": 1.6108874082565308, |
| "kl": 0.00475311279296875, |
| "learning_rate": 9.399999999999999e-07, |
| "loss": 0.012537557166069746, |
| "reward": 2.4274561405181885, |
| "reward_std": 0.3028244078159332, |
| "rewards/GDino": 0.9155160486698151, |
| "rewards/GIT": 0.6933247745037079, |
| "rewards/HPSv2": 0.25919437408447266, |
| "rewards/ORM": 0.5594209432601929, |
| "self_certainty_semantic": -25.375, |
| "self_certainty_token": -20.3125, |
| "step": 96 |
| }, |
| { |
| "completion_length": 44.890625, |
| "epoch": 0.10741971207087486, |
| "grad_norm": 0.42777886986732483, |
| "kl": 0.0064697265625, |
| "learning_rate": 9.393749999999999e-07, |
| "loss": 0.006582918576896191, |
| "reward": 1.7229499220848083, |
| "reward_std": 0.29571742564439774, |
| "rewards/GDino": 0.6976552903652191, |
| "rewards/GIT": 0.17514611035585403, |
| "rewards/HPSv2": 0.2757740020751953, |
| "rewards/ORM": 0.5743745565414429, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.5, |
| "step": 97 |
| }, |
| { |
| "completion_length": 52.78125, |
| "epoch": 0.10852713178294573, |
| "grad_norm": 0.4346785247325897, |
| "kl": 0.00446319580078125, |
| "learning_rate": 9.387499999999999e-07, |
| "loss": 0.010664775501936674, |
| "reward": 1.9896260499954224, |
| "reward_std": 0.5384568274021149, |
| "rewards/GDino": 0.7534899115562439, |
| "rewards/GIT": 0.416723370552063, |
| "rewards/HPSv2": 0.25490760803222656, |
| "rewards/ORM": 0.5645051300525665, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.4375, |
| "step": 98 |
| }, |
| { |
| "completion_length": 53.546875, |
| "epoch": 0.10963455149501661, |
| "grad_norm": 0.4226502478122711, |
| "kl": 0.00543212890625, |
| "learning_rate": 9.381249999999999e-07, |
| "loss": -0.009754271944984794, |
| "reward": 2.1711018085479736, |
| "reward_std": 0.3036491945385933, |
| "rewards/GDino": 0.8239583373069763, |
| "rewards/GIT": 0.6844146698713303, |
| "rewards/HPSv2": 0.24951934814453125, |
| "rewards/ORM": 0.41320937871932983, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -22.0, |
| "step": 99 |
| }, |
| { |
| "completion_length": 48.0625, |
| "epoch": 0.11074197120708748, |
| "grad_norm": 0.4250389039516449, |
| "kl": 0.00537109375, |
| "learning_rate": 9.374999999999999e-07, |
| "loss": -0.014408082235604525, |
| "reward": 1.9375371932983398, |
| "reward_std": 0.4484590142965317, |
| "rewards/GDino": 0.6897697150707245, |
| "rewards/GIT": 0.4094943553209305, |
| "rewards/HPSv2": 0.2472515106201172, |
| "rewards/ORM": 0.5910216569900513, |
| "self_certainty_semantic": -25.25, |
| "self_certainty_token": -21.4375, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|