|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.377302885055542, |
|
"logits/rejected": -2.2193148136138916, |
|
"logps/chosen": -290.4185485839844, |
|
"logps/rejected": -374.6668701171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.2492425441741943, |
|
"logits/rejected": -2.0517687797546387, |
|
"logps/chosen": -279.6344909667969, |
|
"logps/rejected": -245.47564697265625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4930555522441864, |
|
"rewards/chosen": 0.0005959311965852976, |
|
"rewards/margins": 0.000615339376963675, |
|
"rewards/rejected": -1.9408274965826422e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.5, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.244947671890259, |
|
"logits/rejected": -1.943969964981079, |
|
"logps/chosen": -305.4734802246094, |
|
"logps/rejected": -237.70083618164062, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.004091521259397268, |
|
"rewards/margins": 0.000647729029878974, |
|
"rewards/rejected": 0.0034437919966876507, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.205514907836914, |
|
"logits/rejected": -2.1370320320129395, |
|
"logps/chosen": -251.25662231445312, |
|
"logps/rejected": -251.41213989257812, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.011662699282169342, |
|
"rewards/margins": 0.0018940108129754663, |
|
"rewards/rejected": 0.00976868998259306, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.0618391036987305, |
|
"logits/rejected": -2.0241973400115967, |
|
"logps/chosen": -216.21438598632812, |
|
"logps/rejected": -221.6951141357422, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.019298259168863297, |
|
"rewards/margins": 0.0034350629430264235, |
|
"rewards/rejected": 0.015863195061683655, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.1124298572540283, |
|
"logits/rejected": -2.1008057594299316, |
|
"logps/chosen": -266.8966064453125, |
|
"logps/rejected": -234.32998657226562, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.029909158125519753, |
|
"rewards/margins": 0.005118774715811014, |
|
"rewards/rejected": 0.024790380150079727, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.125, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.099602460861206, |
|
"logits/rejected": -1.9424635171890259, |
|
"logps/chosen": -252.27310180664062, |
|
"logps/rejected": -226.72030639648438, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.03226853534579277, |
|
"rewards/margins": 0.007134293206036091, |
|
"rewards/rejected": 0.025134241208434105, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.2440109252929688, |
|
"logits/rejected": -2.036339282989502, |
|
"logps/chosen": -272.09234619140625, |
|
"logps/rejected": -246.6947784423828, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.04063863307237625, |
|
"rewards/margins": 0.010386193171143532, |
|
"rewards/rejected": 0.03025243617594242, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.359375, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.1541717052459717, |
|
"logits/rejected": -1.9777501821517944, |
|
"logps/chosen": -257.61871337890625, |
|
"logps/rejected": -246.86483764648438, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.038099195808172226, |
|
"rewards/margins": 0.011846454814076424, |
|
"rewards/rejected": 0.026252740994095802, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.1348958015441895, |
|
"logits/rejected": -1.998792290687561, |
|
"logps/chosen": -250.1610107421875, |
|
"logps/rejected": -234.56787109375, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.04175186529755592, |
|
"rewards/margins": 0.01758204773068428, |
|
"rewards/rejected": 0.024169817566871643, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.125, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.1793951988220215, |
|
"logits/rejected": -2.0686168670654297, |
|
"logps/chosen": -247.0215301513672, |
|
"logps/rejected": -230.79537963867188, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.04747994989156723, |
|
"rewards/margins": 0.0229250006377697, |
|
"rewards/rejected": 0.024554943665862083, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -2.0950841903686523, |
|
"eval_logits/rejected": -1.9557065963745117, |
|
"eval_logps/chosen": -259.6705627441406, |
|
"eval_logps/rejected": -241.93917846679688, |
|
"eval_loss": 0.6820979714393616, |
|
"eval_rewards/accuracies": 0.656499981880188, |
|
"eval_rewards/chosen": 0.04981444031000137, |
|
"eval_rewards/margins": 0.02312026545405388, |
|
"eval_rewards/rejected": 0.026694171130657196, |
|
"eval_runtime": 385.815, |
|
"eval_samples_per_second": 5.184, |
|
"eval_steps_per_second": 0.648, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.146080493927002, |
|
"logits/rejected": -2.002453327178955, |
|
"logps/chosen": -284.4079895019531, |
|
"logps/rejected": -238.9375457763672, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04955831170082092, |
|
"rewards/margins": 0.029538575559854507, |
|
"rewards/rejected": 0.020019738003611565, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.1928741931915283, |
|
"logits/rejected": -2.0533928871154785, |
|
"logps/chosen": -287.5110778808594, |
|
"logps/rejected": -271.9446716308594, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05533873289823532, |
|
"rewards/margins": 0.04249165579676628, |
|
"rewards/rejected": 0.01284707523882389, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.671875, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.2082314491271973, |
|
"logits/rejected": -2.118213653564453, |
|
"logps/chosen": -250.14013671875, |
|
"logps/rejected": -252.6034393310547, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05011880397796631, |
|
"rewards/margins": 0.048879969865083694, |
|
"rewards/rejected": 0.001238831551745534, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.5, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.24537992477417, |
|
"logits/rejected": -1.9110206365585327, |
|
"logps/chosen": -270.5356750488281, |
|
"logps/rejected": -226.2827606201172, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04202268272638321, |
|
"rewards/margins": 0.052745603024959564, |
|
"rewards/rejected": -0.010722924955189228, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.264875888824463, |
|
"logits/rejected": -2.0387892723083496, |
|
"logps/chosen": -280.36077880859375, |
|
"logps/rejected": -242.8515625, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.036501698195934296, |
|
"rewards/margins": 0.05586882680654526, |
|
"rewards/rejected": -0.019367124885320663, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.15449595451355, |
|
"logits/rejected": -2.0523486137390137, |
|
"logps/chosen": -256.1204833984375, |
|
"logps/rejected": -261.9712219238281, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.007034213747829199, |
|
"rewards/margins": 0.0563817024230957, |
|
"rewards/rejected": -0.04934748262166977, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.890625, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.1238508224487305, |
|
"logits/rejected": -1.9688222408294678, |
|
"logps/chosen": -220.9573211669922, |
|
"logps/rejected": -228.40869140625, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.003666641190648079, |
|
"rewards/margins": 0.05213465169072151, |
|
"rewards/rejected": -0.05580129101872444, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.1223385334014893, |
|
"logits/rejected": -1.9868714809417725, |
|
"logps/chosen": -258.9825134277344, |
|
"logps/rejected": -252.4698944091797, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03278004750609398, |
|
"rewards/margins": 0.06821247935295105, |
|
"rewards/rejected": -0.10099252313375473, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.2460696697235107, |
|
"logits/rejected": -2.0304675102233887, |
|
"logps/chosen": -274.5130920410156, |
|
"logps/rejected": -256.2106628417969, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04428885504603386, |
|
"rewards/margins": 0.10040076822042465, |
|
"rewards/rejected": -0.1446896344423294, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.375, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.1960341930389404, |
|
"logits/rejected": -1.95565927028656, |
|
"logps/chosen": -259.01934814453125, |
|
"logps/rejected": -231.2660369873047, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.09310005605220795, |
|
"rewards/margins": 0.10362167656421661, |
|
"rewards/rejected": -0.19672173261642456, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.068035125732422, |
|
"eval_logits/rejected": -1.9312690496444702, |
|
"eval_logps/chosen": -270.0797119140625, |
|
"eval_logps/rejected": -260.6905517578125, |
|
"eval_loss": 0.6486819982528687, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -0.05427735298871994, |
|
"eval_rewards/margins": 0.10654205083847046, |
|
"eval_rewards/rejected": -0.160819411277771, |
|
"eval_runtime": 385.2774, |
|
"eval_samples_per_second": 5.191, |
|
"eval_steps_per_second": 0.649, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.197986602783203, |
|
"logits/rejected": -1.9808934926986694, |
|
"logps/chosen": -267.27685546875, |
|
"logps/rejected": -249.9297637939453, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.05056775361299515, |
|
"rewards/margins": 0.14322780072689056, |
|
"rewards/rejected": -0.1937955617904663, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.0990307331085205, |
|
"logits/rejected": -1.983565330505371, |
|
"logps/chosen": -270.3437194824219, |
|
"logps/rejected": -256.6988525390625, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16327962279319763, |
|
"rewards/margins": 0.12751872837543488, |
|
"rewards/rejected": -0.2907983660697937, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.2433676719665527, |
|
"logits/rejected": -2.056224822998047, |
|
"logps/chosen": -314.1068420410156, |
|
"logps/rejected": -288.00250244140625, |
|
"loss": 0.6589, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.07937607169151306, |
|
"rewards/margins": 0.09387041628360748, |
|
"rewards/rejected": -0.17324648797512054, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.1602792739868164, |
|
"logits/rejected": -1.9714686870574951, |
|
"logps/chosen": -310.117919921875, |
|
"logps/rejected": -308.3526916503906, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13817985355854034, |
|
"rewards/margins": 0.13379593193531036, |
|
"rewards/rejected": -0.2719758152961731, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.129748821258545, |
|
"logits/rejected": -2.028604030609131, |
|
"logps/chosen": -282.7078552246094, |
|
"logps/rejected": -272.08837890625, |
|
"loss": 0.6361, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.23127944767475128, |
|
"rewards/margins": 0.14839713275432587, |
|
"rewards/rejected": -0.37967658042907715, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.183048725128174, |
|
"logits/rejected": -1.9789161682128906, |
|
"logps/chosen": -281.8155212402344, |
|
"logps/rejected": -272.23956298828125, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09410645067691803, |
|
"rewards/margins": 0.12439638376235962, |
|
"rewards/rejected": -0.21850283443927765, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.083225965499878, |
|
"logits/rejected": -1.9568647146224976, |
|
"logps/chosen": -275.0286560058594, |
|
"logps/rejected": -263.38140869140625, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.14354154467582703, |
|
"rewards/margins": 0.19759733974933624, |
|
"rewards/rejected": -0.3411388695240021, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.1453604698181152, |
|
"logits/rejected": -1.9743705987930298, |
|
"logps/chosen": -287.78057861328125, |
|
"logps/rejected": -284.1526794433594, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.28831422328948975, |
|
"rewards/margins": 0.17838594317436218, |
|
"rewards/rejected": -0.4667002260684967, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -2.068016529083252, |
|
"logits/rejected": -1.9705654382705688, |
|
"logps/chosen": -315.2586364746094, |
|
"logps/rejected": -313.2366027832031, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2962488532066345, |
|
"rewards/margins": 0.2308805286884308, |
|
"rewards/rejected": -0.5271294116973877, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -2.1018004417419434, |
|
"logits/rejected": -1.8998439311981201, |
|
"logps/chosen": -275.9500732421875, |
|
"logps/rejected": -287.0372009277344, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.31821924448013306, |
|
"rewards/margins": 0.24809296429157257, |
|
"rewards/rejected": -0.5663121938705444, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.0229153633117676, |
|
"eval_logits/rejected": -1.889541745185852, |
|
"eval_logps/chosen": -295.1513671875, |
|
"eval_logps/rejected": -296.011474609375, |
|
"eval_loss": 0.6216087937355042, |
|
"eval_rewards/accuracies": 0.6729999780654907, |
|
"eval_rewards/chosen": -0.30499377846717834, |
|
"eval_rewards/margins": 0.2090347856283188, |
|
"eval_rewards/rejected": -0.5140285491943359, |
|
"eval_runtime": 385.3276, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 0.649, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -2.246411085128784, |
|
"logits/rejected": -2.0464656352996826, |
|
"logps/chosen": -320.37054443359375, |
|
"logps/rejected": -296.6560363769531, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.31464242935180664, |
|
"rewards/margins": 0.29925835132598877, |
|
"rewards/rejected": -0.6139007806777954, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.90625, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -2.1202454566955566, |
|
"logits/rejected": -1.933571457862854, |
|
"logps/chosen": -300.3293151855469, |
|
"logps/rejected": -303.07177734375, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.42065340280532837, |
|
"rewards/margins": 0.19771243631839752, |
|
"rewards/rejected": -0.6183657646179199, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -2.0555598735809326, |
|
"logits/rejected": -1.9103734493255615, |
|
"logps/chosen": -286.83306884765625, |
|
"logps/rejected": -285.3974609375, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2723875939846039, |
|
"rewards/margins": 0.22743086516857147, |
|
"rewards/rejected": -0.49981847405433655, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -2.1098897457122803, |
|
"logits/rejected": -1.996603012084961, |
|
"logps/chosen": -339.12225341796875, |
|
"logps/rejected": -342.5606994628906, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.49436426162719727, |
|
"rewards/margins": 0.24838733673095703, |
|
"rewards/rejected": -0.7427516579627991, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -2.0351061820983887, |
|
"logits/rejected": -1.8878052234649658, |
|
"logps/chosen": -342.15667724609375, |
|
"logps/rejected": -348.20281982421875, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5289834141731262, |
|
"rewards/margins": 0.1753660887479782, |
|
"rewards/rejected": -0.7043493986129761, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.84375, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -2.014333963394165, |
|
"logits/rejected": -1.9689722061157227, |
|
"logps/chosen": -274.50213623046875, |
|
"logps/rejected": -278.16351318359375, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.33518165349960327, |
|
"rewards/margins": 0.24500660598278046, |
|
"rewards/rejected": -0.5801882743835449, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -2.1054439544677734, |
|
"logits/rejected": -1.9295707941055298, |
|
"logps/chosen": -315.6613464355469, |
|
"logps/rejected": -300.81231689453125, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.38765162229537964, |
|
"rewards/margins": 0.2898003160953522, |
|
"rewards/rejected": -0.6774519681930542, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.375, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -2.0662331581115723, |
|
"logits/rejected": -1.8568542003631592, |
|
"logps/chosen": -335.3840637207031, |
|
"logps/rejected": -334.6043395996094, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.373274028301239, |
|
"rewards/margins": 0.3260083794593811, |
|
"rewards/rejected": -0.6992824077606201, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -2.0750114917755127, |
|
"logits/rejected": -1.9580342769622803, |
|
"logps/chosen": -320.772705078125, |
|
"logps/rejected": -333.44476318359375, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.39726486802101135, |
|
"rewards/margins": 0.3268759846687317, |
|
"rewards/rejected": -0.7241408228874207, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -2.0969738960266113, |
|
"logits/rejected": -1.8604263067245483, |
|
"logps/chosen": -311.9226989746094, |
|
"logps/rejected": -294.60662841796875, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4644620418548584, |
|
"rewards/margins": 0.23335090279579163, |
|
"rewards/rejected": -0.6978129148483276, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.9431427717208862, |
|
"eval_logits/rejected": -1.8155378103256226, |
|
"eval_logps/chosen": -326.5406799316406, |
|
"eval_logps/rejected": -340.4455261230469, |
|
"eval_loss": 0.5939911007881165, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -0.6188870072364807, |
|
"eval_rewards/margins": 0.3394821286201477, |
|
"eval_rewards/rejected": -0.9583691358566284, |
|
"eval_runtime": 385.2303, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.9562289714813232, |
|
"logits/rejected": -1.8964239358901978, |
|
"logps/chosen": -315.13616943359375, |
|
"logps/rejected": -334.50677490234375, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7281379699707031, |
|
"rewards/margins": 0.2114681750535965, |
|
"rewards/rejected": -0.9396060705184937, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -1.9491183757781982, |
|
"logits/rejected": -1.8290717601776123, |
|
"logps/chosen": -280.4700622558594, |
|
"logps/rejected": -309.1809997558594, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5167919397354126, |
|
"rewards/margins": 0.3777889609336853, |
|
"rewards/rejected": -0.8945809602737427, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -1.9259364604949951, |
|
"logits/rejected": -1.8644450902938843, |
|
"logps/chosen": -345.35797119140625, |
|
"logps/rejected": -365.54449462890625, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6780990362167358, |
|
"rewards/margins": 0.4380587637424469, |
|
"rewards/rejected": -1.1161577701568604, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -2.0121302604675293, |
|
"logits/rejected": -1.9294341802597046, |
|
"logps/chosen": -349.0380554199219, |
|
"logps/rejected": -362.43768310546875, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7688915729522705, |
|
"rewards/margins": 0.4175523817539215, |
|
"rewards/rejected": -1.1864439249038696, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.889850378036499, |
|
"logits/rejected": -1.7697973251342773, |
|
"logps/chosen": -332.23077392578125, |
|
"logps/rejected": -353.26593017578125, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.643971860408783, |
|
"rewards/margins": 0.4406636357307434, |
|
"rewards/rejected": -1.0846354961395264, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.8695566654205322, |
|
"logits/rejected": -1.7812881469726562, |
|
"logps/chosen": -366.63818359375, |
|
"logps/rejected": -409.49755859375, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0153841972351074, |
|
"rewards/margins": 0.5738715529441833, |
|
"rewards/rejected": -1.589255690574646, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.8687667846679688, |
|
"logits/rejected": -1.7270047664642334, |
|
"logps/chosen": -376.8227844238281, |
|
"logps/rejected": -413.8404846191406, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3300559520721436, |
|
"rewards/margins": 0.49565353989601135, |
|
"rewards/rejected": -1.825709581375122, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -1.8790470361709595, |
|
"logits/rejected": -1.7465674877166748, |
|
"logps/chosen": -361.17974853515625, |
|
"logps/rejected": -419.521484375, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0430080890655518, |
|
"rewards/margins": 0.5655397176742554, |
|
"rewards/rejected": -1.6085479259490967, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -1.923179268836975, |
|
"logits/rejected": -1.7839629650115967, |
|
"logps/chosen": -394.8546142578125, |
|
"logps/rejected": -421.29730224609375, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0996313095092773, |
|
"rewards/margins": 0.5679025053977966, |
|
"rewards/rejected": -1.6675338745117188, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.25, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -1.887112021446228, |
|
"logits/rejected": -1.7504981756210327, |
|
"logps/chosen": -411.74951171875, |
|
"logps/rejected": -424.2745666503906, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3798956871032715, |
|
"rewards/margins": 0.5375889539718628, |
|
"rewards/rejected": -1.9174845218658447, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -1.7892649173736572, |
|
"eval_logits/rejected": -1.6636674404144287, |
|
"eval_logps/chosen": -421.9456787109375, |
|
"eval_logps/rejected": -449.8769836425781, |
|
"eval_loss": 0.5779695510864258, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": -1.572936773300171, |
|
"eval_rewards/margins": 0.4797472655773163, |
|
"eval_rewards/rejected": -2.0526838302612305, |
|
"eval_runtime": 385.3091, |
|
"eval_samples_per_second": 5.191, |
|
"eval_steps_per_second": 0.649, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -1.853735327720642, |
|
"logits/rejected": -1.7524950504302979, |
|
"logps/chosen": -404.90545654296875, |
|
"logps/rejected": -425.74676513671875, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.266443133354187, |
|
"rewards/margins": 0.5788331031799316, |
|
"rewards/rejected": -1.8452762365341187, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.9005975723266602, |
|
"logits/rejected": -1.8376613855361938, |
|
"logps/chosen": -341.048828125, |
|
"logps/rejected": -359.40496826171875, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6798708438873291, |
|
"rewards/margins": 0.3456707298755646, |
|
"rewards/rejected": -1.0255415439605713, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -1.8874883651733398, |
|
"logits/rejected": -1.8308721780776978, |
|
"logps/chosen": -315.84173583984375, |
|
"logps/rejected": -365.2502746582031, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5474014282226562, |
|
"rewards/margins": 0.40957459807395935, |
|
"rewards/rejected": -0.9569761157035828, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -1.8144280910491943, |
|
"logits/rejected": -1.657810926437378, |
|
"logps/chosen": -369.9844665527344, |
|
"logps/rejected": -390.8047180175781, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.9150163531303406, |
|
"rewards/margins": 0.676045298576355, |
|
"rewards/rejected": -1.5910617113113403, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 12.625, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -1.7606821060180664, |
|
"logits/rejected": -1.7015259265899658, |
|
"logps/chosen": -427.96990966796875, |
|
"logps/rejected": -451.92205810546875, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.494568943977356, |
|
"rewards/margins": 0.4373590350151062, |
|
"rewards/rejected": -1.931928038597107, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -1.9176502227783203, |
|
"logits/rejected": -1.832397699356079, |
|
"logps/chosen": -372.6578369140625, |
|
"logps/rejected": -429.7704162597656, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1515864133834839, |
|
"rewards/margins": 0.567107081413269, |
|
"rewards/rejected": -1.718693494796753, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -1.9988332986831665, |
|
"logits/rejected": -1.8672618865966797, |
|
"logps/chosen": -407.62664794921875, |
|
"logps/rejected": -419.98291015625, |
|
"loss": 0.5754, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.075867772102356, |
|
"rewards/margins": 0.47032594680786133, |
|
"rewards/rejected": -1.5461935997009277, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.5, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -1.9790706634521484, |
|
"logits/rejected": -1.7595863342285156, |
|
"logps/chosen": -397.14752197265625, |
|
"logps/rejected": -413.5733337402344, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9936078190803528, |
|
"rewards/margins": 0.5750035047531128, |
|
"rewards/rejected": -1.5686112642288208, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -1.8258365392684937, |
|
"logits/rejected": -1.7129390239715576, |
|
"logps/chosen": -363.6575622558594, |
|
"logps/rejected": -387.19378662109375, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0635744333267212, |
|
"rewards/margins": 0.4720209240913391, |
|
"rewards/rejected": -1.5355952978134155, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -1.8798444271087646, |
|
"logits/rejected": -1.8100011348724365, |
|
"logps/chosen": -338.61151123046875, |
|
"logps/rejected": -374.54974365234375, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8785476684570312, |
|
"rewards/margins": 0.5087668895721436, |
|
"rewards/rejected": -1.3873146772384644, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.778578281402588, |
|
"eval_logits/rejected": -1.6489102840423584, |
|
"eval_logps/chosen": -342.7493896484375, |
|
"eval_logps/rejected": -372.69134521484375, |
|
"eval_loss": 0.5649436712265015, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": -0.7809735536575317, |
|
"eval_rewards/margins": 0.49985405802726746, |
|
"eval_rewards/rejected": -1.2808276414871216, |
|
"eval_runtime": 385.3125, |
|
"eval_samples_per_second": 5.191, |
|
"eval_steps_per_second": 0.649, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.8086153268814087, |
|
"logits/rejected": -1.7312501668930054, |
|
"logps/chosen": -321.55279541015625, |
|
"logps/rejected": -367.79229736328125, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7568905353546143, |
|
"rewards/margins": 0.6399748921394348, |
|
"rewards/rejected": -1.3968654870986938, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 16.25, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -1.833062767982483, |
|
"logits/rejected": -1.5977442264556885, |
|
"logps/chosen": -380.17169189453125, |
|
"logps/rejected": -390.75848388671875, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9234441518783569, |
|
"rewards/margins": 0.6925610303878784, |
|
"rewards/rejected": -1.616005301475525, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -1.813197374343872, |
|
"logits/rejected": -1.6430933475494385, |
|
"logps/chosen": -355.9547424316406, |
|
"logps/rejected": -418.7765197753906, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.078249216079712, |
|
"rewards/margins": 0.7311606407165527, |
|
"rewards/rejected": -1.809409737586975, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -1.8197021484375, |
|
"logits/rejected": -1.5834531784057617, |
|
"logps/chosen": -435.12738037109375, |
|
"logps/rejected": -468.70501708984375, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.6980397701263428, |
|
"rewards/margins": 0.593255341053009, |
|
"rewards/rejected": -2.291295289993286, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -1.7958835363388062, |
|
"logits/rejected": -1.6748111248016357, |
|
"logps/chosen": -392.5576171875, |
|
"logps/rejected": -455.75811767578125, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.372554898262024, |
|
"rewards/margins": 0.7270306348800659, |
|
"rewards/rejected": -2.09958553314209, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -1.7571017742156982, |
|
"logits/rejected": -1.5184545516967773, |
|
"logps/chosen": -403.0295715332031, |
|
"logps/rejected": -419.2205505371094, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3702582120895386, |
|
"rewards/margins": 0.5140202641487122, |
|
"rewards/rejected": -1.884278655052185, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -1.9124584197998047, |
|
"logits/rejected": -1.694361925125122, |
|
"logps/chosen": -436.59747314453125, |
|
"logps/rejected": -460.3738708496094, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.530548334121704, |
|
"rewards/margins": 0.6449794769287109, |
|
"rewards/rejected": -2.175528049468994, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -1.7815015316009521, |
|
"logits/rejected": -1.5756428241729736, |
|
"logps/chosen": -448.1412658691406, |
|
"logps/rejected": -469.43603515625, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7480707168579102, |
|
"rewards/margins": 0.6839998364448547, |
|
"rewards/rejected": -2.432070255279541, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -1.7120873928070068, |
|
"logits/rejected": -1.6082136631011963, |
|
"logps/chosen": -419.0126037597656, |
|
"logps/rejected": -534.6773681640625, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.827803611755371, |
|
"rewards/margins": 1.0398612022399902, |
|
"rewards/rejected": -2.8676648139953613, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -1.676849365234375, |
|
"logits/rejected": -1.601438283920288, |
|
"logps/chosen": -455.9642639160156, |
|
"logps/rejected": -528.1475219726562, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0265378952026367, |
|
"rewards/margins": 0.7665891647338867, |
|
"rewards/rejected": -2.7931270599365234, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -1.5919249057769775, |
|
"eval_logits/rejected": -1.469058632850647, |
|
"eval_logps/chosen": -455.5274963378906, |
|
"eval_logps/rejected": -512.6751098632812, |
|
"eval_loss": 0.560720682144165, |
|
"eval_rewards/accuracies": 0.7059999704360962, |
|
"eval_rewards/chosen": -1.9087554216384888, |
|
"eval_rewards/margins": 0.7719098925590515, |
|
"eval_rewards/rejected": -2.6806650161743164, |
|
"eval_runtime": 385.1228, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -1.7319362163543701, |
|
"logits/rejected": -1.619175672531128, |
|
"logps/chosen": -431.63232421875, |
|
"logps/rejected": -489.86297607421875, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6406848430633545, |
|
"rewards/margins": 0.7181671857833862, |
|
"rewards/rejected": -2.358851909637451, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -1.6986335515975952, |
|
"logits/rejected": -1.5594747066497803, |
|
"logps/chosen": -427.96978759765625, |
|
"logps/rejected": -497.16766357421875, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6188774108886719, |
|
"rewards/margins": 0.8638092875480652, |
|
"rewards/rejected": -2.4826865196228027, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.25, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -1.6116526126861572, |
|
"logits/rejected": -1.474578619003296, |
|
"logps/chosen": -464.416259765625, |
|
"logps/rejected": -503.581787109375, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.981610894203186, |
|
"rewards/margins": 0.6970613598823547, |
|
"rewards/rejected": -2.6786723136901855, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -1.5888502597808838, |
|
"logits/rejected": -1.4401233196258545, |
|
"logps/chosen": -373.1294860839844, |
|
"logps/rejected": -415.46240234375, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.449134111404419, |
|
"rewards/margins": 0.7457250356674194, |
|
"rewards/rejected": -2.194859027862549, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 14.75, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -1.6056511402130127, |
|
"logits/rejected": -1.5784225463867188, |
|
"logps/chosen": -356.6183166503906, |
|
"logps/rejected": -429.750732421875, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2209298610687256, |
|
"rewards/margins": 0.7504220008850098, |
|
"rewards/rejected": -1.971351981163025, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -1.6386051177978516, |
|
"logits/rejected": -1.5690464973449707, |
|
"logps/chosen": -394.1507568359375, |
|
"logps/rejected": -469.383056640625, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2175710201263428, |
|
"rewards/margins": 0.8312736749649048, |
|
"rewards/rejected": -2.048844575881958, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 15.75, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -1.6991758346557617, |
|
"logits/rejected": -1.563987374305725, |
|
"logps/chosen": -405.8094482421875, |
|
"logps/rejected": -445.58209228515625, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5260562896728516, |
|
"rewards/margins": 0.6259430050849915, |
|
"rewards/rejected": -2.1519992351531982, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -1.7494251728057861, |
|
"logits/rejected": -1.6077022552490234, |
|
"logps/chosen": -431.01519775390625, |
|
"logps/rejected": -486.4640197753906, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4566891193389893, |
|
"rewards/margins": 0.8033839464187622, |
|
"rewards/rejected": -2.260073184967041, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -1.613250732421875, |
|
"logits/rejected": -1.5955699682235718, |
|
"logps/chosen": -397.90008544921875, |
|
"logps/rejected": -463.9117126464844, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4717532396316528, |
|
"rewards/margins": 0.64255690574646, |
|
"rewards/rejected": -2.1143100261688232, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.5, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -1.8398478031158447, |
|
"logits/rejected": -1.5474001169204712, |
|
"logps/chosen": -399.21368408203125, |
|
"logps/rejected": -416.2703552246094, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3657824993133545, |
|
"rewards/margins": 0.7942038774490356, |
|
"rewards/rejected": -2.1599864959716797, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.6709563732147217, |
|
"eval_logits/rejected": -1.546115756034851, |
|
"eval_logps/chosen": -409.65435791015625, |
|
"eval_logps/rejected": -460.5684814453125, |
|
"eval_loss": 0.543312132358551, |
|
"eval_rewards/accuracies": 0.7070000171661377, |
|
"eval_rewards/chosen": -1.4500234127044678, |
|
"eval_rewards/margins": 0.7095751166343689, |
|
"eval_rewards/rejected": -2.1595985889434814, |
|
"eval_runtime": 385.2124, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -1.7810264825820923, |
|
"logits/rejected": -1.7489475011825562, |
|
"logps/chosen": -431.69219970703125, |
|
"logps/rejected": -477.871337890625, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.5776736736297607, |
|
"rewards/margins": 0.5325725674629211, |
|
"rewards/rejected": -2.110246181488037, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 13.625, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.7703052759170532, |
|
"logits/rejected": -1.7803173065185547, |
|
"logps/chosen": -363.9437561035156, |
|
"logps/rejected": -435.0057678222656, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3461793661117554, |
|
"rewards/margins": 0.5871396064758301, |
|
"rewards/rejected": -1.933318853378296, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -1.8022472858428955, |
|
"logits/rejected": -1.6746841669082642, |
|
"logps/chosen": -397.36090087890625, |
|
"logps/rejected": -445.603759765625, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2891987562179565, |
|
"rewards/margins": 0.5191696882247925, |
|
"rewards/rejected": -1.808368444442749, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 9.375, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -1.7888991832733154, |
|
"logits/rejected": -1.6187770366668701, |
|
"logps/chosen": -355.0903015136719, |
|
"logps/rejected": -387.1643981933594, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.012415885925293, |
|
"rewards/margins": 0.5939286947250366, |
|
"rewards/rejected": -1.6063445806503296, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -1.7059911489486694, |
|
"logits/rejected": -1.6270997524261475, |
|
"logps/chosen": -346.2181091308594, |
|
"logps/rejected": -401.40069580078125, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9845376014709473, |
|
"rewards/margins": 0.5990740656852722, |
|
"rewards/rejected": -1.5836117267608643, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -1.676922082901001, |
|
"logits/rejected": -1.5429388284683228, |
|
"logps/chosen": -394.45416259765625, |
|
"logps/rejected": -433.03369140625, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.219531774520874, |
|
"rewards/margins": 0.49049144983291626, |
|
"rewards/rejected": -1.710023283958435, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -1.5784261226654053, |
|
"logits/rejected": -1.5068719387054443, |
|
"logps/chosen": -391.16192626953125, |
|
"logps/rejected": -455.4800720214844, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3523309230804443, |
|
"rewards/margins": 0.6492956280708313, |
|
"rewards/rejected": -2.001626491546631, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -1.6509666442871094, |
|
"logits/rejected": -1.5814907550811768, |
|
"logps/chosen": -408.57598876953125, |
|
"logps/rejected": -468.8497619628906, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.4533666372299194, |
|
"rewards/margins": 0.7448235750198364, |
|
"rewards/rejected": -2.198190212249756, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -1.6324241161346436, |
|
"logits/rejected": -1.6051101684570312, |
|
"logps/chosen": -362.0763244628906, |
|
"logps/rejected": -444.11077880859375, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1464101076126099, |
|
"rewards/margins": 0.6818080544471741, |
|
"rewards/rejected": -1.8282181024551392, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.125, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -1.5433322191238403, |
|
"logits/rejected": -1.4583094120025635, |
|
"logps/chosen": -336.41778564453125, |
|
"logps/rejected": -401.85772705078125, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0750483274459839, |
|
"rewards/margins": 0.7443105578422546, |
|
"rewards/rejected": -1.8193588256835938, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -1.621368169784546, |
|
"eval_logits/rejected": -1.5014086961746216, |
|
"eval_logps/chosen": -391.2229919433594, |
|
"eval_logps/rejected": -436.3040771484375, |
|
"eval_loss": 0.5440120697021484, |
|
"eval_rewards/accuracies": 0.718999981880188, |
|
"eval_rewards/chosen": -1.2657097578048706, |
|
"eval_rewards/margins": 0.6512450575828552, |
|
"eval_rewards/rejected": -1.916954755783081, |
|
"eval_runtime": 385.3527, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 0.649, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -1.7400833368301392, |
|
"logits/rejected": -1.5401082038879395, |
|
"logps/chosen": -410.2032775878906, |
|
"logps/rejected": -411.843994140625, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2593110799789429, |
|
"rewards/margins": 0.5718873739242554, |
|
"rewards/rejected": -1.8311984539031982, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -1.6306053400039673, |
|
"logits/rejected": -1.5210235118865967, |
|
"logps/chosen": -400.1542053222656, |
|
"logps/rejected": -448.408447265625, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.370181679725647, |
|
"rewards/margins": 0.6565110087394714, |
|
"rewards/rejected": -2.0266928672790527, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -1.6995433568954468, |
|
"logits/rejected": -1.6389293670654297, |
|
"logps/chosen": -427.10137939453125, |
|
"logps/rejected": -471.07952880859375, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5522325038909912, |
|
"rewards/margins": 0.6380002498626709, |
|
"rewards/rejected": -2.190232753753662, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -1.6130173206329346, |
|
"logits/rejected": -1.5491468906402588, |
|
"logps/chosen": -439.54248046875, |
|
"logps/rejected": -480.834228515625, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4930182695388794, |
|
"rewards/margins": 0.7003245949745178, |
|
"rewards/rejected": -2.193342924118042, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.6797221899032593, |
|
"logits/rejected": -1.5230547189712524, |
|
"logps/chosen": -394.5656433105469, |
|
"logps/rejected": -453.25946044921875, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4823963642120361, |
|
"rewards/margins": 0.7305151224136353, |
|
"rewards/rejected": -2.212911605834961, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -1.7173080444335938, |
|
"logits/rejected": -1.60665762424469, |
|
"logps/chosen": -406.39117431640625, |
|
"logps/rejected": -445.9922790527344, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3457807302474976, |
|
"rewards/margins": 0.637101411819458, |
|
"rewards/rejected": -1.9828822612762451, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -1.7457382678985596, |
|
"logits/rejected": -1.5430558919906616, |
|
"logps/chosen": -429.39300537109375, |
|
"logps/rejected": -465.61224365234375, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.49411940574646, |
|
"rewards/margins": 0.8279851078987122, |
|
"rewards/rejected": -2.3221046924591064, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -1.7286710739135742, |
|
"logits/rejected": -1.5955041646957397, |
|
"logps/chosen": -443.83270263671875, |
|
"logps/rejected": -498.59967041015625, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6041603088378906, |
|
"rewards/margins": 0.7516692876815796, |
|
"rewards/rejected": -2.3558297157287598, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.625, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -1.6606595516204834, |
|
"logits/rejected": -1.5426713228225708, |
|
"logps/chosen": -412.0262145996094, |
|
"logps/rejected": -478.1866149902344, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7292293310165405, |
|
"rewards/margins": 0.888912558555603, |
|
"rewards/rejected": -2.6181421279907227, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -1.7503217458724976, |
|
"logits/rejected": -1.6148483753204346, |
|
"logps/chosen": -452.8263244628906, |
|
"logps/rejected": -481.222900390625, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4942306280136108, |
|
"rewards/margins": 0.6457923054695129, |
|
"rewards/rejected": -2.1400229930877686, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.5656111240386963, |
|
"eval_logits/rejected": -1.4448813199996948, |
|
"eval_logps/chosen": -401.67669677734375, |
|
"eval_logps/rejected": -451.64080810546875, |
|
"eval_loss": 0.5418093204498291, |
|
"eval_rewards/accuracies": 0.7174999713897705, |
|
"eval_rewards/chosen": -1.3702467679977417, |
|
"eval_rewards/margins": 0.7000752091407776, |
|
"eval_rewards/rejected": -2.070322036743164, |
|
"eval_runtime": 385.2164, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -1.531764268875122, |
|
"logits/rejected": -1.4728986024856567, |
|
"logps/chosen": -375.1974792480469, |
|
"logps/rejected": -444.0108337402344, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4164024591445923, |
|
"rewards/margins": 0.7396863698959351, |
|
"rewards/rejected": -2.1560888290405273, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 16.25, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -1.6515556573867798, |
|
"logits/rejected": -1.547123670578003, |
|
"logps/chosen": -394.8990173339844, |
|
"logps/rejected": -431.17645263671875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4115506410598755, |
|
"rewards/margins": 0.6983591318130493, |
|
"rewards/rejected": -2.1099095344543457, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 6.875, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -1.6495920419692993, |
|
"logits/rejected": -1.5659213066101074, |
|
"logps/chosen": -351.00146484375, |
|
"logps/rejected": -412.519287109375, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1029760837554932, |
|
"rewards/margins": 0.7049504518508911, |
|
"rewards/rejected": -1.8079265356063843, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -1.7497320175170898, |
|
"logits/rejected": -1.5089380741119385, |
|
"logps/chosen": -426.8157653808594, |
|
"logps/rejected": -491.87860107421875, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2558623552322388, |
|
"rewards/margins": 0.9615765810012817, |
|
"rewards/rejected": -2.2174386978149414, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -1.6929643154144287, |
|
"logits/rejected": -1.5880625247955322, |
|
"logps/chosen": -421.701416015625, |
|
"logps/rejected": -518.8970947265625, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5323131084442139, |
|
"rewards/margins": 1.0377166271209717, |
|
"rewards/rejected": -2.5700297355651855, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 12.25, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -1.5793530941009521, |
|
"logits/rejected": -1.503025770187378, |
|
"logps/chosen": -413.30792236328125, |
|
"logps/rejected": -483.18048095703125, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7728277444839478, |
|
"rewards/margins": 0.7549096345901489, |
|
"rewards/rejected": -2.5277373790740967, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.6937191486358643, |
|
"logits/rejected": -1.5083749294281006, |
|
"logps/chosen": -422.14447021484375, |
|
"logps/rejected": -479.6094665527344, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3397438526153564, |
|
"rewards/margins": 1.0674594640731812, |
|
"rewards/rejected": -2.407203197479248, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -1.544276475906372, |
|
"logits/rejected": -1.4562034606933594, |
|
"logps/chosen": -403.8301696777344, |
|
"logps/rejected": -444.5962829589844, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.236783504486084, |
|
"rewards/margins": 0.7301002740859985, |
|
"rewards/rejected": -1.9668840169906616, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -1.6948843002319336, |
|
"logits/rejected": -1.5669870376586914, |
|
"logps/chosen": -365.44342041015625, |
|
"logps/rejected": -436.5625915527344, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0504177808761597, |
|
"rewards/margins": 0.8762027621269226, |
|
"rewards/rejected": -1.9266207218170166, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -1.6588748693466187, |
|
"logits/rejected": -1.5048010349273682, |
|
"logps/chosen": -401.0564270019531, |
|
"logps/rejected": -414.84466552734375, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3476970195770264, |
|
"rewards/margins": 0.5775381922721863, |
|
"rewards/rejected": -1.925235390663147, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -1.5524324178695679, |
|
"eval_logits/rejected": -1.427809476852417, |
|
"eval_logps/chosen": -378.61767578125, |
|
"eval_logps/rejected": -430.84136962890625, |
|
"eval_loss": 0.5299040675163269, |
|
"eval_rewards/accuracies": 0.7210000157356262, |
|
"eval_rewards/chosen": -1.1396570205688477, |
|
"eval_rewards/margins": 0.7226706147193909, |
|
"eval_rewards/rejected": -1.8623274564743042, |
|
"eval_runtime": 385.4496, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -1.661257028579712, |
|
"logits/rejected": -1.5975781679153442, |
|
"logps/chosen": -380.5933837890625, |
|
"logps/rejected": -433.12139892578125, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1024227142333984, |
|
"rewards/margins": 0.5738898515701294, |
|
"rewards/rejected": -1.6763126850128174, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -1.5824635028839111, |
|
"logits/rejected": -1.4781149625778198, |
|
"logps/chosen": -340.6497497558594, |
|
"logps/rejected": -407.69293212890625, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9949854016304016, |
|
"rewards/margins": 0.6768967509269714, |
|
"rewards/rejected": -1.6718822717666626, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.5540910959243774, |
|
"logits/rejected": -1.4291226863861084, |
|
"logps/chosen": -390.7538757324219, |
|
"logps/rejected": -446.49310302734375, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.296918511390686, |
|
"rewards/margins": 0.6129493117332458, |
|
"rewards/rejected": -1.9098678827285767, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 14.125, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.6088273525238037, |
|
"logits/rejected": -1.3946729898452759, |
|
"logps/chosen": -412.3367614746094, |
|
"logps/rejected": -442.0401916503906, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2825069427490234, |
|
"rewards/margins": 0.7757080793380737, |
|
"rewards/rejected": -2.0582151412963867, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 7.875, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -1.6174886226654053, |
|
"logits/rejected": -1.4844688177108765, |
|
"logps/chosen": -402.29486083984375, |
|
"logps/rejected": -463.25689697265625, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.299822211265564, |
|
"rewards/margins": 0.8488262891769409, |
|
"rewards/rejected": -2.148648738861084, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -1.4636362791061401, |
|
"logits/rejected": -1.3575894832611084, |
|
"logps/chosen": -405.80010986328125, |
|
"logps/rejected": -456.88641357421875, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4196369647979736, |
|
"rewards/margins": 0.7757617235183716, |
|
"rewards/rejected": -2.1953988075256348, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -1.6339868307113647, |
|
"logits/rejected": -1.4374290704727173, |
|
"logps/chosen": -406.9070739746094, |
|
"logps/rejected": -451.3981018066406, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.477386474609375, |
|
"rewards/margins": 0.8484745025634766, |
|
"rewards/rejected": -2.3258609771728516, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -1.4347012042999268, |
|
"logits/rejected": -1.4834723472595215, |
|
"logps/chosen": -395.71014404296875, |
|
"logps/rejected": -475.1640625, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4909913539886475, |
|
"rewards/margins": 0.7410578727722168, |
|
"rewards/rejected": -2.2320492267608643, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -1.6247329711914062, |
|
"logits/rejected": -1.4631903171539307, |
|
"logps/chosen": -389.4300842285156, |
|
"logps/rejected": -455.31573486328125, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4183425903320312, |
|
"rewards/margins": 0.7307096719741821, |
|
"rewards/rejected": -2.149052143096924, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -1.4696300029754639, |
|
"logits/rejected": -1.3941162824630737, |
|
"logps/chosen": -352.44671630859375, |
|
"logps/rejected": -424.1249084472656, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2273385524749756, |
|
"rewards/margins": 0.6349586248397827, |
|
"rewards/rejected": -1.8622970581054688, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.4804484844207764, |
|
"eval_logits/rejected": -1.3595802783966064, |
|
"eval_logps/chosen": -375.21826171875, |
|
"eval_logps/rejected": -427.4810485839844, |
|
"eval_loss": 0.5184832811355591, |
|
"eval_rewards/accuracies": 0.7250000238418579, |
|
"eval_rewards/chosen": -1.1056623458862305, |
|
"eval_rewards/margins": 0.7230623364448547, |
|
"eval_rewards/rejected": -1.82872474193573, |
|
"eval_runtime": 385.0476, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -1.5287452936172485, |
|
"logits/rejected": -1.3607852458953857, |
|
"logps/chosen": -350.2371520996094, |
|
"logps/rejected": -398.1286315917969, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0312172174453735, |
|
"rewards/margins": 0.7258288264274597, |
|
"rewards/rejected": -1.757046103477478, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 13.0, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -1.5342817306518555, |
|
"logits/rejected": -1.4489666223526, |
|
"logps/chosen": -372.7054138183594, |
|
"logps/rejected": -475.97601318359375, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1559852361679077, |
|
"rewards/margins": 0.9135689735412598, |
|
"rewards/rejected": -2.069554328918457, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -1.5427916049957275, |
|
"logits/rejected": -1.3855717182159424, |
|
"logps/chosen": -458.01641845703125, |
|
"logps/rejected": -475.8519592285156, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6416466236114502, |
|
"rewards/margins": 0.6417607665061951, |
|
"rewards/rejected": -2.283407211303711, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -1.5489182472229004, |
|
"logits/rejected": -1.402178168296814, |
|
"logps/chosen": -442.7936096191406, |
|
"logps/rejected": -477.34515380859375, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5542631149291992, |
|
"rewards/margins": 0.7610459327697754, |
|
"rewards/rejected": -2.3153088092803955, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -1.6110093593597412, |
|
"logits/rejected": -1.398992896080017, |
|
"logps/chosen": -380.208740234375, |
|
"logps/rejected": -425.40838623046875, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2492074966430664, |
|
"rewards/margins": 0.6511562466621399, |
|
"rewards/rejected": -1.900363564491272, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -1.59576416015625, |
|
"logits/rejected": -1.4599517583847046, |
|
"logps/chosen": -401.14984130859375, |
|
"logps/rejected": -438.921630859375, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4514347314834595, |
|
"rewards/margins": 0.5331937670707703, |
|
"rewards/rejected": -1.984628438949585, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.5509364604949951, |
|
"logits/rejected": -1.3630738258361816, |
|
"logps/chosen": -384.2834777832031, |
|
"logps/rejected": -420.7542419433594, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.339779257774353, |
|
"rewards/margins": 0.6931589841842651, |
|
"rewards/rejected": -2.0329384803771973, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.5844643115997314, |
|
"logits/rejected": -1.4324209690093994, |
|
"logps/chosen": -375.78973388671875, |
|
"logps/rejected": -440.9784240722656, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.29856538772583, |
|
"rewards/margins": 0.8626803159713745, |
|
"rewards/rejected": -2.161245584487915, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.5281752347946167, |
|
"logits/rejected": -1.4283504486083984, |
|
"logps/chosen": -397.5606384277344, |
|
"logps/rejected": -468.21002197265625, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4838616847991943, |
|
"rewards/margins": 0.9059172868728638, |
|
"rewards/rejected": -2.3897788524627686, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.480257511138916, |
|
"logits/rejected": -1.4012019634246826, |
|
"logps/chosen": -429.61181640625, |
|
"logps/rejected": -510.66522216796875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7596956491470337, |
|
"rewards/margins": 0.8419567942619324, |
|
"rewards/rejected": -2.6016526222229004, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -1.30724036693573, |
|
"eval_logits/rejected": -1.1976608037948608, |
|
"eval_logps/chosen": -478.32550048828125, |
|
"eval_logps/rejected": -549.7024536132812, |
|
"eval_loss": 0.5315085053443909, |
|
"eval_rewards/accuracies": 0.7239999771118164, |
|
"eval_rewards/chosen": -2.1367344856262207, |
|
"eval_rewards/margins": 0.9142037630081177, |
|
"eval_rewards/rejected": -3.050938367843628, |
|
"eval_runtime": 385.0593, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.4552199840545654, |
|
"logits/rejected": -1.306873083114624, |
|
"logps/chosen": -439.85382080078125, |
|
"logps/rejected": -500.3904724121094, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9311021566390991, |
|
"rewards/margins": 0.8962618708610535, |
|
"rewards/rejected": -2.827363967895508, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.482912302017212, |
|
"logits/rejected": -1.403141736984253, |
|
"logps/chosen": -424.140380859375, |
|
"logps/rejected": -514.3765869140625, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6720972061157227, |
|
"rewards/margins": 0.8676088452339172, |
|
"rewards/rejected": -2.539705753326416, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -1.4682929515838623, |
|
"logits/rejected": -1.3645504713058472, |
|
"logps/chosen": -376.16033935546875, |
|
"logps/rejected": -447.6339416503906, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4225904941558838, |
|
"rewards/margins": 0.7580591440200806, |
|
"rewards/rejected": -2.180649757385254, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.5599358081817627, |
|
"logits/rejected": -1.4370046854019165, |
|
"logps/chosen": -424.14849853515625, |
|
"logps/rejected": -472.6615295410156, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5476281642913818, |
|
"rewards/margins": 0.743899941444397, |
|
"rewards/rejected": -2.2915279865264893, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 8.625, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -1.464450716972351, |
|
"logits/rejected": -1.3373545408248901, |
|
"logps/chosen": -405.13262939453125, |
|
"logps/rejected": -447.93994140625, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.427685022354126, |
|
"rewards/margins": 0.6289039850234985, |
|
"rewards/rejected": -2.056588649749756, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 9.75, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -1.452643871307373, |
|
"logits/rejected": -1.2927871942520142, |
|
"logps/chosen": -378.2747497558594, |
|
"logps/rejected": -437.4178161621094, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2272692918777466, |
|
"rewards/margins": 0.8121210336685181, |
|
"rewards/rejected": -2.0393900871276855, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.572665810585022, |
|
"logits/rejected": -1.4710958003997803, |
|
"logps/chosen": -398.8462829589844, |
|
"logps/rejected": -468.70794677734375, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.30232834815979, |
|
"rewards/margins": 0.750015139579773, |
|
"rewards/rejected": -2.0523436069488525, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.6318330764770508, |
|
"logits/rejected": -1.4836442470550537, |
|
"logps/chosen": -405.6830749511719, |
|
"logps/rejected": -475.7168884277344, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.337021827697754, |
|
"rewards/margins": 0.9311949610710144, |
|
"rewards/rejected": -2.268216609954834, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -1.4534022808074951, |
|
"logits/rejected": -1.3461982011795044, |
|
"logps/chosen": -380.7342224121094, |
|
"logps/rejected": -464.63916015625, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2607730627059937, |
|
"rewards/margins": 0.9202286005020142, |
|
"rewards/rejected": -2.181001663208008, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.5742177963256836, |
|
"logits/rejected": -1.410463809967041, |
|
"logps/chosen": -382.19830322265625, |
|
"logps/rejected": -427.6187438964844, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1344490051269531, |
|
"rewards/margins": 0.7007244825363159, |
|
"rewards/rejected": -1.8351733684539795, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.431371808052063, |
|
"eval_logits/rejected": -1.3129903078079224, |
|
"eval_logps/chosen": -390.28460693359375, |
|
"eval_logps/rejected": -454.35223388671875, |
|
"eval_loss": 0.521051287651062, |
|
"eval_rewards/accuracies": 0.7260000109672546, |
|
"eval_rewards/chosen": -1.2563258409500122, |
|
"eval_rewards/margins": 0.841110348701477, |
|
"eval_rewards/rejected": -2.09743595123291, |
|
"eval_runtime": 385.3298, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -1.4515248537063599, |
|
"logits/rejected": -1.3351339101791382, |
|
"logps/chosen": -412.0836486816406, |
|
"logps/rejected": -511.7002868652344, |
|
"loss": 0.3984, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4840004444122314, |
|
"rewards/margins": 1.215951681137085, |
|
"rewards/rejected": -2.6999518871307373, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 10.375, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.4154666662216187, |
|
"logits/rejected": -1.2808506488800049, |
|
"logps/chosen": -463.95367431640625, |
|
"logps/rejected": -530.5446166992188, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7879329919815063, |
|
"rewards/margins": 0.9349812269210815, |
|
"rewards/rejected": -2.722913980484009, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 14.625, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -1.3400425910949707, |
|
"logits/rejected": -1.2239243984222412, |
|
"logps/chosen": -414.04168701171875, |
|
"logps/rejected": -493.5077209472656, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6094110012054443, |
|
"rewards/margins": 0.910406768321991, |
|
"rewards/rejected": -2.51981782913208, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 13.0, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -1.4123733043670654, |
|
"logits/rejected": -1.379097580909729, |
|
"logps/chosen": -400.0645751953125, |
|
"logps/rejected": -482.3004455566406, |
|
"loss": 0.455, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.495286464691162, |
|
"rewards/margins": 1.028262734413147, |
|
"rewards/rejected": -2.5235490798950195, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -1.3998125791549683, |
|
"logits/rejected": -1.2797114849090576, |
|
"logps/chosen": -421.95794677734375, |
|
"logps/rejected": -507.14361572265625, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8244426250457764, |
|
"rewards/margins": 1.0165250301361084, |
|
"rewards/rejected": -2.840967893600464, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 15.8125, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -1.4051461219787598, |
|
"logits/rejected": -1.2663236856460571, |
|
"logps/chosen": -432.9169921875, |
|
"logps/rejected": -484.6170349121094, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7967593669891357, |
|
"rewards/margins": 0.7953070402145386, |
|
"rewards/rejected": -2.592066526412964, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.5188504457473755, |
|
"logits/rejected": -1.4224086999893188, |
|
"logps/chosen": -385.0553283691406, |
|
"logps/rejected": -460.64166259765625, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.364206075668335, |
|
"rewards/margins": 0.8787292242050171, |
|
"rewards/rejected": -2.2429351806640625, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 6.0, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.4730474948883057, |
|
"logits/rejected": -1.3063628673553467, |
|
"logps/chosen": -420.05364990234375, |
|
"logps/rejected": -448.61663818359375, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.609230637550354, |
|
"rewards/margins": 0.8354890942573547, |
|
"rewards/rejected": -2.4447197914123535, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 15.6875, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.447205901145935, |
|
"logits/rejected": -1.2630943059921265, |
|
"logps/chosen": -382.7901916503906, |
|
"logps/rejected": -455.2850036621094, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2948672771453857, |
|
"rewards/margins": 0.9875162243843079, |
|
"rewards/rejected": -2.282383441925049, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.4306355714797974, |
|
"logits/rejected": -1.2892208099365234, |
|
"logps/chosen": -402.4939880371094, |
|
"logps/rejected": -487.1109313964844, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.38181734085083, |
|
"rewards/margins": 1.0380725860595703, |
|
"rewards/rejected": -2.4198899269104004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -1.4015111923217773, |
|
"eval_logits/rejected": -1.2794849872589111, |
|
"eval_logps/chosen": -401.4261779785156, |
|
"eval_logps/rejected": -472.0146179199219, |
|
"eval_loss": 0.5161935091018677, |
|
"eval_rewards/accuracies": 0.7354999780654907, |
|
"eval_rewards/chosen": -1.3677420616149902, |
|
"eval_rewards/margins": 0.9063177704811096, |
|
"eval_rewards/rejected": -2.274059534072876, |
|
"eval_runtime": 384.9141, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.4077152013778687, |
|
"logits/rejected": -1.3199503421783447, |
|
"logps/chosen": -359.19488525390625, |
|
"logps/rejected": -424.65576171875, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2548080682754517, |
|
"rewards/margins": 0.7129807472229004, |
|
"rewards/rejected": -1.9677889347076416, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.5366017818450928, |
|
"logits/rejected": -1.420841932296753, |
|
"logps/chosen": -355.3591613769531, |
|
"logps/rejected": -406.9905700683594, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9167426228523254, |
|
"rewards/margins": 0.6963993310928345, |
|
"rewards/rejected": -1.6131420135498047, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.5663089752197266, |
|
"logits/rejected": -1.4497790336608887, |
|
"logps/chosen": -371.51312255859375, |
|
"logps/rejected": -410.3604431152344, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9686979055404663, |
|
"rewards/margins": 0.49481868743896484, |
|
"rewards/rejected": -1.4635167121887207, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -1.4247326850891113, |
|
"logits/rejected": -1.265855073928833, |
|
"logps/chosen": -353.05194091796875, |
|
"logps/rejected": -399.3148498535156, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1539690494537354, |
|
"rewards/margins": 0.6195784211158752, |
|
"rewards/rejected": -1.7735474109649658, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.5184131860733032, |
|
"logits/rejected": -1.4079376459121704, |
|
"logps/chosen": -389.82550048828125, |
|
"logps/rejected": -445.1400451660156, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1863467693328857, |
|
"rewards/margins": 0.7531275749206543, |
|
"rewards/rejected": -1.9394744634628296, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -1.5098861455917358, |
|
"logits/rejected": -1.317479133605957, |
|
"logps/chosen": -411.7156677246094, |
|
"logps/rejected": -458.67218017578125, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3021931648254395, |
|
"rewards/margins": 0.8395140767097473, |
|
"rewards/rejected": -2.141706943511963, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -1.4269211292266846, |
|
"logits/rejected": -1.2615479230880737, |
|
"logps/chosen": -402.62603759765625, |
|
"logps/rejected": -482.184326171875, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5126349925994873, |
|
"rewards/margins": 0.9446732401847839, |
|
"rewards/rejected": -2.457308053970337, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.2994117736816406, |
|
"logits/rejected": -1.2819687128067017, |
|
"logps/chosen": -454.22711181640625, |
|
"logps/rejected": -568.9495239257812, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0162034034729004, |
|
"rewards/margins": 0.8572282791137695, |
|
"rewards/rejected": -2.87343168258667, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.125, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.4073131084442139, |
|
"logits/rejected": -1.3753129243850708, |
|
"logps/chosen": -438.55255126953125, |
|
"logps/rejected": -532.357421875, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0050759315490723, |
|
"rewards/margins": 0.8708721399307251, |
|
"rewards/rejected": -2.875947952270508, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.2787964344024658, |
|
"logits/rejected": -1.2062056064605713, |
|
"logps/chosen": -478.2181091308594, |
|
"logps/rejected": -532.1177978515625, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.139094829559326, |
|
"rewards/margins": 0.7530891299247742, |
|
"rewards/rejected": -2.892183780670166, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.2717995643615723, |
|
"eval_logits/rejected": -1.1533604860305786, |
|
"eval_logps/chosen": -445.6515197753906, |
|
"eval_logps/rejected": -514.567138671875, |
|
"eval_loss": 0.5072752833366394, |
|
"eval_rewards/accuracies": 0.7365000247955322, |
|
"eval_rewards/chosen": -1.809995174407959, |
|
"eval_rewards/margins": 0.889590322971344, |
|
"eval_rewards/rejected": -2.6995856761932373, |
|
"eval_runtime": 385.2379, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.3733545541763306, |
|
"logits/rejected": -1.2681185007095337, |
|
"logps/chosen": -407.07623291015625, |
|
"logps/rejected": -509.69683837890625, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6238105297088623, |
|
"rewards/margins": 0.9594193696975708, |
|
"rewards/rejected": -2.5832300186157227, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 11.125, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -1.3982038497924805, |
|
"logits/rejected": -1.2271344661712646, |
|
"logps/chosen": -460.24951171875, |
|
"logps/rejected": -503.8080139160156, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.9733803272247314, |
|
"rewards/margins": 0.7311316728591919, |
|
"rewards/rejected": -2.704512119293213, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -1.1945741176605225, |
|
"logits/rejected": -1.1713488101959229, |
|
"logps/chosen": -412.6747131347656, |
|
"logps/rejected": -492.372802734375, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8103736639022827, |
|
"rewards/margins": 0.9391372799873352, |
|
"rewards/rejected": -2.7495107650756836, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.3308615684509277, |
|
"logits/rejected": -1.2446686029434204, |
|
"logps/chosen": -433.484375, |
|
"logps/rejected": -507.3377990722656, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7979129552841187, |
|
"rewards/margins": 0.7903792262077332, |
|
"rewards/rejected": -2.588292360305786, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.378259301185608, |
|
"logits/rejected": -1.1882727146148682, |
|
"logps/chosen": -412.93560791015625, |
|
"logps/rejected": -446.2088317871094, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.6171245574951172, |
|
"rewards/margins": 0.6728037595748901, |
|
"rewards/rejected": -2.2899281978607178, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 14.125, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.2760392427444458, |
|
"logits/rejected": -1.2017720937728882, |
|
"logps/chosen": -379.2555847167969, |
|
"logps/rejected": -455.60791015625, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.573118805885315, |
|
"rewards/margins": 0.7255537509918213, |
|
"rewards/rejected": -2.2986724376678467, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -1.3151836395263672, |
|
"logits/rejected": -1.113488793373108, |
|
"logps/chosen": -440.9947814941406, |
|
"logps/rejected": -505.5106506347656, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7208404541015625, |
|
"rewards/margins": 0.9629707336425781, |
|
"rewards/rejected": -2.6838109493255615, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.289880633354187, |
|
"logits/rejected": -1.0922951698303223, |
|
"logps/chosen": -470.2997131347656, |
|
"logps/rejected": -540.0161743164062, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8769254684448242, |
|
"rewards/margins": 1.1163800954818726, |
|
"rewards/rejected": -2.9933059215545654, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 15.75, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.3650540113449097, |
|
"logits/rejected": -1.1904373168945312, |
|
"logps/chosen": -491.87060546875, |
|
"logps/rejected": -539.9581298828125, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.137281656265259, |
|
"rewards/margins": 0.9366267323493958, |
|
"rewards/rejected": -3.0739083290100098, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.273272156715393, |
|
"logits/rejected": -1.1826374530792236, |
|
"logps/chosen": -441.7779235839844, |
|
"logps/rejected": -540.5211791992188, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3176229000091553, |
|
"rewards/margins": 0.9195470809936523, |
|
"rewards/rejected": -3.2371699810028076, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -1.269109845161438, |
|
"eval_logits/rejected": -1.146828293800354, |
|
"eval_logps/chosen": -491.4620666503906, |
|
"eval_logps/rejected": -566.2828979492188, |
|
"eval_loss": 0.5000255107879639, |
|
"eval_rewards/accuracies": 0.734000027179718, |
|
"eval_rewards/chosen": -2.2681005001068115, |
|
"eval_rewards/margins": 0.9486428499221802, |
|
"eval_rewards/rejected": -3.2167434692382812, |
|
"eval_runtime": 385.0866, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.3418161869049072, |
|
"logits/rejected": -1.3261712789535522, |
|
"logps/chosen": -440.2900390625, |
|
"logps/rejected": -541.0260009765625, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1573386192321777, |
|
"rewards/margins": 0.8722022175788879, |
|
"rewards/rejected": -3.029540777206421, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 10.875, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.4454267024993896, |
|
"logits/rejected": -1.311650276184082, |
|
"logps/chosen": -485.03411865234375, |
|
"logps/rejected": -569.6810913085938, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.1618409156799316, |
|
"rewards/margins": 0.9978164434432983, |
|
"rewards/rejected": -3.1596572399139404, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.2839902639389038, |
|
"logits/rejected": -1.1861859560012817, |
|
"logps/chosen": -466.77850341796875, |
|
"logps/rejected": -553.5386352539062, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0662953853607178, |
|
"rewards/margins": 1.1144917011260986, |
|
"rewards/rejected": -3.1807870864868164, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.0, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.3106259107589722, |
|
"logits/rejected": -1.1651959419250488, |
|
"logps/chosen": -498.0738830566406, |
|
"logps/rejected": -552.7239379882812, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.230546236038208, |
|
"rewards/margins": 0.8497620820999146, |
|
"rewards/rejected": -3.080308437347412, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 7.6875, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.493554949760437, |
|
"logits/rejected": -1.2929532527923584, |
|
"logps/chosen": -479.22259521484375, |
|
"logps/rejected": -548.3055419921875, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.3036773204803467, |
|
"rewards/margins": 0.8925528526306152, |
|
"rewards/rejected": -3.196229934692383, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -1.355196237564087, |
|
"logits/rejected": -1.3183298110961914, |
|
"logps/chosen": -460.26336669921875, |
|
"logps/rejected": -523.685546875, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.046140670776367, |
|
"rewards/margins": 0.8765950202941895, |
|
"rewards/rejected": -2.9227356910705566, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 11.125, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.3625749349594116, |
|
"logits/rejected": -1.311535358428955, |
|
"logps/chosen": -489.18017578125, |
|
"logps/rejected": -587.8863525390625, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.360048294067383, |
|
"rewards/margins": 0.9435898065567017, |
|
"rewards/rejected": -3.303637742996216, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -1.397005319595337, |
|
"logits/rejected": -1.2728253602981567, |
|
"logps/chosen": -486.8643493652344, |
|
"logps/rejected": -561.2532958984375, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.503471851348877, |
|
"rewards/margins": 0.8570526838302612, |
|
"rewards/rejected": -3.3605244159698486, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.3312914371490479, |
|
"logits/rejected": -1.2595702409744263, |
|
"logps/chosen": -561.8858032226562, |
|
"logps/rejected": -660.8182373046875, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8668177127838135, |
|
"rewards/margins": 1.0241832733154297, |
|
"rewards/rejected": -3.8910012245178223, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -1.1215088367462158, |
|
"logits/rejected": -1.0118662118911743, |
|
"logps/chosen": -525.9521484375, |
|
"logps/rejected": -697.3963623046875, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.957373857498169, |
|
"rewards/margins": 1.4523636102676392, |
|
"rewards/rejected": -4.409738063812256, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.1786177158355713, |
|
"eval_logits/rejected": -1.0616753101348877, |
|
"eval_logps/chosen": -557.43115234375, |
|
"eval_logps/rejected": -643.640869140625, |
|
"eval_loss": 0.5022104382514954, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.9277913570404053, |
|
"eval_rewards/margins": 1.062530517578125, |
|
"eval_rewards/rejected": -3.9903218746185303, |
|
"eval_runtime": 384.8251, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 18.125, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.1367595195770264, |
|
"logits/rejected": -0.9490365982055664, |
|
"logps/chosen": -545.4899291992188, |
|
"logps/rejected": -620.2122192382812, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9681437015533447, |
|
"rewards/margins": 1.0490918159484863, |
|
"rewards/rejected": -4.01723575592041, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 13.375, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.2076561450958252, |
|
"logits/rejected": -1.135667085647583, |
|
"logps/chosen": -596.1036376953125, |
|
"logps/rejected": -664.8118896484375, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.1270740032196045, |
|
"rewards/margins": 0.847625732421875, |
|
"rewards/rejected": -3.9746997356414795, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -1.1738382577896118, |
|
"logits/rejected": -1.146437644958496, |
|
"logps/chosen": -529.6216430664062, |
|
"logps/rejected": -603.9373168945312, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.8229541778564453, |
|
"rewards/margins": 0.8735902905464172, |
|
"rewards/rejected": -3.696544647216797, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.3653886318206787, |
|
"logits/rejected": -1.2098249197006226, |
|
"logps/chosen": -532.6870727539062, |
|
"logps/rejected": -613.7505493164062, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4794299602508545, |
|
"rewards/margins": 1.0948512554168701, |
|
"rewards/rejected": -3.5742812156677246, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.625, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.3816752433776855, |
|
"logits/rejected": -1.2150719165802002, |
|
"logps/chosen": -521.9651489257812, |
|
"logps/rejected": -601.0781860351562, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3488364219665527, |
|
"rewards/margins": 1.0314748287200928, |
|
"rewards/rejected": -3.3803107738494873, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -1.3038969039916992, |
|
"logits/rejected": -1.2319445610046387, |
|
"logps/chosen": -547.4259033203125, |
|
"logps/rejected": -617.3253784179688, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.6133077144622803, |
|
"rewards/margins": 0.8994197845458984, |
|
"rewards/rejected": -3.5127272605895996, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -1.2965396642684937, |
|
"logits/rejected": -1.1523797512054443, |
|
"logps/chosen": -519.9957275390625, |
|
"logps/rejected": -625.9295654296875, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.4765942096710205, |
|
"rewards/margins": 1.2496535778045654, |
|
"rewards/rejected": -3.726247787475586, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.2782443761825562, |
|
"logits/rejected": -1.1259523630142212, |
|
"logps/chosen": -547.7828979492188, |
|
"logps/rejected": -630.2535400390625, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.7768635749816895, |
|
"rewards/margins": 1.2470468282699585, |
|
"rewards/rejected": -4.0239105224609375, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 11.5, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -1.278181791305542, |
|
"logits/rejected": -1.1554654836654663, |
|
"logps/chosen": -553.5909423828125, |
|
"logps/rejected": -634.082275390625, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.772510051727295, |
|
"rewards/margins": 1.059287667274475, |
|
"rewards/rejected": -3.8317978382110596, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.327986717224121, |
|
"logits/rejected": -1.165433645248413, |
|
"logps/chosen": -511.99090576171875, |
|
"logps/rejected": -616.3585815429688, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3063502311706543, |
|
"rewards/margins": 1.2612559795379639, |
|
"rewards/rejected": -3.5676064491271973, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -1.2252681255340576, |
|
"eval_logits/rejected": -1.1040537357330322, |
|
"eval_logps/chosen": -507.9823303222656, |
|
"eval_logps/rejected": -594.7523193359375, |
|
"eval_loss": 0.5002806782722473, |
|
"eval_rewards/accuracies": 0.7354999780654907, |
|
"eval_rewards/chosen": -2.433302879333496, |
|
"eval_rewards/margins": 1.0681343078613281, |
|
"eval_rewards/rejected": -3.501437187194824, |
|
"eval_runtime": 384.8766, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 0.65, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.2988775968551636, |
|
"logits/rejected": -1.167811632156372, |
|
"logps/chosen": -534.3869018554688, |
|
"logps/rejected": -595.1586303710938, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6931681632995605, |
|
"rewards/margins": 0.9779523611068726, |
|
"rewards/rejected": -3.6711204051971436, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.2570379972457886, |
|
"logits/rejected": -1.1531012058258057, |
|
"logps/chosen": -555.2855834960938, |
|
"logps/rejected": -654.2891845703125, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.955744981765747, |
|
"rewards/margins": 1.1195967197418213, |
|
"rewards/rejected": -4.075342178344727, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.280539631843567, |
|
"logits/rejected": -1.100694179534912, |
|
"logps/chosen": -573.8317260742188, |
|
"logps/rejected": -636.535400390625, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.0741798877716064, |
|
"rewards/margins": 1.0079572200775146, |
|
"rewards/rejected": -4.082137107849121, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -1.2726280689239502, |
|
"logits/rejected": -1.171382188796997, |
|
"logps/chosen": -556.9118041992188, |
|
"logps/rejected": -646.0772705078125, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.977890968322754, |
|
"rewards/margins": 0.9889172315597534, |
|
"rewards/rejected": -3.966808319091797, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 13.25, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.3368163108825684, |
|
"logits/rejected": -1.172978401184082, |
|
"logps/chosen": -542.0377807617188, |
|
"logps/rejected": -597.8560180664062, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.6624741554260254, |
|
"rewards/margins": 0.8161913752555847, |
|
"rewards/rejected": -3.478665590286255, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.4352657794952393, |
|
"logits/rejected": -1.227199912071228, |
|
"logps/chosen": -520.835693359375, |
|
"logps/rejected": -587.2822265625, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.4302444458007812, |
|
"rewards/margins": 1.0082801580429077, |
|
"rewards/rejected": -3.4385247230529785, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.293084979057312, |
|
"logits/rejected": -1.148153305053711, |
|
"logps/chosen": -520.2894897460938, |
|
"logps/rejected": -565.23681640625, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4665493965148926, |
|
"rewards/margins": 0.9034391641616821, |
|
"rewards/rejected": -3.369988203048706, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -1.1424802541732788, |
|
"logits/rejected": -1.187720775604248, |
|
"logps/chosen": -492.9554138183594, |
|
"logps/rejected": -617.7970581054688, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.6359786987304688, |
|
"rewards/margins": 1.0452814102172852, |
|
"rewards/rejected": -3.681259870529175, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 9.375, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.2500625848770142, |
|
"logits/rejected": -1.1256784200668335, |
|
"logps/chosen": -501.5577087402344, |
|
"logps/rejected": -588.8922119140625, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.537215232849121, |
|
"rewards/margins": 1.0240715742111206, |
|
"rewards/rejected": -3.5612869262695312, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 11.625, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.436962366104126, |
|
"logits/rejected": -1.266498327255249, |
|
"logps/chosen": -528.1736450195312, |
|
"logps/rejected": -578.77734375, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.388388156890869, |
|
"rewards/margins": 1.0654609203338623, |
|
"rewards/rejected": -3.4538490772247314, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.2391676902770996, |
|
"eval_logits/rejected": -1.1185089349746704, |
|
"eval_logps/chosen": -503.76922607421875, |
|
"eval_logps/rejected": -583.5771484375, |
|
"eval_loss": 0.4988709092140198, |
|
"eval_rewards/accuracies": 0.734499990940094, |
|
"eval_rewards/chosen": -2.39117169380188, |
|
"eval_rewards/margins": 0.9985132813453674, |
|
"eval_rewards/rejected": -3.3896851539611816, |
|
"eval_runtime": 385.1549, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 10.375, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -1.280256748199463, |
|
"logits/rejected": -1.239262342453003, |
|
"logps/chosen": -499.21240234375, |
|
"logps/rejected": -584.2531127929688, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.3436267375946045, |
|
"rewards/margins": 0.9151015281677246, |
|
"rewards/rejected": -3.25872802734375, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 10.625, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.4013721942901611, |
|
"logits/rejected": -1.2793995141983032, |
|
"logps/chosen": -463.5269470214844, |
|
"logps/rejected": -541.9952392578125, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1055784225463867, |
|
"rewards/margins": 0.965211033821106, |
|
"rewards/rejected": -3.070789337158203, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -1.4540785551071167, |
|
"logits/rejected": -1.2950793504714966, |
|
"logps/chosen": -521.9766235351562, |
|
"logps/rejected": -591.3455810546875, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.2707479000091553, |
|
"rewards/margins": 1.1174595355987549, |
|
"rewards/rejected": -3.388207197189331, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 12.375, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -1.2611262798309326, |
|
"logits/rejected": -1.2067164182662964, |
|
"logps/chosen": -539.65869140625, |
|
"logps/rejected": -629.5203857421875, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6909213066101074, |
|
"rewards/margins": 1.0888901948928833, |
|
"rewards/rejected": -3.7798118591308594, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 14.375, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -1.2389599084854126, |
|
"logits/rejected": -1.0517133474349976, |
|
"logps/chosen": -593.0328369140625, |
|
"logps/rejected": -635.1866455078125, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.446354627609253, |
|
"rewards/margins": 0.9264065027236938, |
|
"rewards/rejected": -4.3727617263793945, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 6.5, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -1.2825162410736084, |
|
"logits/rejected": -1.1602712869644165, |
|
"logps/chosen": -608.6810302734375, |
|
"logps/rejected": -699.3939819335938, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.277606964111328, |
|
"rewards/margins": 1.153955101966858, |
|
"rewards/rejected": -4.4315619468688965, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -1.2274243831634521, |
|
"logits/rejected": -1.191007375717163, |
|
"logps/chosen": -544.41259765625, |
|
"logps/rejected": -638.9494018554688, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.92881441116333, |
|
"rewards/margins": 0.869024932384491, |
|
"rewards/rejected": -3.7978389263153076, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 7.875, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -1.3184901475906372, |
|
"logits/rejected": -1.196045994758606, |
|
"logps/chosen": -512.1152954101562, |
|
"logps/rejected": -571.6860961914062, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5673513412475586, |
|
"rewards/margins": 0.8982425928115845, |
|
"rewards/rejected": -3.4655938148498535, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -1.3197405338287354, |
|
"logits/rejected": -1.1518932580947876, |
|
"logps/chosen": -544.2307739257812, |
|
"logps/rejected": -638.2955932617188, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.50474214553833, |
|
"rewards/margins": 1.0661590099334717, |
|
"rewards/rejected": -3.5709011554718018, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.283879041671753, |
|
"logits/rejected": -1.1339181661605835, |
|
"logps/chosen": -479.8946838378906, |
|
"logps/rejected": -564.1932373046875, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.385560989379883, |
|
"rewards/margins": 1.0045907497406006, |
|
"rewards/rejected": -3.3901519775390625, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_logits/chosen": -1.2462238073349, |
|
"eval_logits/rejected": -1.125494360923767, |
|
"eval_logps/chosen": -512.4297485351562, |
|
"eval_logps/rejected": -591.323974609375, |
|
"eval_loss": 0.4939311146736145, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.4777767658233643, |
|
"eval_rewards/margins": 0.9893770217895508, |
|
"eval_rewards/rejected": -3.467153787612915, |
|
"eval_runtime": 385.17, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -1.3495204448699951, |
|
"logits/rejected": -1.1980758905410767, |
|
"logps/chosen": -514.2600708007812, |
|
"logps/rejected": -565.2801513671875, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.384481906890869, |
|
"rewards/margins": 0.922328770160675, |
|
"rewards/rejected": -3.3068108558654785, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -1.3099550008773804, |
|
"logits/rejected": -1.2511496543884277, |
|
"logps/chosen": -468.06011962890625, |
|
"logps/rejected": -523.5824584960938, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.224961757659912, |
|
"rewards/margins": 0.8980560302734375, |
|
"rewards/rejected": -3.1230177879333496, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -1.353991985321045, |
|
"logits/rejected": -1.2306454181671143, |
|
"logps/chosen": -505.89434814453125, |
|
"logps/rejected": -550.9930419921875, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.3008170127868652, |
|
"rewards/margins": 0.7767833471298218, |
|
"rewards/rejected": -3.0776004791259766, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -1.3921695947647095, |
|
"logits/rejected": -1.2415539026260376, |
|
"logps/chosen": -491.42401123046875, |
|
"logps/rejected": -556.8810424804688, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2695276737213135, |
|
"rewards/margins": 1.0401204824447632, |
|
"rewards/rejected": -3.309648036956787, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.3411505222320557, |
|
"logits/rejected": -1.2057361602783203, |
|
"logps/chosen": -456.32452392578125, |
|
"logps/rejected": -508.445068359375, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2262067794799805, |
|
"rewards/margins": 0.8460358381271362, |
|
"rewards/rejected": -3.072242498397827, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -1.4614931344985962, |
|
"logits/rejected": -1.2260310649871826, |
|
"logps/chosen": -524.8610229492188, |
|
"logps/rejected": -565.4326171875, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.378574848175049, |
|
"rewards/margins": 1.0525071620941162, |
|
"rewards/rejected": -3.431082248687744, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -1.3028042316436768, |
|
"logits/rejected": -1.1499183177947998, |
|
"logps/chosen": -515.6001586914062, |
|
"logps/rejected": -565.5277099609375, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.5728909969329834, |
|
"rewards/margins": 0.8863977193832397, |
|
"rewards/rejected": -3.4592888355255127, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -1.3512235879898071, |
|
"logits/rejected": -1.2086089849472046, |
|
"logps/chosen": -495.2906188964844, |
|
"logps/rejected": -538.8243408203125, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.4533779621124268, |
|
"rewards/margins": 0.8070129156112671, |
|
"rewards/rejected": -3.2603907585144043, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 10.0, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -1.301710844039917, |
|
"logits/rejected": -1.1697108745574951, |
|
"logps/chosen": -466.3893127441406, |
|
"logps/rejected": -546.2555541992188, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1145732402801514, |
|
"rewards/margins": 0.8218411207199097, |
|
"rewards/rejected": -2.9364142417907715, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.4019851684570312, |
|
"logits/rejected": -1.2783384323120117, |
|
"logps/chosen": -460.39227294921875, |
|
"logps/rejected": -538.6397705078125, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9896026849746704, |
|
"rewards/margins": 0.9877565503120422, |
|
"rewards/rejected": -2.9773590564727783, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.289854884147644, |
|
"eval_logits/rejected": -1.1670362949371338, |
|
"eval_logps/chosen": -477.1521911621094, |
|
"eval_logps/rejected": -549.7868041992188, |
|
"eval_loss": 0.49245789647102356, |
|
"eval_rewards/accuracies": 0.7425000071525574, |
|
"eval_rewards/chosen": -2.125001907348633, |
|
"eval_rewards/margins": 0.9267801642417908, |
|
"eval_rewards/rejected": -3.05178165435791, |
|
"eval_runtime": 385.1303, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.4467527866363525, |
|
"logits/rejected": -1.2898051738739014, |
|
"logps/chosen": -501.9493103027344, |
|
"logps/rejected": -568.07080078125, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.26446270942688, |
|
"rewards/margins": 0.842617392539978, |
|
"rewards/rejected": -3.1070799827575684, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -1.2303822040557861, |
|
"logits/rejected": -1.1834524869918823, |
|
"logps/chosen": -536.91796875, |
|
"logps/rejected": -603.58203125, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6312527656555176, |
|
"rewards/margins": 0.7955335378646851, |
|
"rewards/rejected": -3.4267868995666504, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -1.2445331811904907, |
|
"logits/rejected": -1.1615046262741089, |
|
"logps/chosen": -523.9738159179688, |
|
"logps/rejected": -601.7839965820312, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.469764232635498, |
|
"rewards/margins": 1.127774953842163, |
|
"rewards/rejected": -3.597539186477661, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.75, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.3009926080703735, |
|
"logits/rejected": -1.1936320066452026, |
|
"logps/chosen": -551.2839965820312, |
|
"logps/rejected": -635.8419799804688, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.5426268577575684, |
|
"rewards/margins": 1.0221750736236572, |
|
"rewards/rejected": -3.5648021697998047, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.0, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.313072919845581, |
|
"logits/rejected": -1.2196762561798096, |
|
"logps/chosen": -495.29840087890625, |
|
"logps/rejected": -598.8929443359375, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2633450031280518, |
|
"rewards/margins": 1.2595245838165283, |
|
"rewards/rejected": -3.52286958694458, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 12.0, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -1.2646925449371338, |
|
"logits/rejected": -1.1446959972381592, |
|
"logps/chosen": -531.4093017578125, |
|
"logps/rejected": -583.4620971679688, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6022684574127197, |
|
"rewards/margins": 0.7986178994178772, |
|
"rewards/rejected": -3.4008865356445312, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 11.625, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.3460241556167603, |
|
"logits/rejected": -1.232742428779602, |
|
"logps/chosen": -530.1009521484375, |
|
"logps/rejected": -569.1173095703125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.4805283546447754, |
|
"rewards/margins": 0.9129024744033813, |
|
"rewards/rejected": -3.3934311866760254, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 16.625, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -1.344582438468933, |
|
"logits/rejected": -1.1410505771636963, |
|
"logps/chosen": -493.6297912597656, |
|
"logps/rejected": -556.1669921875, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3209660053253174, |
|
"rewards/margins": 0.930306613445282, |
|
"rewards/rejected": -3.251272678375244, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.3250610828399658, |
|
"logits/rejected": -1.207024097442627, |
|
"logps/chosen": -501.9657287597656, |
|
"logps/rejected": -565.3271484375, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4302685260772705, |
|
"rewards/margins": 0.9568912386894226, |
|
"rewards/rejected": -3.387159824371338, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.4138681888580322, |
|
"logits/rejected": -1.1992824077606201, |
|
"logps/chosen": -557.7728881835938, |
|
"logps/rejected": -629.088623046875, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.802109479904175, |
|
"rewards/margins": 1.0537548065185547, |
|
"rewards/rejected": -3.8558642864227295, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -1.2155396938323975, |
|
"eval_logits/rejected": -1.095304250717163, |
|
"eval_logps/chosen": -552.5741577148438, |
|
"eval_logps/rejected": -645.44482421875, |
|
"eval_loss": 0.49232217669487, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.8792214393615723, |
|
"eval_rewards/margins": 1.129140853881836, |
|
"eval_rewards/rejected": -4.008362770080566, |
|
"eval_runtime": 385.2143, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.0625, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.1646645069122314, |
|
"logits/rejected": -1.0743215084075928, |
|
"logps/chosen": -514.7222900390625, |
|
"logps/rejected": -580.4427490234375, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.9022274017333984, |
|
"rewards/margins": 0.9829545021057129, |
|
"rewards/rejected": -3.8851819038391113, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -1.3135536909103394, |
|
"logits/rejected": -1.1998984813690186, |
|
"logps/chosen": -542.7693481445312, |
|
"logps/rejected": -606.01123046875, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.874311685562134, |
|
"rewards/margins": 0.9611810445785522, |
|
"rewards/rejected": -3.8354930877685547, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 9.625, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -1.3124678134918213, |
|
"logits/rejected": -1.2832801342010498, |
|
"logps/chosen": -549.1907958984375, |
|
"logps/rejected": -632.5858764648438, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.7018771171569824, |
|
"rewards/margins": 0.8947007060050964, |
|
"rewards/rejected": -3.5965774059295654, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.2838572263717651, |
|
"logits/rejected": -1.117290735244751, |
|
"logps/chosen": -497.5326232910156, |
|
"logps/rejected": -604.8740234375, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6455423831939697, |
|
"rewards/margins": 1.1532337665557861, |
|
"rewards/rejected": -3.798776149749756, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.3377434015274048, |
|
"logits/rejected": -1.2184029817581177, |
|
"logps/chosen": -514.0582275390625, |
|
"logps/rejected": -602.654052734375, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5530195236206055, |
|
"rewards/margins": 1.208957552909851, |
|
"rewards/rejected": -3.761976957321167, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.3620846271514893, |
|
"logits/rejected": -1.1589324474334717, |
|
"logps/chosen": -535.8505859375, |
|
"logps/rejected": -625.4491577148438, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.725583791732788, |
|
"rewards/margins": 1.0414365530014038, |
|
"rewards/rejected": -3.7670199871063232, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.3228267431259155, |
|
"logits/rejected": -1.1428587436676025, |
|
"logps/chosen": -536.3853759765625, |
|
"logps/rejected": -627.5452880859375, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.720797300338745, |
|
"rewards/margins": 1.1999356746673584, |
|
"rewards/rejected": -3.9207332134246826, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -1.1800651550292969, |
|
"logits/rejected": -1.102126955986023, |
|
"logps/chosen": -539.4421997070312, |
|
"logps/rejected": -641.0511474609375, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.90867018699646, |
|
"rewards/margins": 1.1115381717681885, |
|
"rewards/rejected": -4.020208358764648, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -1.258175015449524, |
|
"logits/rejected": -1.1534559726715088, |
|
"logps/chosen": -557.5374755859375, |
|
"logps/rejected": -637.3475341796875, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8512537479400635, |
|
"rewards/margins": 1.0346183776855469, |
|
"rewards/rejected": -3.8858723640441895, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 8.75, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -1.2754342555999756, |
|
"logits/rejected": -1.123130440711975, |
|
"logps/chosen": -543.8443603515625, |
|
"logps/rejected": -633.3651123046875, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.808797836303711, |
|
"rewards/margins": 1.1008532047271729, |
|
"rewards/rejected": -3.909651279449463, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.1977647542953491, |
|
"eval_logits/rejected": -1.0794349908828735, |
|
"eval_logps/chosen": -549.680419921875, |
|
"eval_logps/rejected": -637.0914306640625, |
|
"eval_loss": 0.4923146665096283, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -2.8502840995788574, |
|
"eval_rewards/margins": 1.0745435953140259, |
|
"eval_rewards/rejected": -3.9248275756835938, |
|
"eval_runtime": 385.0636, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 13.875, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.3143008947372437, |
|
"logits/rejected": -1.2356500625610352, |
|
"logps/chosen": -539.9703979492188, |
|
"logps/rejected": -599.3643188476562, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9577202796936035, |
|
"rewards/margins": 0.8420518040657043, |
|
"rewards/rejected": -3.799771785736084, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.3325443267822266, |
|
"logits/rejected": -1.2115572690963745, |
|
"logps/chosen": -515.3961181640625, |
|
"logps/rejected": -601.1583862304688, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.630460262298584, |
|
"rewards/margins": 1.091335415840149, |
|
"rewards/rejected": -3.7217955589294434, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 13.9375, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.3422627449035645, |
|
"logits/rejected": -1.2155346870422363, |
|
"logps/chosen": -517.1422119140625, |
|
"logps/rejected": -613.8555908203125, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6036880016326904, |
|
"rewards/margins": 1.0863001346588135, |
|
"rewards/rejected": -3.689988613128662, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.25, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.3564714193344116, |
|
"logits/rejected": -1.1683833599090576, |
|
"logps/chosen": -539.3397216796875, |
|
"logps/rejected": -584.2203979492188, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.687293529510498, |
|
"rewards/margins": 1.0315442085266113, |
|
"rewards/rejected": -3.7188377380371094, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 11.125, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.3139379024505615, |
|
"logits/rejected": -1.1639807224273682, |
|
"logps/chosen": -541.3829956054688, |
|
"logps/rejected": -613.3155517578125, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.545919179916382, |
|
"rewards/margins": 1.1031758785247803, |
|
"rewards/rejected": -3.649095058441162, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.4163377285003662, |
|
"logits/rejected": -1.2610633373260498, |
|
"logps/chosen": -529.8837890625, |
|
"logps/rejected": -588.6536254882812, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5294137001037598, |
|
"rewards/margins": 1.1063209772109985, |
|
"rewards/rejected": -3.6357345581054688, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.3619472980499268, |
|
"logits/rejected": -1.2863503694534302, |
|
"logps/chosen": -506.430908203125, |
|
"logps/rejected": -603.09228515625, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.5362462997436523, |
|
"rewards/margins": 0.925518810749054, |
|
"rewards/rejected": -3.4617652893066406, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 14.375, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -1.175429105758667, |
|
"logits/rejected": -1.0819575786590576, |
|
"logps/chosen": -522.57373046875, |
|
"logps/rejected": -610.5762939453125, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.69221830368042, |
|
"rewards/margins": 1.2225408554077148, |
|
"rewards/rejected": -3.914759874343872, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 12.25, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -1.2581863403320312, |
|
"logits/rejected": -1.1994664669036865, |
|
"logps/chosen": -505.96783447265625, |
|
"logps/rejected": -609.1953735351562, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7113680839538574, |
|
"rewards/margins": 1.186835527420044, |
|
"rewards/rejected": -3.8982033729553223, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 13.75, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -1.3734843730926514, |
|
"logits/rejected": -1.2433956861495972, |
|
"logps/chosen": -518.4763793945312, |
|
"logps/rejected": -614.0827026367188, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6127266883850098, |
|
"rewards/margins": 1.034812569618225, |
|
"rewards/rejected": -3.6475391387939453, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -1.2522040605545044, |
|
"eval_logits/rejected": -1.1292414665222168, |
|
"eval_logps/chosen": -521.7777709960938, |
|
"eval_logps/rejected": -610.1890258789062, |
|
"eval_loss": 0.49058130383491516, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.5712568759918213, |
|
"eval_rewards/margins": 1.0845470428466797, |
|
"eval_rewards/rejected": -3.655803918838501, |
|
"eval_runtime": 384.7732, |
|
"eval_samples_per_second": 5.198, |
|
"eval_steps_per_second": 0.65, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.372878909111023, |
|
"logits/rejected": -1.2597870826721191, |
|
"logps/chosen": -519.8887939453125, |
|
"logps/rejected": -593.8539428710938, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.424806594848633, |
|
"rewards/margins": 1.1252799034118652, |
|
"rewards/rejected": -3.550086259841919, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -1.347572684288025, |
|
"logits/rejected": -1.115192174911499, |
|
"logps/chosen": -546.6536254882812, |
|
"logps/rejected": -608.205078125, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.5760815143585205, |
|
"rewards/margins": 1.198232889175415, |
|
"rewards/rejected": -3.7743141651153564, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.3562158346176147, |
|
"logits/rejected": -1.2117723226547241, |
|
"logps/chosen": -505.29254150390625, |
|
"logps/rejected": -614.58251953125, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.5852229595184326, |
|
"rewards/margins": 1.1992766857147217, |
|
"rewards/rejected": -3.784499406814575, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -1.2109121084213257, |
|
"logits/rejected": -1.152276635169983, |
|
"logps/chosen": -509.5875549316406, |
|
"logps/rejected": -634.964111328125, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6477839946746826, |
|
"rewards/margins": 1.3151264190673828, |
|
"rewards/rejected": -3.9629104137420654, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 19.375, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.2700594663619995, |
|
"logits/rejected": -1.166520118713379, |
|
"logps/chosen": -509.182861328125, |
|
"logps/rejected": -621.1192626953125, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.71712589263916, |
|
"rewards/margins": 1.1634531021118164, |
|
"rewards/rejected": -3.8805785179138184, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 16.5, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.275468349456787, |
|
"logits/rejected": -1.1098088026046753, |
|
"logps/chosen": -555.3331909179688, |
|
"logps/rejected": -615.7780151367188, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6335442066192627, |
|
"rewards/margins": 1.167301058769226, |
|
"rewards/rejected": -3.8008453845977783, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 18.0, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.2889525890350342, |
|
"logits/rejected": -1.1872795820236206, |
|
"logps/chosen": -510.55615234375, |
|
"logps/rejected": -586.6162109375, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.688310146331787, |
|
"rewards/margins": 0.8770621418952942, |
|
"rewards/rejected": -3.5653719902038574, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.359438419342041, |
|
"logits/rejected": -1.2795076370239258, |
|
"logps/chosen": -480.8011779785156, |
|
"logps/rejected": -599.15966796875, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.249246120452881, |
|
"rewards/margins": 1.150412917137146, |
|
"rewards/rejected": -3.3996593952178955, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -1.326791763305664, |
|
"logits/rejected": -1.2331459522247314, |
|
"logps/chosen": -485.2764587402344, |
|
"logps/rejected": -594.4434814453125, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.346505641937256, |
|
"rewards/margins": 1.1767139434814453, |
|
"rewards/rejected": -3.523219585418701, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -1.3617570400238037, |
|
"logits/rejected": -1.1875524520874023, |
|
"logps/chosen": -524.5842895507812, |
|
"logps/rejected": -588.7896728515625, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5600085258483887, |
|
"rewards/margins": 1.0283123254776, |
|
"rewards/rejected": -3.5883209705352783, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.2491270303726196, |
|
"eval_logits/rejected": -1.1266547441482544, |
|
"eval_logps/chosen": -523.2234497070312, |
|
"eval_logps/rejected": -616.9339599609375, |
|
"eval_loss": 0.4946673512458801, |
|
"eval_rewards/accuracies": 0.7365000247955322, |
|
"eval_rewards/chosen": -2.585714340209961, |
|
"eval_rewards/margins": 1.1375384330749512, |
|
"eval_rewards/rejected": -3.723253011703491, |
|
"eval_runtime": 385.1919, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -1.3990509510040283, |
|
"logits/rejected": -1.2910901308059692, |
|
"logps/chosen": -524.749267578125, |
|
"logps/rejected": -631.2271728515625, |
|
"loss": 0.445, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5168325901031494, |
|
"rewards/margins": 1.2660022974014282, |
|
"rewards/rejected": -3.782834529876709, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 25.875, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.2434417009353638, |
|
"logits/rejected": -1.1283738613128662, |
|
"logps/chosen": -539.6519165039062, |
|
"logps/rejected": -630.5535888671875, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7645440101623535, |
|
"rewards/margins": 1.129831314086914, |
|
"rewards/rejected": -3.8943753242492676, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.3770835399627686, |
|
"logits/rejected": -1.216672658920288, |
|
"logps/chosen": -541.1361694335938, |
|
"logps/rejected": -622.9951171875, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7195262908935547, |
|
"rewards/margins": 1.0894376039505005, |
|
"rewards/rejected": -3.8089637756347656, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -1.344639539718628, |
|
"logits/rejected": -1.1973941326141357, |
|
"logps/chosen": -521.10888671875, |
|
"logps/rejected": -628.2103881835938, |
|
"loss": 0.4222, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8465256690979004, |
|
"rewards/margins": 1.2073593139648438, |
|
"rewards/rejected": -4.053884983062744, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.3569964170455933, |
|
"logits/rejected": -1.2025775909423828, |
|
"logps/chosen": -566.9078369140625, |
|
"logps/rejected": -691.9054565429688, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7007460594177246, |
|
"rewards/margins": 1.4950422048568726, |
|
"rewards/rejected": -4.195788383483887, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 16.375, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -1.3312625885009766, |
|
"logits/rejected": -1.1907614469528198, |
|
"logps/chosen": -577.4212646484375, |
|
"logps/rejected": -690.229248046875, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.055377244949341, |
|
"rewards/margins": 1.2482696771621704, |
|
"rewards/rejected": -4.303646564483643, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 16.875, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.2753608226776123, |
|
"logits/rejected": -1.1736326217651367, |
|
"logps/chosen": -555.798828125, |
|
"logps/rejected": -654.16357421875, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.978670835494995, |
|
"rewards/margins": 1.214051365852356, |
|
"rewards/rejected": -4.192722320556641, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.0, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -1.3060388565063477, |
|
"logits/rejected": -1.1588232517242432, |
|
"logps/chosen": -556.8359985351562, |
|
"logps/rejected": -636.2288818359375, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.8967931270599365, |
|
"rewards/margins": 1.125984787940979, |
|
"rewards/rejected": -4.022777557373047, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 10.625, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -1.3615459203720093, |
|
"logits/rejected": -1.3014076948165894, |
|
"logps/chosen": -538.6215209960938, |
|
"logps/rejected": -644.1961669921875, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.731085777282715, |
|
"rewards/margins": 1.0968948602676392, |
|
"rewards/rejected": -3.8279807567596436, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.375808596611023, |
|
"logits/rejected": -1.252746820449829, |
|
"logps/chosen": -531.059326171875, |
|
"logps/rejected": -624.744140625, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.668408155441284, |
|
"rewards/margins": 1.2005492448806763, |
|
"rewards/rejected": -3.86895751953125, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": -1.2462804317474365, |
|
"eval_logits/rejected": -1.1248236894607544, |
|
"eval_logps/chosen": -534.3994140625, |
|
"eval_logps/rejected": -625.0958251953125, |
|
"eval_loss": 0.4923916161060333, |
|
"eval_rewards/accuracies": 0.7354999780654907, |
|
"eval_rewards/chosen": -2.6974740028381348, |
|
"eval_rewards/margins": 1.1073981523513794, |
|
"eval_rewards/rejected": -3.8048720359802246, |
|
"eval_runtime": 385.0439, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 13.125, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.2778210639953613, |
|
"logits/rejected": -1.228360652923584, |
|
"logps/chosen": -523.0855102539062, |
|
"logps/rejected": -629.9219970703125, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.572385787963867, |
|
"rewards/margins": 1.0744374990463257, |
|
"rewards/rejected": -3.6468231678009033, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 6.6875, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.3865063190460205, |
|
"logits/rejected": -1.2557927370071411, |
|
"logps/chosen": -545.7744750976562, |
|
"logps/rejected": -609.2724609375, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.5462071895599365, |
|
"rewards/margins": 0.9365339279174805, |
|
"rewards/rejected": -3.482741117477417, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.3251538276672363, |
|
"logits/rejected": -1.1808980703353882, |
|
"logps/chosen": -494.53924560546875, |
|
"logps/rejected": -603.8756103515625, |
|
"loss": 0.3923, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.404003143310547, |
|
"rewards/margins": 1.5079169273376465, |
|
"rewards/rejected": -3.9119198322296143, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.319456696510315, |
|
"logits/rejected": -1.213781714439392, |
|
"logps/chosen": -539.8772583007812, |
|
"logps/rejected": -638.1966552734375, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5620384216308594, |
|
"rewards/margins": 1.3065942525863647, |
|
"rewards/rejected": -3.8686325550079346, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.25, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.3422720432281494, |
|
"logits/rejected": -1.2605860233306885, |
|
"logps/chosen": -512.2991943359375, |
|
"logps/rejected": -620.3077392578125, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.5718300342559814, |
|
"rewards/margins": 1.018854022026062, |
|
"rewards/rejected": -3.590684175491333, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.75, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -1.3229783773422241, |
|
"logits/rejected": -1.1519359350204468, |
|
"logps/chosen": -497.25701904296875, |
|
"logps/rejected": -590.8815307617188, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.508775472640991, |
|
"rewards/margins": 1.2793452739715576, |
|
"rewards/rejected": -3.788120985031128, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 32.0, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -1.2877874374389648, |
|
"logits/rejected": -1.1616923809051514, |
|
"logps/chosen": -543.9298095703125, |
|
"logps/rejected": -625.6560668945312, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.712940216064453, |
|
"rewards/margins": 1.1042835712432861, |
|
"rewards/rejected": -3.8172237873077393, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -1.282389521598816, |
|
"logits/rejected": -1.1512477397918701, |
|
"logps/chosen": -572.5489501953125, |
|
"logps/rejected": -676.9873046875, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8474509716033936, |
|
"rewards/margins": 1.3263962268829346, |
|
"rewards/rejected": -4.173847198486328, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.0, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.2781507968902588, |
|
"logits/rejected": -1.1678388118743896, |
|
"logps/chosen": -534.2879638671875, |
|
"logps/rejected": -643.2774047851562, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7367353439331055, |
|
"rewards/margins": 1.3677116632461548, |
|
"rewards/rejected": -4.104446887969971, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -1.3029879331588745, |
|
"logits/rejected": -1.196803092956543, |
|
"logps/chosen": -556.0444946289062, |
|
"logps/rejected": -652.0103149414062, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.9077467918395996, |
|
"rewards/margins": 1.127124547958374, |
|
"rewards/rejected": -4.0348711013793945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.2345499992370605, |
|
"eval_logits/rejected": -1.1134350299835205, |
|
"eval_logps/chosen": -547.6557006835938, |
|
"eval_logps/rejected": -641.2913208007812, |
|
"eval_loss": 0.4899207055568695, |
|
"eval_rewards/accuracies": 0.7379999756813049, |
|
"eval_rewards/chosen": -2.830036163330078, |
|
"eval_rewards/margins": 1.1367909908294678, |
|
"eval_rewards/rejected": -3.966827154159546, |
|
"eval_runtime": 384.9651, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.2949634790420532, |
|
"logits/rejected": -1.183593988418579, |
|
"logps/chosen": -563.5440673828125, |
|
"logps/rejected": -668.3243408203125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.0242040157318115, |
|
"rewards/margins": 1.0766557455062866, |
|
"rewards/rejected": -4.100859642028809, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 19.0, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.2192089557647705, |
|
"logits/rejected": -1.0985405445098877, |
|
"logps/chosen": -531.9083251953125, |
|
"logps/rejected": -606.6322021484375, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9121012687683105, |
|
"rewards/margins": 0.8815471529960632, |
|
"rewards/rejected": -3.7936484813690186, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 12.125, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.2642897367477417, |
|
"logits/rejected": -1.1569067239761353, |
|
"logps/chosen": -561.394287109375, |
|
"logps/rejected": -641.754638671875, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0005598068237305, |
|
"rewards/margins": 1.063594102859497, |
|
"rewards/rejected": -4.064153671264648, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.3579823970794678, |
|
"logits/rejected": -1.2555077075958252, |
|
"logps/chosen": -551.67626953125, |
|
"logps/rejected": -596.185546875, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.90908145904541, |
|
"rewards/margins": 0.7748203277587891, |
|
"rewards/rejected": -3.6839020252227783, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 9.75, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.2956401109695435, |
|
"logits/rejected": -1.1352595090866089, |
|
"logps/chosen": -550.6188354492188, |
|
"logps/rejected": -647.8556518554688, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7766430377960205, |
|
"rewards/margins": 1.2017085552215576, |
|
"rewards/rejected": -3.97835111618042, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 9.875, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -1.2600593566894531, |
|
"logits/rejected": -1.2393784523010254, |
|
"logps/chosen": -524.3843994140625, |
|
"logps/rejected": -654.7698364257812, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.6760458946228027, |
|
"rewards/margins": 1.2455599308013916, |
|
"rewards/rejected": -3.921605348587036, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 20.5, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.3854873180389404, |
|
"logits/rejected": -1.3558924198150635, |
|
"logps/chosen": -545.82568359375, |
|
"logps/rejected": -628.0182495117188, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.765906810760498, |
|
"rewards/margins": 0.901807963848114, |
|
"rewards/rejected": -3.6677145957946777, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 10.125, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -1.3561471700668335, |
|
"logits/rejected": -1.2010211944580078, |
|
"logps/chosen": -551.3495483398438, |
|
"logps/rejected": -639.118896484375, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.979217052459717, |
|
"rewards/margins": 1.1320674419403076, |
|
"rewards/rejected": -4.1112847328186035, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -1.3641732931137085, |
|
"logits/rejected": -1.2438944578170776, |
|
"logps/chosen": -541.0029296875, |
|
"logps/rejected": -626.8678588867188, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.901430130004883, |
|
"rewards/margins": 1.142988681793213, |
|
"rewards/rejected": -4.044418811798096, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.5, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -1.2573918104171753, |
|
"logits/rejected": -1.1431939601898193, |
|
"logps/chosen": -554.5100708007812, |
|
"logps/rejected": -662.68212890625, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.98957896232605, |
|
"rewards/margins": 1.1248613595962524, |
|
"rewards/rejected": -4.11444091796875, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_logits/chosen": -1.2396172285079956, |
|
"eval_logits/rejected": -1.1188315153121948, |
|
"eval_logps/chosen": -558.570556640625, |
|
"eval_logps/rejected": -650.9627075195312, |
|
"eval_loss": 0.48732802271842957, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.9391860961914062, |
|
"eval_rewards/margins": 1.1243551969528198, |
|
"eval_rewards/rejected": -4.063540935516357, |
|
"eval_runtime": 385.3295, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 0.649, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.252179741859436, |
|
"logits/rejected": -1.0778075456619263, |
|
"logps/chosen": -531.073974609375, |
|
"logps/rejected": -619.1007690429688, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.895054817199707, |
|
"rewards/margins": 1.2014925479888916, |
|
"rewards/rejected": -4.096547603607178, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.2942620515823364, |
|
"logits/rejected": -1.1525405645370483, |
|
"logps/chosen": -569.043701171875, |
|
"logps/rejected": -657.7420043945312, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.9221444129943848, |
|
"rewards/margins": 0.9633318185806274, |
|
"rewards/rejected": -3.8854763507843018, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -1.3769783973693848, |
|
"logits/rejected": -1.217556357383728, |
|
"logps/chosen": -552.2919921875, |
|
"logps/rejected": -631.7188720703125, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.8292160034179688, |
|
"rewards/margins": 1.050167202949524, |
|
"rewards/rejected": -3.879383087158203, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 9.5, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.2488492727279663, |
|
"logits/rejected": -1.1089154481887817, |
|
"logps/chosen": -549.6131591796875, |
|
"logps/rejected": -629.2005615234375, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8892455101013184, |
|
"rewards/margins": 1.214680790901184, |
|
"rewards/rejected": -4.103926658630371, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 11.75, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -1.259916067123413, |
|
"logits/rejected": -1.1467456817626953, |
|
"logps/chosen": -547.0595092773438, |
|
"logps/rejected": -607.7587280273438, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.9437592029571533, |
|
"rewards/margins": 0.854836106300354, |
|
"rewards/rejected": -3.798595428466797, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 7.34375, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -1.275743007659912, |
|
"logits/rejected": -1.1921640634536743, |
|
"logps/chosen": -545.425537109375, |
|
"logps/rejected": -653.1339111328125, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.832059383392334, |
|
"rewards/margins": 1.3181660175323486, |
|
"rewards/rejected": -4.150225639343262, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 12.25, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -1.3098807334899902, |
|
"logits/rejected": -1.1533119678497314, |
|
"logps/chosen": -584.9642333984375, |
|
"logps/rejected": -682.4730224609375, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.021878957748413, |
|
"rewards/margins": 1.133847951889038, |
|
"rewards/rejected": -4.155727386474609, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.625, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.3365461826324463, |
|
"logits/rejected": -1.1761207580566406, |
|
"logps/chosen": -542.1203002929688, |
|
"logps/rejected": -646.118896484375, |
|
"loss": 0.4723, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8982977867126465, |
|
"rewards/margins": 1.1679728031158447, |
|
"rewards/rejected": -4.0662713050842285, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 7.0, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -1.3771815299987793, |
|
"logits/rejected": -1.2472676038742065, |
|
"logps/chosen": -512.2825317382812, |
|
"logps/rejected": -608.7750854492188, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.8136465549468994, |
|
"rewards/margins": 1.1070338487625122, |
|
"rewards/rejected": -3.920680284500122, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.2860959768295288, |
|
"logits/rejected": -1.2111051082611084, |
|
"logps/chosen": -506.64581298828125, |
|
"logps/rejected": -628.4481811523438, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7242074012756348, |
|
"rewards/margins": 1.1695196628570557, |
|
"rewards/rejected": -3.8937268257141113, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.2526096105575562, |
|
"eval_logits/rejected": -1.1318107843399048, |
|
"eval_logps/chosen": -550.865478515625, |
|
"eval_logps/rejected": -638.7723999023438, |
|
"eval_loss": 0.4866448938846588, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.8621349334716797, |
|
"eval_rewards/margins": 1.079501986503601, |
|
"eval_rewards/rejected": -3.9416370391845703, |
|
"eval_runtime": 385.0884, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.625, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.2896664142608643, |
|
"logits/rejected": -1.1979024410247803, |
|
"logps/chosen": -560.0714721679688, |
|
"logps/rejected": -646.5289916992188, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9137609004974365, |
|
"rewards/margins": 1.0487867593765259, |
|
"rewards/rejected": -3.9625473022460938, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.2192307710647583, |
|
"logits/rejected": -1.0712454319000244, |
|
"logps/chosen": -515.818115234375, |
|
"logps/rejected": -601.6507568359375, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.895503520965576, |
|
"rewards/margins": 1.1022310256958008, |
|
"rewards/rejected": -3.997734785079956, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -1.2880637645721436, |
|
"logits/rejected": -1.2091928720474243, |
|
"logps/chosen": -509.5738220214844, |
|
"logps/rejected": -608.5218505859375, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7617316246032715, |
|
"rewards/margins": 1.1223886013031006, |
|
"rewards/rejected": -3.884120464324951, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -1.4694463014602661, |
|
"logits/rejected": -1.2010104656219482, |
|
"logps/chosen": -551.3878173828125, |
|
"logps/rejected": -589.3790283203125, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7574946880340576, |
|
"rewards/margins": 1.0347812175750732, |
|
"rewards/rejected": -3.792275905609131, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 29.5, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -1.2609448432922363, |
|
"logits/rejected": -1.1505969762802124, |
|
"logps/chosen": -498.6568908691406, |
|
"logps/rejected": -627.9306640625, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.703416347503662, |
|
"rewards/margins": 1.2583777904510498, |
|
"rewards/rejected": -3.961793899536133, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 10.625, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -1.216658353805542, |
|
"logits/rejected": -1.2167049646377563, |
|
"logps/chosen": -540.7564086914062, |
|
"logps/rejected": -711.563232421875, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.7823424339294434, |
|
"rewards/margins": 1.2834153175354004, |
|
"rewards/rejected": -4.065757751464844, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -1.4194704294204712, |
|
"logits/rejected": -1.3601640462875366, |
|
"logps/chosen": -555.782958984375, |
|
"logps/rejected": -634.6406860351562, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.743645191192627, |
|
"rewards/margins": 0.9945963025093079, |
|
"rewards/rejected": -3.7382407188415527, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.3637388944625854, |
|
"logits/rejected": -1.216048240661621, |
|
"logps/chosen": -546.4483642578125, |
|
"logps/rejected": -646.1047973632812, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.855586528778076, |
|
"rewards/margins": 1.2608329057693481, |
|
"rewards/rejected": -4.116419792175293, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -1.3931351900100708, |
|
"logits/rejected": -1.2272682189941406, |
|
"logps/chosen": -558.13232421875, |
|
"logps/rejected": -618.197265625, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8131866455078125, |
|
"rewards/margins": 1.1027860641479492, |
|
"rewards/rejected": -3.915972948074341, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -1.3162554502487183, |
|
"logits/rejected": -1.1940876245498657, |
|
"logps/chosen": -549.4010009765625, |
|
"logps/rejected": -625.9002075195312, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.7309937477111816, |
|
"rewards/margins": 0.9850690960884094, |
|
"rewards/rejected": -3.7160630226135254, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.2554689645767212, |
|
"eval_logits/rejected": -1.1347445249557495, |
|
"eval_logps/chosen": -549.6837158203125, |
|
"eval_logps/rejected": -638.7192993164062, |
|
"eval_loss": 0.4868563115596771, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -2.8503170013427734, |
|
"eval_rewards/margins": 1.0907903909683228, |
|
"eval_rewards/rejected": -3.9411072731018066, |
|
"eval_runtime": 385.4515, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 8.375, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.3838107585906982, |
|
"logits/rejected": -1.2234851121902466, |
|
"logps/chosen": -565.4810791015625, |
|
"logps/rejected": -655.7274169921875, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.800589084625244, |
|
"rewards/margins": 1.0603386163711548, |
|
"rewards/rejected": -3.8609280586242676, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -1.3474712371826172, |
|
"logits/rejected": -1.1852939128875732, |
|
"logps/chosen": -559.21142578125, |
|
"logps/rejected": -640.77685546875, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.9007859230041504, |
|
"rewards/margins": 1.0891984701156616, |
|
"rewards/rejected": -3.9899849891662598, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 12.125, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -1.2560558319091797, |
|
"logits/rejected": -1.1815481185913086, |
|
"logps/chosen": -549.0807495117188, |
|
"logps/rejected": -643.4081420898438, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.9811716079711914, |
|
"rewards/margins": 1.0034395456314087, |
|
"rewards/rejected": -3.9846110343933105, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -1.2670228481292725, |
|
"logits/rejected": -1.0823358297348022, |
|
"logps/chosen": -515.7471923828125, |
|
"logps/rejected": -646.492919921875, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7649807929992676, |
|
"rewards/margins": 1.3290727138519287, |
|
"rewards/rejected": -4.094053745269775, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -1.4123005867004395, |
|
"logits/rejected": -1.2410565614700317, |
|
"logps/chosen": -551.8477783203125, |
|
"logps/rejected": -645.891357421875, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.880317449569702, |
|
"rewards/margins": 1.0536738634109497, |
|
"rewards/rejected": -3.9339919090270996, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 8.75, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -1.2695270776748657, |
|
"logits/rejected": -1.1814700365066528, |
|
"logps/chosen": -558.7033081054688, |
|
"logps/rejected": -645.794189453125, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.831700086593628, |
|
"rewards/margins": 0.990101158618927, |
|
"rewards/rejected": -3.8218014240264893, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.75, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -1.244091272354126, |
|
"logits/rejected": -1.0454550981521606, |
|
"logps/chosen": -539.8818359375, |
|
"logps/rejected": -634.0319213867188, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.7417213916778564, |
|
"rewards/margins": 1.257968544960022, |
|
"rewards/rejected": -3.9996895790100098, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -1.3269858360290527, |
|
"logits/rejected": -1.2656229734420776, |
|
"logps/chosen": -505.6949157714844, |
|
"logps/rejected": -628.41015625, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.827846050262451, |
|
"rewards/margins": 0.9887911677360535, |
|
"rewards/rejected": -3.816636562347412, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 7.0, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.327467441558838, |
|
"logits/rejected": -1.272200584411621, |
|
"logps/chosen": -555.987060546875, |
|
"logps/rejected": -642.5946655273438, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.9210267066955566, |
|
"rewards/margins": 1.0088088512420654, |
|
"rewards/rejected": -3.929835557937622, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -1.2530772686004639, |
|
"logits/rejected": -1.2375959157943726, |
|
"logps/chosen": -542.0306396484375, |
|
"logps/rejected": -671.2916259765625, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8428683280944824, |
|
"rewards/margins": 1.1683650016784668, |
|
"rewards/rejected": -4.011233329772949, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.2554447650909424, |
|
"eval_logits/rejected": -1.1346678733825684, |
|
"eval_logps/chosen": -545.7666015625, |
|
"eval_logps/rejected": -634.5078735351562, |
|
"eval_loss": 0.48661333322525024, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.8111462593078613, |
|
"eval_rewards/margins": 1.0878463983535767, |
|
"eval_rewards/rejected": -3.8989927768707275, |
|
"eval_runtime": 385.3303, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.264981985092163, |
|
"logits/rejected": -1.1978137493133545, |
|
"logps/chosen": -532.4588623046875, |
|
"logps/rejected": -602.8772583007812, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.7103111743927, |
|
"rewards/margins": 0.944588840007782, |
|
"rewards/rejected": -3.654900074005127, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -1.3384299278259277, |
|
"logits/rejected": -1.2301527261734009, |
|
"logps/chosen": -568.9376220703125, |
|
"logps/rejected": -628.9427490234375, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7523765563964844, |
|
"rewards/margins": 1.0309317111968994, |
|
"rewards/rejected": -3.7833080291748047, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.2620373964309692, |
|
"logits/rejected": -1.1610171794891357, |
|
"logps/chosen": -548.6265258789062, |
|
"logps/rejected": -628.5254516601562, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.8689258098602295, |
|
"rewards/margins": 0.9248504638671875, |
|
"rewards/rejected": -3.793776273727417, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.4505221843719482, |
|
"logits/rejected": -1.2806892395019531, |
|
"logps/chosen": -536.9176025390625, |
|
"logps/rejected": -614.2163696289062, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.71694016456604, |
|
"rewards/margins": 1.147782802581787, |
|
"rewards/rejected": -3.864722490310669, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 10.375, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.377443790435791, |
|
"logits/rejected": -1.2464927434921265, |
|
"logps/chosen": -556.8729858398438, |
|
"logps/rejected": -654.7142333984375, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6353092193603516, |
|
"rewards/margins": 1.2298697233200073, |
|
"rewards/rejected": -3.8651790618896484, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 16.0, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -1.2895920276641846, |
|
"logits/rejected": -1.1866865158081055, |
|
"logps/chosen": -556.0447998046875, |
|
"logps/rejected": -639.7942504882812, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.989259719848633, |
|
"rewards/margins": 1.0085315704345703, |
|
"rewards/rejected": -3.997791290283203, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -1.3556302785873413, |
|
"logits/rejected": -1.2743966579437256, |
|
"logps/chosen": -520.0645141601562, |
|
"logps/rejected": -612.8271484375, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.72322678565979, |
|
"rewards/margins": 1.0860865116119385, |
|
"rewards/rejected": -3.8093135356903076, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 13.625, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.3034099340438843, |
|
"logits/rejected": -1.271439790725708, |
|
"logps/chosen": -548.0056762695312, |
|
"logps/rejected": -648.6056518554688, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.734912395477295, |
|
"rewards/margins": 0.9768469929695129, |
|
"rewards/rejected": -3.711759090423584, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -1.3113230466842651, |
|
"logits/rejected": -1.3101108074188232, |
|
"logps/chosen": -549.2462768554688, |
|
"logps/rejected": -667.9041748046875, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8819470405578613, |
|
"rewards/margins": 1.2062867879867554, |
|
"rewards/rejected": -4.0882344245910645, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.3042861223220825, |
|
"logits/rejected": -1.1825424432754517, |
|
"logps/chosen": -531.413818359375, |
|
"logps/rejected": -631.4432373046875, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.740572452545166, |
|
"rewards/margins": 1.0384232997894287, |
|
"rewards/rejected": -3.778996229171753, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.2585511207580566, |
|
"eval_logits/rejected": -1.1378772258758545, |
|
"eval_logps/chosen": -544.573974609375, |
|
"eval_logps/rejected": -633.404052734375, |
|
"eval_loss": 0.4864084720611572, |
|
"eval_rewards/accuracies": 0.7394999861717224, |
|
"eval_rewards/chosen": -2.7992191314697266, |
|
"eval_rewards/margins": 1.0887356996536255, |
|
"eval_rewards/rejected": -3.8879551887512207, |
|
"eval_runtime": 385.2344, |
|
"eval_samples_per_second": 5.192, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 15.8125, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -1.345733880996704, |
|
"logits/rejected": -1.2021456956863403, |
|
"logps/chosen": -504.2335510253906, |
|
"logps/rejected": -557.3888549804688, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.744576930999756, |
|
"rewards/margins": 0.9676315188407898, |
|
"rewards/rejected": -3.7122085094451904, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.3191635608673096, |
|
"logits/rejected": -1.1912063360214233, |
|
"logps/chosen": -537.1017456054688, |
|
"logps/rejected": -623.8556518554688, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.728193998336792, |
|
"rewards/margins": 1.1158511638641357, |
|
"rewards/rejected": -3.8440451622009277, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -1.4545891284942627, |
|
"logits/rejected": -1.2835543155670166, |
|
"logps/chosen": -547.1002807617188, |
|
"logps/rejected": -641.8110961914062, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6952996253967285, |
|
"rewards/margins": 1.241824746131897, |
|
"rewards/rejected": -3.937124252319336, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.3351377248764038, |
|
"logits/rejected": -1.2292808294296265, |
|
"logps/chosen": -560.9166259765625, |
|
"logps/rejected": -651.31103515625, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9212374687194824, |
|
"rewards/margins": 0.9840442538261414, |
|
"rewards/rejected": -3.9052817821502686, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.25, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.2632300853729248, |
|
"logits/rejected": -1.1798118352890015, |
|
"logps/chosen": -535.8150634765625, |
|
"logps/rejected": -648.6831665039062, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7001757621765137, |
|
"rewards/margins": 1.211038589477539, |
|
"rewards/rejected": -3.9112143516540527, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -1.3267244100570679, |
|
"logits/rejected": -1.1418159008026123, |
|
"logps/chosen": -551.0586547851562, |
|
"logps/rejected": -682.3759765625, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.783801317214966, |
|
"rewards/margins": 1.3722645044326782, |
|
"rewards/rejected": -4.156065940856934, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.2914237976074219, |
|
"logits/rejected": -1.1433568000793457, |
|
"logps/chosen": -527.1336669921875, |
|
"logps/rejected": -625.6705322265625, |
|
"loss": 0.4545, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.8694703578948975, |
|
"rewards/margins": 1.1879098415374756, |
|
"rewards/rejected": -4.057379722595215, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.2892788648605347, |
|
"logits/rejected": -1.1720714569091797, |
|
"logps/chosen": -541.9439697265625, |
|
"logps/rejected": -635.7294311523438, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.873889446258545, |
|
"rewards/margins": 1.1193937063217163, |
|
"rewards/rejected": -3.9932830333709717, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.3597743511199951, |
|
"logits/rejected": -1.1029024124145508, |
|
"logps/chosen": -555.9939575195312, |
|
"logps/rejected": -613.6519775390625, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.8387951850891113, |
|
"rewards/margins": 1.0735225677490234, |
|
"rewards/rejected": -3.9123177528381348, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.37373685836792, |
|
"logits/rejected": -1.178899884223938, |
|
"logps/chosen": -578.7473754882812, |
|
"logps/rejected": -641.29345703125, |
|
"loss": 0.455, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.636767864227295, |
|
"rewards/margins": 1.1961476802825928, |
|
"rewards/rejected": -3.8329155445098877, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.2543540000915527, |
|
"eval_logits/rejected": -1.1335822343826294, |
|
"eval_logps/chosen": -545.915283203125, |
|
"eval_logps/rejected": -635.4321899414062, |
|
"eval_loss": 0.48658648133277893, |
|
"eval_rewards/accuracies": 0.7394999861717224, |
|
"eval_rewards/chosen": -2.8126325607299805, |
|
"eval_rewards/margins": 1.0956026315689087, |
|
"eval_rewards/rejected": -3.9082350730895996, |
|
"eval_runtime": 385.1178, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 14.25, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.3175909519195557, |
|
"logits/rejected": -1.1420743465423584, |
|
"logps/chosen": -559.6712036132812, |
|
"logps/rejected": -638.4949951171875, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.7832179069519043, |
|
"rewards/margins": 1.2219712734222412, |
|
"rewards/rejected": -4.005189418792725, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 18.25, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.4023295640945435, |
|
"logits/rejected": -1.2579714059829712, |
|
"logps/chosen": -565.6453857421875, |
|
"logps/rejected": -633.6785278320312, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.844580888748169, |
|
"rewards/margins": 1.0315876007080078, |
|
"rewards/rejected": -3.8761680126190186, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -1.3531277179718018, |
|
"logits/rejected": -1.1701006889343262, |
|
"logps/chosen": -551.0891723632812, |
|
"logps/rejected": -663.329345703125, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7639107704162598, |
|
"rewards/margins": 1.2928920984268188, |
|
"rewards/rejected": -4.056802749633789, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.625, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -1.3360685110092163, |
|
"logits/rejected": -1.204347014427185, |
|
"logps/chosen": -529.5865478515625, |
|
"logps/rejected": -599.6653442382812, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7397305965423584, |
|
"rewards/margins": 1.0634615421295166, |
|
"rewards/rejected": -3.803192138671875, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 14.5, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.3513076305389404, |
|
"logits/rejected": -1.2741743326187134, |
|
"logps/chosen": -541.47216796875, |
|
"logps/rejected": -626.5045776367188, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.730405807495117, |
|
"rewards/margins": 1.1085281372070312, |
|
"rewards/rejected": -3.8389339447021484, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.2767086029052734, |
|
"logits/rejected": -1.2311673164367676, |
|
"logps/chosen": -530.0853881835938, |
|
"logps/rejected": -643.3887939453125, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8382620811462402, |
|
"rewards/margins": 1.153211236000061, |
|
"rewards/rejected": -3.9914729595184326, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.375, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -1.3543965816497803, |
|
"logits/rejected": -1.169668436050415, |
|
"logps/chosen": -568.0045166015625, |
|
"logps/rejected": -621.7847900390625, |
|
"loss": 0.4688, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.714702606201172, |
|
"rewards/margins": 1.1129640340805054, |
|
"rewards/rejected": -3.827666759490967, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.3755584955215454, |
|
"logits/rejected": -1.2311782836914062, |
|
"logps/chosen": -532.9498901367188, |
|
"logps/rejected": -625.6002197265625, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.8895249366760254, |
|
"rewards/margins": 0.9575474858283997, |
|
"rewards/rejected": -3.8470726013183594, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.75, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -1.4002835750579834, |
|
"logits/rejected": -1.255327820777893, |
|
"logps/chosen": -557.9288330078125, |
|
"logps/rejected": -629.8707275390625, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.759397029876709, |
|
"rewards/margins": 1.0484449863433838, |
|
"rewards/rejected": -3.8078417778015137, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.75, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -1.345915675163269, |
|
"logits/rejected": -1.1581257581710815, |
|
"logps/chosen": -561.4415283203125, |
|
"logps/rejected": -623.0227661132812, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.8064818382263184, |
|
"rewards/margins": 1.0665854215621948, |
|
"rewards/rejected": -3.8730673789978027, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.255007028579712, |
|
"eval_logits/rejected": -1.1342185735702515, |
|
"eval_logps/chosen": -545.7534790039062, |
|
"eval_logps/rejected": -635.4207153320312, |
|
"eval_loss": 0.4864389896392822, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -2.8110146522521973, |
|
"eval_rewards/margins": 1.0971060991287231, |
|
"eval_rewards/rejected": -3.908120632171631, |
|
"eval_runtime": 385.1023, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.0, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -1.3322703838348389, |
|
"logits/rejected": -1.164650321006775, |
|
"logps/chosen": -529.0792236328125, |
|
"logps/rejected": -624.087646484375, |
|
"loss": 0.4498, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.7491297721862793, |
|
"rewards/margins": 1.125643014907837, |
|
"rewards/rejected": -3.8747730255126953, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 8.375, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.36007821559906, |
|
"logits/rejected": -1.2929136753082275, |
|
"logps/chosen": -542.2086181640625, |
|
"logps/rejected": -667.2059326171875, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.736257553100586, |
|
"rewards/margins": 1.2475742101669312, |
|
"rewards/rejected": -3.9838318824768066, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 11.625, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.2781856060028076, |
|
"logits/rejected": -1.210409164428711, |
|
"logps/chosen": -516.8308715820312, |
|
"logps/rejected": -617.1106567382812, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.8612074851989746, |
|
"rewards/margins": 1.0104413032531738, |
|
"rewards/rejected": -3.8716487884521484, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.375, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -1.385122537612915, |
|
"logits/rejected": -1.24273681640625, |
|
"logps/chosen": -560.0970458984375, |
|
"logps/rejected": -682.8623046875, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7701196670532227, |
|
"rewards/margins": 1.2804229259490967, |
|
"rewards/rejected": -4.05054235458374, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.3233528137207031, |
|
"logits/rejected": -1.269012212753296, |
|
"logps/chosen": -527.5838623046875, |
|
"logps/rejected": -637.5836791992188, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7349019050598145, |
|
"rewards/margins": 1.0903024673461914, |
|
"rewards/rejected": -3.8252041339874268, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -1.3301162719726562, |
|
"logits/rejected": -1.2381629943847656, |
|
"logps/chosen": -530.2079467773438, |
|
"logps/rejected": -606.7450561523438, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.739201545715332, |
|
"rewards/margins": 1.1811503171920776, |
|
"rewards/rejected": -3.9203522205352783, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -1.3412398099899292, |
|
"logits/rejected": -1.2113425731658936, |
|
"logps/chosen": -574.6760864257812, |
|
"logps/rejected": -662.1055908203125, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8245387077331543, |
|
"rewards/margins": 1.1259464025497437, |
|
"rewards/rejected": -3.9504852294921875, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.125, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -1.3283928632736206, |
|
"logits/rejected": -1.286163568496704, |
|
"logps/chosen": -535.401611328125, |
|
"logps/rejected": -654.7335205078125, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7134242057800293, |
|
"rewards/margins": 1.1457810401916504, |
|
"rewards/rejected": -3.8592045307159424, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 6.9375, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.3209599256515503, |
|
"logits/rejected": -1.182340383529663, |
|
"logps/chosen": -537.6961669921875, |
|
"logps/rejected": -631.2122192382812, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.7593882083892822, |
|
"rewards/margins": 1.081923484802246, |
|
"rewards/rejected": -3.8413116931915283, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.249495506286621, |
|
"logits/rejected": -1.1639585494995117, |
|
"logps/chosen": -500.98992919921875, |
|
"logps/rejected": -613.1359252929688, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7542366981506348, |
|
"rewards/margins": 1.0757157802581787, |
|
"rewards/rejected": -3.8299522399902344, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.2554669380187988, |
|
"eval_logits/rejected": -1.1347417831420898, |
|
"eval_logps/chosen": -545.9835815429688, |
|
"eval_logps/rejected": -635.6727294921875, |
|
"eval_loss": 0.48658978939056396, |
|
"eval_rewards/accuracies": 0.7400000095367432, |
|
"eval_rewards/chosen": -2.813314914703369, |
|
"eval_rewards/margins": 1.0973262786865234, |
|
"eval_rewards/rejected": -3.9106414318084717, |
|
"eval_runtime": 385.0907, |
|
"eval_samples_per_second": 5.194, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.2666916847229004, |
|
"logits/rejected": -1.127403974533081, |
|
"logps/chosen": -518.2024536132812, |
|
"logps/rejected": -636.0374145507812, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.827592134475708, |
|
"rewards/margins": 1.3016375303268433, |
|
"rewards/rejected": -4.129229545593262, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 12.5, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -1.2991037368774414, |
|
"logits/rejected": -1.2022724151611328, |
|
"logps/chosen": -558.1617431640625, |
|
"logps/rejected": -612.0789794921875, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.1023831367492676, |
|
"rewards/margins": 0.8397830128669739, |
|
"rewards/rejected": -3.9421660900115967, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 9.375, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.4298536777496338, |
|
"logits/rejected": -1.2711670398712158, |
|
"logps/chosen": -541.1964721679688, |
|
"logps/rejected": -615.2937622070312, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.770465135574341, |
|
"rewards/margins": 1.1328895092010498, |
|
"rewards/rejected": -3.9033546447753906, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.3373726606369019, |
|
"logits/rejected": -1.1359134912490845, |
|
"logps/chosen": -546.8034057617188, |
|
"logps/rejected": -593.7394409179688, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7354676723480225, |
|
"rewards/margins": 1.0266190767288208, |
|
"rewards/rejected": -3.762086868286133, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.03125, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.3844215869903564, |
|
"logits/rejected": -1.2332738637924194, |
|
"logps/chosen": -517.3903198242188, |
|
"logps/rejected": -597.7869262695312, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6527369022369385, |
|
"rewards/margins": 1.128391146659851, |
|
"rewards/rejected": -3.7811279296875, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -1.3098169565200806, |
|
"logits/rejected": -1.2761331796646118, |
|
"logps/chosen": -556.2361450195312, |
|
"logps/rejected": -640.65283203125, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.914341449737549, |
|
"rewards/margins": 0.838718056678772, |
|
"rewards/rejected": -3.753058910369873, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -1.4296592473983765, |
|
"logits/rejected": -1.253159761428833, |
|
"logps/chosen": -565.5921020507812, |
|
"logps/rejected": -640.4884643554688, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.93088960647583, |
|
"rewards/margins": 1.0891053676605225, |
|
"rewards/rejected": -4.01999568939209, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.2928217649459839, |
|
"logits/rejected": -1.2739886045455933, |
|
"logps/chosen": -544.60205078125, |
|
"logps/rejected": -657.6286010742188, |
|
"loss": 0.4198, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.8306102752685547, |
|
"rewards/margins": 1.2527996301651, |
|
"rewards/rejected": -4.083409786224365, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -1.3845082521438599, |
|
"logits/rejected": -1.1975808143615723, |
|
"logps/chosen": -600.7689819335938, |
|
"logps/rejected": -661.3809204101562, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.837923049926758, |
|
"rewards/margins": 1.0966850519180298, |
|
"rewards/rejected": -3.934607744216919, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.875, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -1.2211766242980957, |
|
"logits/rejected": -1.0900758504867554, |
|
"logps/chosen": -589.4644775390625, |
|
"logps/rejected": -672.6224975585938, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.928469181060791, |
|
"rewards/margins": 1.0935529470443726, |
|
"rewards/rejected": -4.022022247314453, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -1.252778172492981, |
|
"eval_logits/rejected": -1.1321126222610474, |
|
"eval_logps/chosen": -545.6665649414062, |
|
"eval_logps/rejected": -635.412353515625, |
|
"eval_loss": 0.486397385597229, |
|
"eval_rewards/accuracies": 0.7400000095367432, |
|
"eval_rewards/chosen": -2.8101449012756348, |
|
"eval_rewards/margins": 1.0978920459747314, |
|
"eval_rewards/rejected": -3.9080374240875244, |
|
"eval_runtime": 385.1334, |
|
"eval_samples_per_second": 5.193, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -1.2232288122177124, |
|
"logits/rejected": -1.1615407466888428, |
|
"logps/chosen": -513.3945922851562, |
|
"logps/rejected": -625.0435791015625, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7703394889831543, |
|
"rewards/margins": 1.192608118057251, |
|
"rewards/rejected": -3.9629478454589844, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.2154030799865723, |
|
"logits/rejected": -1.0470209121704102, |
|
"logps/chosen": -534.564453125, |
|
"logps/rejected": -611.8311767578125, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8537747859954834, |
|
"rewards/margins": 1.1320233345031738, |
|
"rewards/rejected": -3.985797882080078, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.2473368644714355, |
|
"logits/rejected": -1.1350939273834229, |
|
"logps/chosen": -516.2252197265625, |
|
"logps/rejected": -601.020263671875, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.709373950958252, |
|
"rewards/margins": 1.07589852809906, |
|
"rewards/rejected": -3.7852725982666016, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.2988349199295044, |
|
"logits/rejected": -1.1963183879852295, |
|
"logps/chosen": -549.7848510742188, |
|
"logps/rejected": -670.5635986328125, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7767837047576904, |
|
"rewards/margins": 1.3236409425735474, |
|
"rewards/rejected": -4.100424766540527, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 7.96875, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.3606340885162354, |
|
"logits/rejected": -1.224469780921936, |
|
"logps/chosen": -543.1236572265625, |
|
"logps/rejected": -640.4927978515625, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7865681648254395, |
|
"rewards/margins": 1.1219072341918945, |
|
"rewards/rejected": -3.908475399017334, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.0, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.3460079431533813, |
|
"logits/rejected": -1.2832306623458862, |
|
"logps/chosen": -539.73583984375, |
|
"logps/rejected": -619.1431884765625, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7437405586242676, |
|
"rewards/margins": 1.1124091148376465, |
|
"rewards/rejected": -3.8561501502990723, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -1.3690940141677856, |
|
"logits/rejected": -1.1916528940200806, |
|
"logps/chosen": -560.9449462890625, |
|
"logps/rejected": -645.1325073242188, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7667250633239746, |
|
"rewards/margins": 1.096879482269287, |
|
"rewards/rejected": -3.8636043071746826, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.2690956592559814, |
|
"logits/rejected": -1.2165257930755615, |
|
"logps/chosen": -535.2860107421875, |
|
"logps/rejected": -644.5172119140625, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.8549444675445557, |
|
"rewards/margins": 0.8715957403182983, |
|
"rewards/rejected": -3.7265400886535645, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -1.2903029918670654, |
|
"logits/rejected": -1.1491575241088867, |
|
"logps/chosen": -556.7200927734375, |
|
"logps/rejected": -649.4456176757812, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.824282169342041, |
|
"rewards/margins": 1.1551063060760498, |
|
"rewards/rejected": -3.979388475418091, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.625, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -1.3866922855377197, |
|
"logits/rejected": -1.2608470916748047, |
|
"logps/chosen": -567.4412231445312, |
|
"logps/rejected": -653.213623046875, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.856003999710083, |
|
"rewards/margins": 1.1031793355941772, |
|
"rewards/rejected": -3.9591832160949707, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.2524324655532837, |
|
"eval_logits/rejected": -1.1317205429077148, |
|
"eval_logps/chosen": -545.9131469726562, |
|
"eval_logps/rejected": -635.618408203125, |
|
"eval_loss": 0.48637571930885315, |
|
"eval_rewards/accuracies": 0.7394999861717224, |
|
"eval_rewards/chosen": -2.8126115798950195, |
|
"eval_rewards/margins": 1.0974864959716797, |
|
"eval_rewards/rejected": -3.910098075866699, |
|
"eval_runtime": 385.0016, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 0.649, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -1.312417984008789, |
|
"logits/rejected": -1.191304326057434, |
|
"logps/chosen": -529.3787231445312, |
|
"logps/rejected": -611.1173095703125, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.7690625190734863, |
|
"rewards/margins": 0.9739207029342651, |
|
"rewards/rejected": -3.742983341217041, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 21.25, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.306217908859253, |
|
"logits/rejected": -1.1442514657974243, |
|
"logps/chosen": -570.4493408203125, |
|
"logps/rejected": -660.94970703125, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.836827516555786, |
|
"rewards/margins": 1.3515799045562744, |
|
"rewards/rejected": -4.188406944274902, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5238101308459981, |
|
"train_runtime": 42749.2467, |
|
"train_samples_per_second": 1.43, |
|
"train_steps_per_second": 0.089 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|