|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 100, |
|
"global_step": 2390, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0920502092050206e-09, |
|
"logits/chosen": -2.8099329471588135, |
|
"logits/rejected": -2.7572641372680664, |
|
"logps/chosen": -241.48843383789062, |
|
"logps/rejected": -197.4517822265625, |
|
"loss": 0.2769, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0920502092050206e-08, |
|
"logits/chosen": -2.8319787979125977, |
|
"logits/rejected": -2.808582305908203, |
|
"logps/chosen": -292.6686706542969, |
|
"logps/rejected": -278.59375, |
|
"loss": 0.2933, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.00022136639745440334, |
|
"rewards/margins": 0.0002973621594719589, |
|
"rewards/rejected": -7.599574018968269e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.184100418410041e-08, |
|
"logits/chosen": -2.81412410736084, |
|
"logits/rejected": -2.7854788303375244, |
|
"logps/chosen": -290.38568115234375, |
|
"logps/rejected": -290.733154296875, |
|
"loss": 0.2783, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00013949527055956423, |
|
"rewards/margins": 0.0003723447152879089, |
|
"rewards/rejected": -0.0002328494592802599, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.276150627615062e-08, |
|
"logits/chosen": -2.768853187561035, |
|
"logits/rejected": -2.737245559692383, |
|
"logps/chosen": -246.38821411132812, |
|
"logps/rejected": -226.2839813232422, |
|
"loss": 0.2884, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0002608283539302647, |
|
"rewards/margins": -0.0002197624125983566, |
|
"rewards/rejected": -4.106592677999288e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.368200836820083e-08, |
|
"logits/chosen": -2.823577404022217, |
|
"logits/rejected": -2.793687343597412, |
|
"logps/chosen": -299.4330749511719, |
|
"logps/rejected": -261.3934631347656, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0008854336920194328, |
|
"rewards/margins": 0.00218599964864552, |
|
"rewards/rejected": -0.0013005656655877829, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.0460251046025103e-07, |
|
"logits/chosen": -2.759429693222046, |
|
"logits/rejected": -2.7360119819641113, |
|
"logps/chosen": -277.38555908203125, |
|
"logps/rejected": -263.96270751953125, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.00044868001714348793, |
|
"rewards/margins": 0.0031055849976837635, |
|
"rewards/rejected": -0.0026569045148789883, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.2552301255230124e-07, |
|
"logits/chosen": -2.780626058578491, |
|
"logits/rejected": -2.754009246826172, |
|
"logps/chosen": -259.0357666015625, |
|
"logps/rejected": -233.1998291015625, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.00040507837547920644, |
|
"rewards/margins": 0.0062531172297894955, |
|
"rewards/rejected": -0.005848039872944355, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.4644351464435146e-07, |
|
"logits/chosen": -2.759467124938965, |
|
"logits/rejected": -2.7270069122314453, |
|
"logps/chosen": -268.027587890625, |
|
"logps/rejected": -235.6522674560547, |
|
"loss": 0.2825, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0039499360136687756, |
|
"rewards/margins": 0.011635703034698963, |
|
"rewards/rejected": -0.0076857665553689, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6736401673640165e-07, |
|
"logits/chosen": -2.720489025115967, |
|
"logits/rejected": -2.717080593109131, |
|
"logps/chosen": -270.4134521484375, |
|
"logps/rejected": -269.0198974609375, |
|
"loss": 0.2805, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0031835869885981083, |
|
"rewards/margins": 0.018135149031877518, |
|
"rewards/rejected": -0.014951561577618122, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8828451882845187e-07, |
|
"logits/chosen": -2.7781665325164795, |
|
"logits/rejected": -2.7453417778015137, |
|
"logps/chosen": -254.9724578857422, |
|
"logps/rejected": -237.5113525390625, |
|
"loss": 0.2802, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -7.982123497640714e-05, |
|
"rewards/margins": 0.028089797124266624, |
|
"rewards/rejected": -0.028169620782136917, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.0920502092050206e-07, |
|
"logits/chosen": -2.809565305709839, |
|
"logits/rejected": -2.7795722484588623, |
|
"logps/chosen": -290.9457092285156, |
|
"logps/rejected": -273.58453369140625, |
|
"loss": 0.2786, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.01506691426038742, |
|
"rewards/margins": 0.04217763990163803, |
|
"rewards/rejected": -0.05724455043673515, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.7774462699890137, |
|
"eval_logits/rejected": -2.762296438217163, |
|
"eval_logps/chosen": -257.83673095703125, |
|
"eval_logps/rejected": -264.458251953125, |
|
"eval_loss": 0.27809038758277893, |
|
"eval_rewards/accuracies": 0.671875, |
|
"eval_rewards/chosen": -0.007970910519361496, |
|
"eval_rewards/margins": 0.06307876110076904, |
|
"eval_rewards/rejected": -0.07104967534542084, |
|
"eval_runtime": 53.5134, |
|
"eval_samples_per_second": 37.374, |
|
"eval_steps_per_second": 0.598, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3012552301255228e-07, |
|
"logits/chosen": -2.664180040359497, |
|
"logits/rejected": -2.6355907917022705, |
|
"logps/chosen": -270.4234619140625, |
|
"logps/rejected": -231.29629516601562, |
|
"loss": 0.2679, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0032597160898149014, |
|
"rewards/margins": 0.09160834550857544, |
|
"rewards/rejected": -0.09486806392669678, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.510460251046025e-07, |
|
"logits/chosen": -2.747262954711914, |
|
"logits/rejected": -2.7251694202423096, |
|
"logps/chosen": -272.30267333984375, |
|
"logps/rejected": -275.91827392578125, |
|
"loss": 0.2606, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03451583534479141, |
|
"rewards/margins": 0.09615737944841385, |
|
"rewards/rejected": -0.13067321479320526, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.719665271966527e-07, |
|
"logits/chosen": -2.7391302585601807, |
|
"logits/rejected": -2.7268309593200684, |
|
"logps/chosen": -280.5605773925781, |
|
"logps/rejected": -267.3287048339844, |
|
"loss": 0.2553, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06065766140818596, |
|
"rewards/margins": 0.10610239207744598, |
|
"rewards/rejected": -0.16676005721092224, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.928870292887029e-07, |
|
"logits/chosen": -2.7398619651794434, |
|
"logits/rejected": -2.7162110805511475, |
|
"logps/chosen": -293.87701416015625, |
|
"logps/rejected": -301.07061767578125, |
|
"loss": 0.2294, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.132425457239151, |
|
"rewards/margins": 0.1706453114748001, |
|
"rewards/rejected": -0.3030707836151123, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.1380753138075313e-07, |
|
"logits/chosen": -2.7051219940185547, |
|
"logits/rejected": -2.7295825481414795, |
|
"logps/chosen": -245.402587890625, |
|
"logps/rejected": -279.4499206542969, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.18723666667938232, |
|
"rewards/margins": 0.24440130591392517, |
|
"rewards/rejected": -0.4316380023956299, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.347280334728033e-07, |
|
"logits/chosen": -2.7354378700256348, |
|
"logits/rejected": -2.7091259956359863, |
|
"logps/chosen": -285.7065734863281, |
|
"logps/rejected": -298.4839782714844, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.29734712839126587, |
|
"rewards/margins": 0.2287193089723587, |
|
"rewards/rejected": -0.5260664224624634, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.556485355648535e-07, |
|
"logits/chosen": -2.6840271949768066, |
|
"logits/rejected": -2.668735980987549, |
|
"logps/chosen": -327.84564208984375, |
|
"logps/rejected": -338.6101989746094, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4567885398864746, |
|
"rewards/margins": 0.3276306688785553, |
|
"rewards/rejected": -0.7844191789627075, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.7656903765690374e-07, |
|
"logits/chosen": -2.7223949432373047, |
|
"logits/rejected": -2.7302968502044678, |
|
"logps/chosen": -320.07708740234375, |
|
"logps/rejected": -347.97613525390625, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5089918375015259, |
|
"rewards/margins": 0.4169326722621918, |
|
"rewards/rejected": -0.9259245991706848, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.9748953974895396e-07, |
|
"logits/chosen": -2.722072124481201, |
|
"logits/rejected": -2.686973810195923, |
|
"logps/chosen": -355.3212585449219, |
|
"logps/rejected": -370.8379211425781, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6392548680305481, |
|
"rewards/margins": 0.4283716678619385, |
|
"rewards/rejected": -1.0676265954971313, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.184100418410041e-07, |
|
"logits/chosen": -2.7191638946533203, |
|
"logits/rejected": -2.738208532333374, |
|
"logps/chosen": -352.812255859375, |
|
"logps/rejected": -363.7830810546875, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5664477348327637, |
|
"rewards/margins": 0.42822542786598206, |
|
"rewards/rejected": -0.9946731328964233, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.7512197494506836, |
|
"eval_logits/rejected": -2.736517906188965, |
|
"eval_logps/chosen": -315.21453857421875, |
|
"eval_logps/rejected": -361.2017517089844, |
|
"eval_loss": 0.14495064318180084, |
|
"eval_rewards/accuracies": 0.69921875, |
|
"eval_rewards/chosen": -0.5817492008209229, |
|
"eval_rewards/margins": 0.45673587918281555, |
|
"eval_rewards/rejected": -1.0384851694107056, |
|
"eval_runtime": 53.4607, |
|
"eval_samples_per_second": 37.411, |
|
"eval_steps_per_second": 0.599, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.3933054393305435e-07, |
|
"logits/chosen": -2.675798177719116, |
|
"logits/rejected": -2.656069278717041, |
|
"logps/chosen": -326.40350341796875, |
|
"logps/rejected": -355.9562072753906, |
|
"loss": 0.14, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5337026715278625, |
|
"rewards/margins": 0.5065020322799683, |
|
"rewards/rejected": -1.0402047634124756, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6025104602510457e-07, |
|
"logits/chosen": -2.6344687938690186, |
|
"logits/rejected": -2.5925331115722656, |
|
"logps/chosen": -330.45086669921875, |
|
"logps/rejected": -363.1759948730469, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5548520088195801, |
|
"rewards/margins": 0.45384103059768677, |
|
"rewards/rejected": -1.0086930990219116, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.811715481171548e-07, |
|
"logits/chosen": -2.6501173973083496, |
|
"logits/rejected": -2.596782684326172, |
|
"logps/chosen": -365.44525146484375, |
|
"logps/rejected": -400.89031982421875, |
|
"loss": 0.1282, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9089735746383667, |
|
"rewards/margins": 0.3997742533683777, |
|
"rewards/rejected": -1.3087480068206787, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.999997333578144e-07, |
|
"logits/chosen": -2.5894546508789062, |
|
"logits/rejected": -2.559335231781006, |
|
"logps/chosen": -364.35888671875, |
|
"logps/rejected": -378.43646240234375, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7818881869316101, |
|
"rewards/margins": 0.46951135993003845, |
|
"rewards/rejected": -1.2513995170593262, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.999677369837696e-07, |
|
"logits/chosen": -2.5737695693969727, |
|
"logits/rejected": -2.5291452407836914, |
|
"logps/chosen": -351.3750305175781, |
|
"logps/rejected": -382.34600830078125, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7980272173881531, |
|
"rewards/margins": 0.48120132088661194, |
|
"rewards/rejected": -1.279228687286377, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.998824199931228e-07, |
|
"logits/chosen": -2.537308931350708, |
|
"logits/rejected": -2.5202934741973877, |
|
"logps/chosen": -369.4532775878906, |
|
"logps/rejected": -424.20831298828125, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.103058099746704, |
|
"rewards/margins": 0.5565687417984009, |
|
"rewards/rejected": -1.6596267223358154, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.997438005848408e-07, |
|
"logits/chosen": -2.555568218231201, |
|
"logits/rejected": -2.5435547828674316, |
|
"logps/chosen": -362.97308349609375, |
|
"logps/rejected": -409.76641845703125, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7452017068862915, |
|
"rewards/margins": 0.7018192410469055, |
|
"rewards/rejected": -1.4470210075378418, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.995519083278287e-07, |
|
"logits/chosen": -2.4650330543518066, |
|
"logits/rejected": -2.4678432941436768, |
|
"logps/chosen": -402.68707275390625, |
|
"logps/rejected": -432.5755310058594, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9494403600692749, |
|
"rewards/margins": 0.7206406593322754, |
|
"rewards/rejected": -1.6700811386108398, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.99306784154623e-07, |
|
"logits/chosen": -2.509040355682373, |
|
"logits/rejected": -2.4864211082458496, |
|
"logps/chosen": -373.8565673828125, |
|
"logps/rejected": -406.95697021484375, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9609284400939941, |
|
"rewards/margins": 0.5756130814552307, |
|
"rewards/rejected": -1.53654146194458, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.990084803526596e-07, |
|
"logits/chosen": -2.5352022647857666, |
|
"logits/rejected": -2.514341354370117, |
|
"logps/chosen": -358.0209655761719, |
|
"logps/rejected": -401.28802490234375, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0641438961029053, |
|
"rewards/margins": 0.5253941416740417, |
|
"rewards/rejected": -1.5895380973815918, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.505751132965088, |
|
"eval_logits/rejected": -2.488804340362549, |
|
"eval_logps/chosen": -361.1052551269531, |
|
"eval_logps/rejected": -424.5982971191406, |
|
"eval_loss": 0.11855494976043701, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -1.0406556129455566, |
|
"eval_rewards/margins": 0.6317947506904602, |
|
"eval_rewards/rejected": -1.6724504232406616, |
|
"eval_runtime": 53.4442, |
|
"eval_samples_per_second": 37.422, |
|
"eval_steps_per_second": 0.599, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.98657060553121e-07, |
|
"logits/chosen": -2.473529815673828, |
|
"logits/rejected": -2.438892364501953, |
|
"logps/chosen": -391.3240966796875, |
|
"logps/rejected": -432.38177490234375, |
|
"loss": 0.1128, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0361790657043457, |
|
"rewards/margins": 0.7299066781997681, |
|
"rewards/rejected": -1.7660858631134033, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.982525997173624e-07, |
|
"logits/chosen": -2.3862195014953613, |
|
"logits/rejected": -2.358474016189575, |
|
"logps/chosen": -399.8985900878906, |
|
"logps/rejected": -458.3641052246094, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1403719186782837, |
|
"rewards/margins": 0.79006028175354, |
|
"rewards/rejected": -1.9304320812225342, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.977951841209228e-07, |
|
"logits/chosen": -2.3514888286590576, |
|
"logits/rejected": -2.3194773197174072, |
|
"logps/chosen": -380.63470458984375, |
|
"logps/rejected": -449.43524169921875, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0660531520843506, |
|
"rewards/margins": 0.7859581708908081, |
|
"rewards/rejected": -1.8520113229751587, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.972849113351203e-07, |
|
"logits/chosen": -2.3656859397888184, |
|
"logits/rejected": -2.3494842052459717, |
|
"logps/chosen": -398.6328125, |
|
"logps/rejected": -479.0165100097656, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.440366506576538, |
|
"rewards/margins": 0.8205418586730957, |
|
"rewards/rejected": -2.260908365249634, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.967218902062403e-07, |
|
"logits/chosen": -2.3261966705322266, |
|
"logits/rejected": -2.313175678253174, |
|
"logps/chosen": -421.1576232910156, |
|
"logps/rejected": -472.1260681152344, |
|
"loss": 0.0991, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.447040319442749, |
|
"rewards/margins": 0.643539547920227, |
|
"rewards/rejected": -2.0905799865722656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.961062408323165e-07, |
|
"logits/chosen": -2.301395893096924, |
|
"logits/rejected": -2.281790256500244, |
|
"logps/chosen": -369.58282470703125, |
|
"logps/rejected": -446.14605712890625, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.111981987953186, |
|
"rewards/margins": 0.9158403277397156, |
|
"rewards/rejected": -2.0278220176696777, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.954380945375134e-07, |
|
"logits/chosen": -2.330751657485962, |
|
"logits/rejected": -2.3153843879699707, |
|
"logps/chosen": -421.24029541015625, |
|
"logps/rejected": -476.91796875, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4318022727966309, |
|
"rewards/margins": 0.7008517384529114, |
|
"rewards/rejected": -2.1326539516448975, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.947175938441137e-07, |
|
"logits/chosen": -2.238253593444824, |
|
"logits/rejected": -2.224094867706299, |
|
"logps/chosen": -397.0077819824219, |
|
"logps/rejected": -480.01336669921875, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7416664361953735, |
|
"rewards/margins": 0.6288383603096008, |
|
"rewards/rejected": -2.370504856109619, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.939448924421163e-07, |
|
"logits/chosen": -2.2960822582244873, |
|
"logits/rejected": -2.277590036392212, |
|
"logps/chosen": -389.71868896484375, |
|
"logps/rejected": -456.01849365234375, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.381588101387024, |
|
"rewards/margins": 0.823552131652832, |
|
"rewards/rejected": -2.2051398754119873, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.931201551564529e-07, |
|
"logits/chosen": -2.2592544555664062, |
|
"logits/rejected": -2.2072255611419678, |
|
"logps/chosen": -437.38189697265625, |
|
"logps/rejected": -469.7354431152344, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5167815685272217, |
|
"rewards/margins": 0.7052708864212036, |
|
"rewards/rejected": -2.2220523357391357, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.3013134002685547, |
|
"eval_logits/rejected": -2.2735753059387207, |
|
"eval_logps/chosen": -420.30938720703125, |
|
"eval_logps/rejected": -505.6364440917969, |
|
"eval_loss": 0.0996592789888382, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -1.6326974630355835, |
|
"eval_rewards/margins": 0.8501344919204712, |
|
"eval_rewards/rejected": -2.4828317165374756, |
|
"eval_runtime": 53.4802, |
|
"eval_samples_per_second": 37.397, |
|
"eval_steps_per_second": 0.598, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.922435579118294e-07, |
|
"logits/chosen": -2.2972185611724854, |
|
"logits/rejected": -2.228670597076416, |
|
"logps/chosen": -475.92315673828125, |
|
"logps/rejected": -551.5372314453125, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1233341693878174, |
|
"rewards/margins": 0.5774027109146118, |
|
"rewards/rejected": -2.7007367610931396, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.913152876951994e-07, |
|
"logits/chosen": -2.3571770191192627, |
|
"logits/rejected": -2.3337106704711914, |
|
"logps/chosen": -464.90057373046875, |
|
"logps/rejected": -533.636474609375, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7808752059936523, |
|
"rewards/margins": 0.6313996911048889, |
|
"rewards/rejected": -2.4122753143310547, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.903355425158774e-07, |
|
"logits/chosen": -2.315852403640747, |
|
"logits/rejected": -2.292701244354248, |
|
"logps/chosen": -440.13006591796875, |
|
"logps/rejected": -502.9768981933594, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4987695217132568, |
|
"rewards/margins": 0.7871022820472717, |
|
"rewards/rejected": -2.285871982574463, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.893045313633024e-07, |
|
"logits/chosen": -2.273681879043579, |
|
"logits/rejected": -2.2409262657165527, |
|
"logps/chosen": -490.00164794921875, |
|
"logps/rejected": -494.80035400390625, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.7495065927505493, |
|
"rewards/margins": 0.5425639748573303, |
|
"rewards/rejected": -2.2920703887939453, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.882224741624578e-07, |
|
"logits/chosen": -2.1807281970977783, |
|
"logits/rejected": -2.1516079902648926, |
|
"logps/chosen": -451.31964111328125, |
|
"logps/rejected": -510.9944763183594, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.704620361328125, |
|
"rewards/margins": 0.9034457206726074, |
|
"rewards/rejected": -2.6080660820007324, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.87089601726959e-07, |
|
"logits/chosen": -2.2478954792022705, |
|
"logits/rejected": -2.2095189094543457, |
|
"logps/chosen": -450.0931701660156, |
|
"logps/rejected": -501.3701171875, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4970637559890747, |
|
"rewards/margins": 0.8102753758430481, |
|
"rewards/rejected": -2.3073391914367676, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.85906155709819e-07, |
|
"logits/chosen": -2.344259738922119, |
|
"logits/rejected": -2.2746520042419434, |
|
"logps/chosen": -414.1932678222656, |
|
"logps/rejected": -418.90509033203125, |
|
"loss": 0.1029, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1598687171936035, |
|
"rewards/margins": 0.6987718343734741, |
|
"rewards/rejected": -1.858640432357788, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.846723885519011e-07, |
|
"logits/chosen": -2.330390691757202, |
|
"logits/rejected": -2.294926166534424, |
|
"logps/chosen": -371.785888671875, |
|
"logps/rejected": -441.8988342285156, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1650288105010986, |
|
"rewards/margins": 0.9054770469665527, |
|
"rewards/rejected": -2.0705060958862305, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.833885634280713e-07, |
|
"logits/chosen": -2.3310351371765137, |
|
"logits/rejected": -2.2751598358154297, |
|
"logps/chosen": -430.7669982910156, |
|
"logps/rejected": -546.8540649414062, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.5875298976898193, |
|
"rewards/margins": 1.4582951068878174, |
|
"rewards/rejected": -3.0458247661590576, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.820549541910595e-07, |
|
"logits/chosen": -2.2020456790924072, |
|
"logits/rejected": -2.1707472801208496, |
|
"logps/chosen": -556.9027709960938, |
|
"logps/rejected": -680.4606323242188, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8451037406921387, |
|
"rewards/margins": 1.6044034957885742, |
|
"rewards/rejected": -4.449507236480713, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_logits/chosen": -2.2302603721618652, |
|
"eval_logits/rejected": -2.1928586959838867, |
|
"eval_logps/chosen": -552.5746459960938, |
|
"eval_logps/rejected": -683.00341796875, |
|
"eval_loss": 0.04056515917181969, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -2.955350399017334, |
|
"eval_rewards/margins": 1.3011513948440552, |
|
"eval_rewards/rejected": -4.2565016746521, |
|
"eval_runtime": 53.4741, |
|
"eval_samples_per_second": 37.401, |
|
"eval_steps_per_second": 0.598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.806718453130447e-07, |
|
"logits/chosen": -2.265007257461548, |
|
"logits/rejected": -2.193450689315796, |
|
"logps/chosen": -606.5016479492188, |
|
"logps/rejected": -754.6884155273438, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.720303773880005, |
|
"rewards/margins": 2.0228867530822754, |
|
"rewards/rejected": -4.743190288543701, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.79239531824974e-07, |
|
"logits/chosen": -2.1913039684295654, |
|
"logits/rejected": -2.1540169715881348, |
|
"logps/chosen": -516.7843627929688, |
|
"logps/rejected": -724.0062866210938, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.5707671642303467, |
|
"rewards/margins": 1.9443508386611938, |
|
"rewards/rejected": -4.51511812210083, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.777583192536299e-07, |
|
"logits/chosen": -2.212864398956299, |
|
"logits/rejected": -2.201310634613037, |
|
"logps/chosen": -471.45556640625, |
|
"logps/rejected": -576.8563842773438, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9474525451660156, |
|
"rewards/margins": 1.513797402381897, |
|
"rewards/rejected": -3.461249828338623, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.762285235564577e-07, |
|
"logits/chosen": -2.220897674560547, |
|
"logits/rejected": -2.153154134750366, |
|
"logps/chosen": -493.2330017089844, |
|
"logps/rejected": -615.1651000976562, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.036349058151245, |
|
"rewards/margins": 1.6692250967025757, |
|
"rewards/rejected": -3.7055745124816895, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.746504710541692e-07, |
|
"logits/chosen": -2.110464096069336, |
|
"logits/rejected": -2.034914255142212, |
|
"logps/chosen": -540.3013305664062, |
|
"logps/rejected": -667.2684326171875, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9087443351745605, |
|
"rewards/margins": 1.5584461688995361, |
|
"rewards/rejected": -4.467190742492676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.730244983611349e-07, |
|
"logits/chosen": -2.2672922611236572, |
|
"logits/rejected": -2.2252161502838135, |
|
"logps/chosen": -509.082763671875, |
|
"logps/rejected": -638.5903930664062, |
|
"loss": 0.0212, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2937216758728027, |
|
"rewards/margins": 1.4224469661712646, |
|
"rewards/rejected": -3.7161686420440674, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.7135095231358113e-07, |
|
"logits/chosen": -2.1369729042053223, |
|
"logits/rejected": -2.1034133434295654, |
|
"logps/chosen": -513.8828735351562, |
|
"logps/rejected": -698.0314331054688, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.2990219593048096, |
|
"rewards/margins": 1.9289143085479736, |
|
"rewards/rejected": -4.227935791015625, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.69630189895606e-07, |
|
"logits/chosen": -2.258005142211914, |
|
"logits/rejected": -2.214163303375244, |
|
"logps/chosen": -544.955810546875, |
|
"logps/rejected": -684.7013549804688, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.581272840499878, |
|
"rewards/margins": 1.7254451513290405, |
|
"rewards/rejected": -4.306718349456787, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.678625781630315e-07, |
|
"logits/chosen": -2.1814045906066895, |
|
"logits/rejected": -2.1445200443267822, |
|
"logps/chosen": -524.6762084960938, |
|
"logps/rejected": -690.3218994140625, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.601634979248047, |
|
"rewards/margins": 1.7734676599502563, |
|
"rewards/rejected": -4.375102996826172, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.6604849416510653e-07, |
|
"logits/chosen": -2.0610811710357666, |
|
"logits/rejected": -2.025383949279785, |
|
"logps/chosen": -527.0828247070312, |
|
"logps/rejected": -724.1614990234375, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.816039562225342, |
|
"rewards/margins": 1.8535451889038086, |
|
"rewards/rejected": -4.669585227966309, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_logits/chosen": -2.1642775535583496, |
|
"eval_logits/rejected": -2.1365013122558594, |
|
"eval_logps/chosen": -558.1375732421875, |
|
"eval_logps/rejected": -694.5244140625, |
|
"eval_loss": 0.02983885258436203, |
|
"eval_rewards/accuracies": 0.73046875, |
|
"eval_rewards/chosen": -3.010979413986206, |
|
"eval_rewards/margins": 1.3607325553894043, |
|
"eval_rewards/rejected": -4.3717122077941895, |
|
"eval_runtime": 53.4413, |
|
"eval_samples_per_second": 37.424, |
|
"eval_steps_per_second": 0.599, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.6418832486407883e-07, |
|
"logits/chosen": -2.1922335624694824, |
|
"logits/rejected": -2.1194629669189453, |
|
"logps/chosen": -543.6282348632812, |
|
"logps/rejected": -681.7916259765625, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.844820022583008, |
|
"rewards/margins": 1.685089111328125, |
|
"rewards/rejected": -4.529910087585449, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.622824670526516e-07, |
|
"logits/chosen": -2.1402950286865234, |
|
"logits/rejected": -2.1509034633636475, |
|
"logps/chosen": -514.2518310546875, |
|
"logps/rejected": -685.3341064453125, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.341930389404297, |
|
"rewards/margins": 1.9204639196395874, |
|
"rewards/rejected": -4.262394905090332, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.603313272693443e-07, |
|
"logits/chosen": -2.2139639854431152, |
|
"logits/rejected": -2.1723666191101074, |
|
"logps/chosen": -522.5643310546875, |
|
"logps/rejected": -689.7904052734375, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3508732318878174, |
|
"rewards/margins": 1.773174524307251, |
|
"rewards/rejected": -4.124047756195068, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.5833532171177325e-07, |
|
"logits/chosen": -2.1888368129730225, |
|
"logits/rejected": -2.1320838928222656, |
|
"logps/chosen": -523.3480224609375, |
|
"logps/rejected": -653.1837158203125, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.4582064151763916, |
|
"rewards/margins": 1.5990861654281616, |
|
"rewards/rejected": -4.057292461395264, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.5629487614787297e-07, |
|
"logits/chosen": -2.1233179569244385, |
|
"logits/rejected": -2.0853347778320312, |
|
"logps/chosen": -557.8826293945312, |
|
"logps/rejected": -670.3534545898438, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.627781629562378, |
|
"rewards/margins": 1.7120717763900757, |
|
"rewards/rejected": -4.339853763580322, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.542104258250756e-07, |
|
"logits/chosen": -2.0561859607696533, |
|
"logits/rejected": -2.050246477127075, |
|
"logps/chosen": -511.35791015625, |
|
"logps/rejected": -677.2140502929688, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.600620985031128, |
|
"rewards/margins": 1.7464053630828857, |
|
"rewards/rejected": -4.347026348114014, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.52082415377468e-07, |
|
"logits/chosen": -2.0040194988250732, |
|
"logits/rejected": -2.003857135772705, |
|
"logps/chosen": -582.1134033203125, |
|
"logps/rejected": -732.797119140625, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.9073662757873535, |
|
"rewards/margins": 1.9569756984710693, |
|
"rewards/rejected": -4.864341735839844, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.499112987309469e-07, |
|
"logits/chosen": -2.1404829025268555, |
|
"logits/rejected": -2.091064929962158, |
|
"logps/chosen": -597.4866333007812, |
|
"logps/rejected": -734.6881103515625, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -2.796691656112671, |
|
"rewards/margins": 1.9776580333709717, |
|
"rewards/rejected": -4.774350166320801, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.476975390063924e-07, |
|
"logits/chosen": -2.1481356620788574, |
|
"logits/rejected": -2.137308120727539, |
|
"logps/chosen": -526.8170776367188, |
|
"logps/rejected": -667.6612548828125, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.477726697921753, |
|
"rewards/margins": 1.6972558498382568, |
|
"rewards/rejected": -4.17498254776001, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.454416084208792e-07, |
|
"logits/chosen": -2.1825122833251953, |
|
"logits/rejected": -2.1353771686553955, |
|
"logps/chosen": -538.6439819335938, |
|
"logps/rejected": -668.5504150390625, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.429628849029541, |
|
"rewards/margins": 1.7306365966796875, |
|
"rewards/rejected": -4.1602654457092285, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_logits/chosen": -2.1453137397766113, |
|
"eval_logits/rejected": -2.11730694770813, |
|
"eval_logps/chosen": -544.354248046875, |
|
"eval_logps/rejected": -669.5265502929688, |
|
"eval_loss": 0.03203188255429268, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -2.8731460571289062, |
|
"eval_rewards/margins": 1.248586893081665, |
|
"eval_rewards/rejected": -4.121732711791992, |
|
"eval_runtime": 53.4979, |
|
"eval_samples_per_second": 37.385, |
|
"eval_steps_per_second": 0.598, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.43143988186948e-07, |
|
"logits/chosen": -2.0838775634765625, |
|
"logits/rejected": -2.0534298419952393, |
|
"logps/chosen": -567.5451049804688, |
|
"logps/rejected": -709.284423828125, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.8426408767700195, |
|
"rewards/margins": 1.7196298837661743, |
|
"rewards/rejected": -4.562270164489746, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.4080516840995816e-07, |
|
"logits/chosen": -2.0725064277648926, |
|
"logits/rejected": -2.0133423805236816, |
|
"logps/chosen": -553.6807861328125, |
|
"logps/rejected": -667.474609375, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.7032227516174316, |
|
"rewards/margins": 1.5242440700531006, |
|
"rewards/rejected": -4.227466583251953, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.384256479835437e-07, |
|
"logits/chosen": -2.117086887359619, |
|
"logits/rejected": -2.105010986328125, |
|
"logps/chosen": -514.7193603515625, |
|
"logps/rejected": -667.7491455078125, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.584163188934326, |
|
"rewards/margins": 1.6134048700332642, |
|
"rewards/rejected": -4.197568416595459, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.3600593448319356e-07, |
|
"logits/chosen": -2.0963408946990967, |
|
"logits/rejected": -2.0860424041748047, |
|
"logps/chosen": -540.3511962890625, |
|
"logps/rejected": -717.6316528320312, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.849027633666992, |
|
"rewards/margins": 1.7384001016616821, |
|
"rewards/rejected": -4.587428092956543, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.335465440579812e-07, |
|
"logits/chosen": -2.059631824493408, |
|
"logits/rejected": -2.0190889835357666, |
|
"logps/chosen": -570.0716552734375, |
|
"logps/rejected": -778.4046630859375, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.9363150596618652, |
|
"rewards/margins": 1.9713761806488037, |
|
"rewards/rejected": -4.907691478729248, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.3104800132046474e-07, |
|
"logits/chosen": -1.9983686208724976, |
|
"logits/rejected": -1.996934175491333, |
|
"logps/chosen": -477.49285888671875, |
|
"logps/rejected": -672.7355346679688, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.4780962467193604, |
|
"rewards/margins": 1.8100128173828125, |
|
"rewards/rejected": -4.288108825683594, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.285108392347818e-07, |
|
"logits/chosen": -2.0839810371398926, |
|
"logits/rejected": -2.0576441287994385, |
|
"logps/chosen": -523.9546508789062, |
|
"logps/rejected": -660.860107421875, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.505431890487671, |
|
"rewards/margins": 1.5832833051681519, |
|
"rewards/rejected": -4.088715553283691, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.2593559900296285e-07, |
|
"logits/chosen": -2.094245433807373, |
|
"logits/rejected": -2.0811045169830322, |
|
"logps/chosen": -486.9319763183594, |
|
"logps/rejected": -642.5628051757812, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.4053609371185303, |
|
"rewards/margins": 1.5519181489944458, |
|
"rewards/rejected": -3.9572792053222656, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.233228299494877e-07, |
|
"logits/chosen": -2.0389533042907715, |
|
"logits/rejected": -2.0309371948242188, |
|
"logps/chosen": -521.2991333007812, |
|
"logps/rejected": -643.174560546875, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5973448753356934, |
|
"rewards/margins": 1.5660903453826904, |
|
"rewards/rejected": -4.1634345054626465, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.206730894041087e-07, |
|
"logits/chosen": -2.018083095550537, |
|
"logits/rejected": -2.0102462768554688, |
|
"logps/chosen": -516.707275390625, |
|
"logps/rejected": -674.5513305664062, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.5912764072418213, |
|
"rewards/margins": 1.785861611366272, |
|
"rewards/rejected": -4.377137660980225, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": -2.055713176727295, |
|
"eval_logits/rejected": -2.0386719703674316, |
|
"eval_logps/chosen": -542.10400390625, |
|
"eval_logps/rejected": -661.8126220703125, |
|
"eval_loss": 0.02854282781481743, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -2.850644111633301, |
|
"eval_rewards/margins": 1.1939489841461182, |
|
"eval_rewards/rejected": -4.044593334197998, |
|
"eval_runtime": 53.5348, |
|
"eval_samples_per_second": 37.359, |
|
"eval_steps_per_second": 0.598, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.1798694258296706e-07, |
|
"logits/chosen": -2.043966293334961, |
|
"logits/rejected": -2.039886713027954, |
|
"logps/chosen": -531.8103637695312, |
|
"logps/rejected": -674.5613403320312, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.680311918258667, |
|
"rewards/margins": 1.6952829360961914, |
|
"rewards/rejected": -4.375594615936279, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1526496246802634e-07, |
|
"logits/chosen": -2.0284981727600098, |
|
"logits/rejected": -2.001023292541504, |
|
"logps/chosen": -568.5406494140625, |
|
"logps/rejected": -756.4666748046875, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.8695082664489746, |
|
"rewards/margins": 2.0917065143585205, |
|
"rewards/rejected": -4.961215019226074, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.1250772968485e-07, |
|
"logits/chosen": -1.9576244354248047, |
|
"logits/rejected": -1.9299163818359375, |
|
"logps/chosen": -606.5989990234375, |
|
"logps/rejected": -782.6019897460938, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.1009104251861572, |
|
"rewards/margins": 2.0748424530029297, |
|
"rewards/rejected": -5.175752639770508, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.097158323787479e-07, |
|
"logits/chosen": -2.0434844493865967, |
|
"logits/rejected": -2.0083999633789062, |
|
"logps/chosen": -652.4818725585938, |
|
"logps/rejected": -786.593017578125, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.216348171234131, |
|
"rewards/margins": 1.7867790460586548, |
|
"rewards/rejected": -5.0031280517578125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.06889866089319e-07, |
|
"logits/chosen": -1.9963747262954712, |
|
"logits/rejected": -1.9636027812957764, |
|
"logps/chosen": -585.9913330078125, |
|
"logps/rejected": -745.9178466796875, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.0435433387756348, |
|
"rewards/margins": 1.9546563625335693, |
|
"rewards/rejected": -4.998199462890625, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.040304336234169e-07, |
|
"logits/chosen": -1.9372011423110962, |
|
"logits/rejected": -1.8980048894882202, |
|
"logps/chosen": -645.7947387695312, |
|
"logps/rejected": -784.5731201171875, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.4776313304901123, |
|
"rewards/margins": 1.725014328956604, |
|
"rewards/rejected": -5.202645778656006, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.0113814492656526e-07, |
|
"logits/chosen": -1.8695160150527954, |
|
"logits/rejected": -1.845237374305725, |
|
"logps/chosen": -598.3362426757812, |
|
"logps/rejected": -755.5512084960938, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.2383408546447754, |
|
"rewards/margins": 1.9676824808120728, |
|
"rewards/rejected": -5.206023216247559, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.982136169528502e-07, |
|
"logits/chosen": -1.9184964895248413, |
|
"logits/rejected": -1.8694953918457031, |
|
"logps/chosen": -607.3328857421875, |
|
"logps/rejected": -748.1498413085938, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.3264567852020264, |
|
"rewards/margins": 1.7603641748428345, |
|
"rewards/rejected": -5.08682107925415, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.952574735333174e-07, |
|
"logits/chosen": -1.915230393409729, |
|
"logits/rejected": -1.9003829956054688, |
|
"logps/chosen": -605.4636840820312, |
|
"logps/rejected": -782.3800048828125, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.3178393840789795, |
|
"rewards/margins": 2.0421977043151855, |
|
"rewards/rejected": -5.360037326812744, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.9227034524290384e-07, |
|
"logits/chosen": -1.8679676055908203, |
|
"logits/rejected": -1.8549559116363525, |
|
"logps/chosen": -601.2180786132812, |
|
"logps/rejected": -747.5027465820312, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.252883195877075, |
|
"rewards/margins": 1.8659807443618774, |
|
"rewards/rejected": -5.118864059448242, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_logits/chosen": -1.9084383249282837, |
|
"eval_logits/rejected": -1.8927175998687744, |
|
"eval_logps/chosen": -627.9131469726562, |
|
"eval_logps/rejected": -756.0888061523438, |
|
"eval_loss": 0.02165357954800129, |
|
"eval_rewards/accuracies": 0.71484375, |
|
"eval_rewards/chosen": -3.7087349891662598, |
|
"eval_rewards/margins": 1.2786200046539307, |
|
"eval_rewards/rejected": -4.987355709075928, |
|
"eval_runtime": 53.5212, |
|
"eval_samples_per_second": 37.368, |
|
"eval_steps_per_second": 0.598, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.892528692659284e-07, |
|
"logits/chosen": -1.8823909759521484, |
|
"logits/rejected": -1.858875036239624, |
|
"logps/chosen": -603.7947998046875, |
|
"logps/rejected": -773.0620727539062, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.323474884033203, |
|
"rewards/margins": 1.7797801494598389, |
|
"rewards/rejected": -5.103255271911621, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.8620568926017527e-07, |
|
"logits/chosen": -1.9934885501861572, |
|
"logits/rejected": -1.9480845928192139, |
|
"logps/chosen": -520.8903198242188, |
|
"logps/rejected": -708.3568725585938, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.7871999740600586, |
|
"rewards/margins": 1.9104350805282593, |
|
"rewards/rejected": -4.697634696960449, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.8312945521959477e-07, |
|
"logits/chosen": -1.9462416172027588, |
|
"logits/rejected": -1.926812767982483, |
|
"logps/chosen": -583.6940307617188, |
|
"logps/rejected": -708.087646484375, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.0228469371795654, |
|
"rewards/margins": 1.6597932577133179, |
|
"rewards/rejected": -4.68264102935791, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.800248233356532e-07, |
|
"logits/chosen": -1.963945746421814, |
|
"logits/rejected": -1.9388370513916016, |
|
"logps/chosen": -555.5214233398438, |
|
"logps/rejected": -671.1959838867188, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.950378179550171, |
|
"rewards/margins": 1.3993643522262573, |
|
"rewards/rejected": -4.3497419357299805, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.7689245585736055e-07, |
|
"logits/chosen": -1.9961528778076172, |
|
"logits/rejected": -1.9523189067840576, |
|
"logps/chosen": -579.2855224609375, |
|
"logps/rejected": -735.2860717773438, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.101792812347412, |
|
"rewards/margins": 1.8292566537857056, |
|
"rewards/rejected": -4.931049823760986, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.7373302095000647e-07, |
|
"logits/chosen": -1.9783185720443726, |
|
"logits/rejected": -1.9432445764541626, |
|
"logps/chosen": -554.5430908203125, |
|
"logps/rejected": -705.3320922851562, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.950016736984253, |
|
"rewards/margins": 1.682476282119751, |
|
"rewards/rejected": -4.632493019104004, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.7054719255263365e-07, |
|
"logits/chosen": -1.9057737588882446, |
|
"logits/rejected": -1.878405213356018, |
|
"logps/chosen": -621.7274169921875, |
|
"logps/rejected": -788.4262084960938, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1790566444396973, |
|
"rewards/margins": 2.1896862983703613, |
|
"rewards/rejected": -5.368742942810059, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.6733565023428005e-07, |
|
"logits/chosen": -1.8311294317245483, |
|
"logits/rejected": -1.7915903329849243, |
|
"logps/chosen": -733.3967895507812, |
|
"logps/rejected": -928.0328369140625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.1054511070251465, |
|
"rewards/margins": 2.414816379547119, |
|
"rewards/rejected": -6.520268440246582, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.6409907904901995e-07, |
|
"logits/chosen": -1.6503093242645264, |
|
"logits/rejected": -1.648636817932129, |
|
"logps/chosen": -705.9769287109375, |
|
"logps/rejected": -930.3804931640625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.372740745544434, |
|
"rewards/margins": 2.5504095554351807, |
|
"rewards/rejected": -6.923150539398193, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.60838169389835e-07, |
|
"logits/chosen": -1.6791608333587646, |
|
"logits/rejected": -1.6857120990753174, |
|
"logps/chosen": -699.28857421875, |
|
"logps/rejected": -1022.50390625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.3519606590271, |
|
"rewards/margins": 3.293463945388794, |
|
"rewards/rejected": -7.645425319671631, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_logits/chosen": -1.7102997303009033, |
|
"eval_logits/rejected": -1.7007324695587158, |
|
"eval_logps/chosen": -746.397705078125, |
|
"eval_logps/rejected": -898.7281494140625, |
|
"eval_loss": 0.013453745283186436, |
|
"eval_rewards/accuracies": 0.7109375, |
|
"eval_rewards/chosen": -4.893580436706543, |
|
"eval_rewards/margins": 1.520168423652649, |
|
"eval_rewards/rejected": -6.413749694824219, |
|
"eval_runtime": 53.4459, |
|
"eval_samples_per_second": 37.421, |
|
"eval_steps_per_second": 0.599, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.575536168413468e-07, |
|
"logits/chosen": -1.7461280822753906, |
|
"logits/rejected": -1.7247556447982788, |
|
"logps/chosen": -724.4855346679688, |
|
"logps/rejected": -1026.6610107421875, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.33648157119751, |
|
"rewards/margins": 3.390894651412964, |
|
"rewards/rejected": -7.7273759841918945, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.5424612203144133e-07, |
|
"logits/chosen": -1.8383598327636719, |
|
"logits/rejected": -1.7824443578720093, |
|
"logps/chosen": -684.1734619140625, |
|
"logps/rejected": -926.4085083007812, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.0599045753479, |
|
"rewards/margins": 2.5672054290771484, |
|
"rewards/rejected": -6.627109527587891, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.509163904818184e-07, |
|
"logits/chosen": -1.7701866626739502, |
|
"logits/rejected": -1.7341197729110718, |
|
"logps/chosen": -706.99755859375, |
|
"logps/rejected": -913.2882690429688, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.2013092041015625, |
|
"rewards/margins": 2.450648307800293, |
|
"rewards/rejected": -6.651957035064697, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.475651324574965e-07, |
|
"logits/chosen": -1.7568556070327759, |
|
"logits/rejected": -1.7366060018539429, |
|
"logps/chosen": -715.1029052734375, |
|
"logps/rejected": -985.1531372070312, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.274824142456055, |
|
"rewards/margins": 2.7921059131622314, |
|
"rewards/rejected": -7.066928863525391, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.441930628153062e-07, |
|
"logits/chosen": -1.8146743774414062, |
|
"logits/rejected": -1.7908170223236084, |
|
"logps/chosen": -689.2205810546875, |
|
"logps/rejected": -930.24755859375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.081226348876953, |
|
"rewards/margins": 2.699859142303467, |
|
"rewards/rejected": -6.781085014343262, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.408009008514047e-07, |
|
"logits/chosen": -1.7501418590545654, |
|
"logits/rejected": -1.734694242477417, |
|
"logps/chosen": -666.003173828125, |
|
"logps/rejected": -942.46875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.122848033905029, |
|
"rewards/margins": 2.9047844409942627, |
|
"rewards/rejected": -7.0276312828063965, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.373893701478417e-07, |
|
"logits/chosen": -1.7783453464508057, |
|
"logits/rejected": -1.7449932098388672, |
|
"logps/chosen": -720.6685791015625, |
|
"logps/rejected": -954.2274169921875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.406303882598877, |
|
"rewards/margins": 2.643493175506592, |
|
"rewards/rejected": -7.049797058105469, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.3395919841821273e-07, |
|
"logits/chosen": -1.775618314743042, |
|
"logits/rejected": -1.7782745361328125, |
|
"logps/chosen": -652.4534301757812, |
|
"logps/rejected": -916.9478759765625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.122664451599121, |
|
"rewards/margins": 2.6266403198242188, |
|
"rewards/rejected": -6.74930477142334, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.3051111735243065e-07, |
|
"logits/chosen": -1.7821722030639648, |
|
"logits/rejected": -1.7939774990081787, |
|
"logps/chosen": -759.48876953125, |
|
"logps/rejected": -1047.860595703125, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.437527656555176, |
|
"rewards/margins": 2.979461431503296, |
|
"rewards/rejected": -7.416989326477051, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.2704586246064874e-07, |
|
"logits/chosen": -1.7154964208602905, |
|
"logits/rejected": -1.7195425033569336, |
|
"logps/chosen": -699.4700927734375, |
|
"logps/rejected": -1012.7001953125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.2401862144470215, |
|
"rewards/margins": 2.9638888835906982, |
|
"rewards/rejected": -7.204075813293457, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_logits/chosen": -1.7490020990371704, |
|
"eval_logits/rejected": -1.7341245412826538, |
|
"eval_logps/chosen": -743.7908935546875, |
|
"eval_logps/rejected": -901.4539184570312, |
|
"eval_loss": 0.013995984569191933, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -4.8675127029418945, |
|
"eval_rewards/margins": 1.5734941959381104, |
|
"eval_rewards/rejected": -6.441006660461426, |
|
"eval_runtime": 53.4759, |
|
"eval_samples_per_second": 37.4, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.235641729163688e-07, |
|
"logits/chosen": -1.7461427450180054, |
|
"logits/rejected": -1.7461963891983032, |
|
"logps/chosen": -658.4989624023438, |
|
"logps/rejected": -929.7806396484375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.278120517730713, |
|
"rewards/margins": 2.607133388519287, |
|
"rewards/rejected": -6.88525390625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.200667913987685e-07, |
|
"logits/chosen": -1.7538375854492188, |
|
"logits/rejected": -1.770926833152771, |
|
"logps/chosen": -747.4215698242188, |
|
"logps/rejected": -1015.4489135742188, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.550160884857178, |
|
"rewards/margins": 2.916888475418091, |
|
"rewards/rejected": -7.467050075531006, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.1655446393427994e-07, |
|
"logits/chosen": -1.7389395236968994, |
|
"logits/rejected": -1.7241191864013672, |
|
"logps/chosen": -722.9381713867188, |
|
"logps/rejected": -993.43310546875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.401251792907715, |
|
"rewards/margins": 2.9433751106262207, |
|
"rewards/rejected": -7.344627380371094, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.1302793973745545e-07, |
|
"logits/chosen": -1.6942611932754517, |
|
"logits/rejected": -1.6644207239151, |
|
"logps/chosen": -716.7203369140625, |
|
"logps/rejected": -944.3639526367188, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.35445499420166, |
|
"rewards/margins": 2.5589027404785156, |
|
"rewards/rejected": -6.913357734680176, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.09487971051152e-07, |
|
"logits/chosen": -1.7815608978271484, |
|
"logits/rejected": -1.7478649616241455, |
|
"logps/chosen": -695.1300048828125, |
|
"logps/rejected": -931.83349609375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.2698163986206055, |
|
"rewards/margins": 2.59077787399292, |
|
"rewards/rejected": -6.860594272613525, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.059353129860707e-07, |
|
"logits/chosen": -1.7786239385604858, |
|
"logits/rejected": -1.7215652465820312, |
|
"logps/chosen": -717.4598388671875, |
|
"logps/rejected": -1013.67333984375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.2682695388793945, |
|
"rewards/margins": 3.1084537506103516, |
|
"rewards/rejected": -7.376723289489746, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.023707233596837e-07, |
|
"logits/chosen": -1.7982879877090454, |
|
"logits/rejected": -1.7658650875091553, |
|
"logps/chosen": -717.1801147460938, |
|
"logps/rejected": -959.96826171875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.4875640869140625, |
|
"rewards/margins": 2.68966007232666, |
|
"rewards/rejected": -7.177224159240723, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.987949625345847e-07, |
|
"logits/chosen": -1.7966346740722656, |
|
"logits/rejected": -1.7595239877700806, |
|
"logps/chosen": -714.2938842773438, |
|
"logps/rejected": -922.5560302734375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.251219272613525, |
|
"rewards/margins": 2.640549421310425, |
|
"rewards/rejected": -6.891767978668213, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.9520879325629573e-07, |
|
"logits/chosen": -1.7265571355819702, |
|
"logits/rejected": -1.7147538661956787, |
|
"logps/chosen": -687.865478515625, |
|
"logps/rejected": -943.00732421875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.247757911682129, |
|
"rewards/margins": 2.7146894931793213, |
|
"rewards/rejected": -6.962446689605713, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.9161298049056566e-07, |
|
"logits/chosen": -1.7799193859100342, |
|
"logits/rejected": -1.7424662113189697, |
|
"logps/chosen": -738.4921264648438, |
|
"logps/rejected": -971.390625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.48504114151001, |
|
"rewards/margins": 2.599909543991089, |
|
"rewards/rejected": -7.0849504470825195, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_logits/chosen": -1.7312792539596558, |
|
"eval_logits/rejected": -1.7194280624389648, |
|
"eval_logps/chosen": -771.3602905273438, |
|
"eval_logps/rejected": -933.1905517578125, |
|
"eval_loss": 0.012777678668498993, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -5.143206596374512, |
|
"eval_rewards/margins": 1.6151670217514038, |
|
"eval_rewards/rejected": -6.758373737335205, |
|
"eval_runtime": 53.5315, |
|
"eval_samples_per_second": 37.361, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.880082912601959e-07, |
|
"logits/chosen": -1.7262108325958252, |
|
"logits/rejected": -1.6596851348876953, |
|
"logps/chosen": -761.3540649414062, |
|
"logps/rejected": -1040.51025390625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.747711181640625, |
|
"rewards/margins": 2.8989500999450684, |
|
"rewards/rejected": -7.646661281585693, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.843954944814264e-07, |
|
"logits/chosen": -1.7600305080413818, |
|
"logits/rejected": -1.7589460611343384, |
|
"logps/chosen": -708.2618408203125, |
|
"logps/rejected": -1050.302490234375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.267254829406738, |
|
"rewards/margins": 3.1916282176971436, |
|
"rewards/rejected": -7.458882808685303, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.807753607999181e-07, |
|
"logits/chosen": -1.7761443853378296, |
|
"logits/rejected": -1.7351748943328857, |
|
"logps/chosen": -744.0849609375, |
|
"logps/rejected": -1033.985107421875, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.498464584350586, |
|
"rewards/margins": 3.0137176513671875, |
|
"rewards/rejected": -7.512181282043457, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.771486624263668e-07, |
|
"logits/chosen": -1.7408230304718018, |
|
"logits/rejected": -1.719871163368225, |
|
"logps/chosen": -746.6265869140625, |
|
"logps/rejected": -1010.0968627929688, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.605108737945557, |
|
"rewards/margins": 2.901407480239868, |
|
"rewards/rejected": -7.5065155029296875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.735161729717825e-07, |
|
"logits/chosen": -1.7129913568496704, |
|
"logits/rejected": -1.703195571899414, |
|
"logps/chosen": -760.1422119140625, |
|
"logps/rejected": -980.7059326171875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.600218772888184, |
|
"rewards/margins": 2.6326255798339844, |
|
"rewards/rejected": -7.232844352722168, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.6987866728247084e-07, |
|
"logits/chosen": -1.7227998971939087, |
|
"logits/rejected": -1.6779905557632446, |
|
"logps/chosen": -689.9454956054688, |
|
"logps/rejected": -951.5263671875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.222928047180176, |
|
"rewards/margins": 2.8188321590423584, |
|
"rewards/rejected": -7.041760444641113, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.6623692127475047e-07, |
|
"logits/chosen": -1.7666009664535522, |
|
"logits/rejected": -1.7336463928222656, |
|
"logps/chosen": -769.6700439453125, |
|
"logps/rejected": -1016.1780395507812, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.742946147918701, |
|
"rewards/margins": 2.6120924949645996, |
|
"rewards/rejected": -7.355038642883301, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.625917117694422e-07, |
|
"logits/chosen": -1.7655175924301147, |
|
"logits/rejected": -1.705690622329712, |
|
"logps/chosen": -751.56005859375, |
|
"logps/rejected": -1006.73095703125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.676351070404053, |
|
"rewards/margins": 2.846137523651123, |
|
"rewards/rejected": -7.522488594055176, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.589438163261656e-07, |
|
"logits/chosen": -1.7569564580917358, |
|
"logits/rejected": -1.7242019176483154, |
|
"logps/chosen": -706.7811279296875, |
|
"logps/rejected": -989.7742309570312, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.494095802307129, |
|
"rewards/margins": 2.980239152908325, |
|
"rewards/rejected": -7.474334716796875, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.5529401307747763e-07, |
|
"logits/chosen": -1.7985742092132568, |
|
"logits/rejected": -1.783491849899292, |
|
"logps/chosen": -726.0729370117188, |
|
"logps/rejected": -954.61474609375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.670909881591797, |
|
"rewards/margins": 2.3948566913604736, |
|
"rewards/rejected": -7.06576681137085, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_logits/chosen": -1.7387232780456543, |
|
"eval_logits/rejected": -1.728265404701233, |
|
"eval_logps/chosen": -777.9801635742188, |
|
"eval_logps/rejected": -937.86376953125, |
|
"eval_loss": 0.01256338506937027, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -5.209405422210693, |
|
"eval_rewards/margins": 1.5956991910934448, |
|
"eval_rewards/rejected": -6.805105209350586, |
|
"eval_runtime": 53.4938, |
|
"eval_samples_per_second": 37.388, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.516430805628891e-07, |
|
"logits/chosen": -1.7851413488388062, |
|
"logits/rejected": -1.7629187107086182, |
|
"logps/chosen": -695.8529663085938, |
|
"logps/rejected": -926.6959228515625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.3710784912109375, |
|
"rewards/margins": 2.5725345611572266, |
|
"rewards/rejected": -6.943613529205322, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.479917975627949e-07, |
|
"logits/chosen": -1.7919896841049194, |
|
"logits/rejected": -1.7884200811386108, |
|
"logps/chosen": -709.2462158203125, |
|
"logps/rejected": -874.7109375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.343535900115967, |
|
"rewards/margins": 2.22951078414917, |
|
"rewards/rejected": -6.573046684265137, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.443409429323515e-07, |
|
"logits/chosen": -1.8116176128387451, |
|
"logits/rejected": -1.8125156164169312, |
|
"logps/chosen": -657.2221069335938, |
|
"logps/rejected": -929.5003662109375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.041338920593262, |
|
"rewards/margins": 2.5582714080810547, |
|
"rewards/rejected": -6.599610805511475, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.406912954353401e-07, |
|
"logits/chosen": -1.843414545059204, |
|
"logits/rejected": -1.7798106670379639, |
|
"logps/chosen": -725.5671997070312, |
|
"logps/rejected": -1016.5926513671875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.255373001098633, |
|
"rewards/margins": 3.3176467418670654, |
|
"rewards/rejected": -7.573018550872803, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.3704363357804809e-07, |
|
"logits/chosen": -1.7281402349472046, |
|
"logits/rejected": -1.6958348751068115, |
|
"logps/chosen": -756.3338623046875, |
|
"logps/rejected": -976.4304809570312, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.633506774902344, |
|
"rewards/margins": 2.725268840789795, |
|
"rewards/rejected": -7.3587751388549805, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.3339873544320642e-07, |
|
"logits/chosen": -1.7286088466644287, |
|
"logits/rejected": -1.6789947748184204, |
|
"logps/chosen": -724.1974487304688, |
|
"logps/rejected": -990.0857543945312, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.554182529449463, |
|
"rewards/margins": 2.831148624420166, |
|
"rewards/rejected": -7.385331153869629, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.2975737852401617e-07, |
|
"logits/chosen": -1.7173970937728882, |
|
"logits/rejected": -1.711610198020935, |
|
"logps/chosen": -659.4908447265625, |
|
"logps/rejected": -941.1331787109375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.062614440917969, |
|
"rewards/margins": 3.0739612579345703, |
|
"rewards/rejected": -7.136574745178223, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.2612033955830165e-07, |
|
"logits/chosen": -1.77591073513031, |
|
"logits/rejected": -1.7117664813995361, |
|
"logps/chosen": -739.63916015625, |
|
"logps/rejected": -996.9134521484375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.666342258453369, |
|
"rewards/margins": 2.720090389251709, |
|
"rewards/rejected": -7.386431694030762, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.2248839436282463e-07, |
|
"logits/chosen": -1.7224317789077759, |
|
"logits/rejected": -1.7141587734222412, |
|
"logps/chosen": -696.9925537109375, |
|
"logps/rejected": -907.7872924804688, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.601344108581543, |
|
"rewards/margins": 2.2717630863189697, |
|
"rewards/rejected": -6.873106956481934, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.1886231766779493e-07, |
|
"logits/chosen": -1.6640018224716187, |
|
"logits/rejected": -1.6649608612060547, |
|
"logps/chosen": -721.0974731445312, |
|
"logps/rejected": -1002.4971923828125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.662453651428223, |
|
"rewards/margins": 3.1194331645965576, |
|
"rewards/rejected": -7.781886100769043, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_logits/chosen": -1.7185314893722534, |
|
"eval_logits/rejected": -1.7055809497833252, |
|
"eval_logps/chosen": -788.2789916992188, |
|
"eval_logps/rejected": -952.6434326171875, |
|
"eval_loss": 0.01262175664305687, |
|
"eval_rewards/accuracies": 0.71484375, |
|
"eval_rewards/chosen": -5.3123931884765625, |
|
"eval_rewards/margins": 1.6405082941055298, |
|
"eval_rewards/rejected": -6.952902317047119, |
|
"eval_runtime": 53.5177, |
|
"eval_samples_per_second": 37.371, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.1524288295161236e-07, |
|
"logits/chosen": -1.756303071975708, |
|
"logits/rejected": -1.7098352909088135, |
|
"logps/chosen": -754.4605712890625, |
|
"logps/rejected": -1028.431884765625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.748194694519043, |
|
"rewards/margins": 2.8807990550994873, |
|
"rewards/rejected": -7.628994941711426, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.1163086227587633e-07, |
|
"logits/chosen": -1.7816880941390991, |
|
"logits/rejected": -1.742637276649475, |
|
"logps/chosen": -663.7523193359375, |
|
"logps/rejected": -954.9913940429688, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.258275032043457, |
|
"rewards/margins": 2.9675872325897217, |
|
"rewards/rejected": -7.2258620262146, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.0802702612069733e-07, |
|
"logits/chosen": -1.7918590307235718, |
|
"logits/rejected": -1.783734917640686, |
|
"logps/chosen": -742.8740844726562, |
|
"logps/rejected": -1052.7213134765625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.428332805633545, |
|
"rewards/margins": 3.271416425704956, |
|
"rewards/rejected": -7.699748992919922, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.044321432203457e-07, |
|
"logits/chosen": -1.7414076328277588, |
|
"logits/rejected": -1.684735655784607, |
|
"logps/chosen": -707.7474365234375, |
|
"logps/rejected": -1023.4807739257812, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.4503278732299805, |
|
"rewards/margins": 3.295131206512451, |
|
"rewards/rejected": -7.745459079742432, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.0084698039927336e-07, |
|
"logits/chosen": -1.6829373836517334, |
|
"logits/rejected": -1.672227144241333, |
|
"logps/chosen": -742.3206176757812, |
|
"logps/rejected": -1008.7132568359375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.7719221115112305, |
|
"rewards/margins": 2.770425796508789, |
|
"rewards/rejected": -7.542346954345703, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.972723024085419e-07, |
|
"logits/chosen": -1.6940473318099976, |
|
"logits/rejected": -1.6489845514297485, |
|
"logps/chosen": -746.0736083984375, |
|
"logps/rejected": -1049.7562255859375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.929628372192383, |
|
"rewards/margins": 3.2463130950927734, |
|
"rewards/rejected": -8.175942420959473, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.937088717626938e-07, |
|
"logits/chosen": -1.6772867441177368, |
|
"logits/rejected": -1.6213195323944092, |
|
"logps/chosen": -778.2080078125, |
|
"logps/rejected": -1094.538818359375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.760254859924316, |
|
"rewards/margins": 3.4461236000061035, |
|
"rewards/rejected": -8.206377983093262, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.9015744857710058e-07, |
|
"logits/chosen": -1.7073795795440674, |
|
"logits/rejected": -1.665784239768982, |
|
"logps/chosen": -780.5112915039062, |
|
"logps/rejected": -1081.841064453125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.835232734680176, |
|
"rewards/margins": 3.268048048019409, |
|
"rewards/rejected": -8.103281021118164, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.866187904058225e-07, |
|
"logits/chosen": -1.6878963708877563, |
|
"logits/rejected": -1.671128273010254, |
|
"logps/chosen": -782.0076904296875, |
|
"logps/rejected": -1065.0914306640625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.955259799957275, |
|
"rewards/margins": 3.111586570739746, |
|
"rewards/rejected": -8.06684684753418, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.8309365208001454e-07, |
|
"logits/chosen": -1.7943239212036133, |
|
"logits/rejected": -1.726576805114746, |
|
"logps/chosen": -825.5764770507812, |
|
"logps/rejected": -1124.283203125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.114717960357666, |
|
"rewards/margins": 3.370556592941284, |
|
"rewards/rejected": -8.485274314880371, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_logits/chosen": -1.6833648681640625, |
|
"eval_logits/rejected": -1.67068350315094, |
|
"eval_logps/chosen": -820.9805908203125, |
|
"eval_logps/rejected": -994.1813354492188, |
|
"eval_loss": 0.011294456198811531, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -5.639409065246582, |
|
"eval_rewards/margins": 1.7288715839385986, |
|
"eval_rewards/rejected": -7.36828088760376, |
|
"eval_runtime": 53.5514, |
|
"eval_samples_per_second": 37.347, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.7958278554691365e-07, |
|
"logits/chosen": -1.6487672328948975, |
|
"logits/rejected": -1.6482921838760376, |
|
"logps/chosen": -772.1353759765625, |
|
"logps/rejected": -1089.970947265625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.103058338165283, |
|
"rewards/margins": 3.2237308025360107, |
|
"rewards/rejected": -8.326788902282715, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.7608693970944054e-07, |
|
"logits/chosen": -1.6341720819473267, |
|
"logits/rejected": -1.5817056894302368, |
|
"logps/chosen": -762.2053833007812, |
|
"logps/rejected": -1059.0550537109375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.853798866271973, |
|
"rewards/margins": 3.330787181854248, |
|
"rewards/rejected": -8.184585571289062, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.7260686026645187e-07, |
|
"logits/chosen": -1.7103843688964844, |
|
"logits/rejected": -1.638262152671814, |
|
"logps/chosen": -787.9263916015625, |
|
"logps/rejected": -1095.874755859375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.030910491943359, |
|
"rewards/margins": 3.4319870471954346, |
|
"rewards/rejected": -8.462898254394531, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.691432895536744e-07, |
|
"logits/chosen": -1.7237293720245361, |
|
"logits/rejected": -1.6708977222442627, |
|
"logps/chosen": -775.9380493164062, |
|
"logps/rejected": -1050.0135498046875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.887418746948242, |
|
"rewards/margins": 3.1419596672058105, |
|
"rewards/rejected": -8.029378890991211, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.6569696638535773e-07, |
|
"logits/chosen": -1.771658182144165, |
|
"logits/rejected": -1.7095615863800049, |
|
"logps/chosen": -704.629638671875, |
|
"logps/rejected": -945.61083984375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.571307182312012, |
|
"rewards/margins": 2.683262825012207, |
|
"rewards/rejected": -7.254570960998535, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.6226862589667805e-07, |
|
"logits/chosen": -1.7286535501480103, |
|
"logits/rejected": -1.6768054962158203, |
|
"logps/chosen": -729.1044311523438, |
|
"logps/rejected": -1025.552978515625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.595149040222168, |
|
"rewards/margins": 3.0903663635253906, |
|
"rewards/rejected": -7.685515403747559, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.5885899938692598e-07, |
|
"logits/chosen": -1.6558005809783936, |
|
"logits/rejected": -1.6638015508651733, |
|
"logps/chosen": -760.5770263671875, |
|
"logps/rejected": -1100.409423828125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.9419145584106445, |
|
"rewards/margins": 3.352973222732544, |
|
"rewards/rejected": -8.29488754272461, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.5546881416351385e-07, |
|
"logits/chosen": -1.7855870723724365, |
|
"logits/rejected": -1.7059357166290283, |
|
"logps/chosen": -833.2413940429688, |
|
"logps/rejected": -1135.54150390625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.167782783508301, |
|
"rewards/margins": 3.3692002296447754, |
|
"rewards/rejected": -8.536982536315918, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.520987933868329e-07, |
|
"logits/chosen": -1.7322263717651367, |
|
"logits/rejected": -1.6987117528915405, |
|
"logps/chosen": -727.4354858398438, |
|
"logps/rejected": -1017.2150268554688, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.652617454528809, |
|
"rewards/margins": 3.2251994609832764, |
|
"rewards/rejected": -7.8778181076049805, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.487496559159964e-07, |
|
"logits/chosen": -1.7461373805999756, |
|
"logits/rejected": -1.6696666479110718, |
|
"logps/chosen": -841.6943359375, |
|
"logps/rejected": -1117.2109375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.342791557312012, |
|
"rewards/margins": 3.175142765045166, |
|
"rewards/rejected": -8.517934799194336, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_logits/chosen": -1.6823374032974243, |
|
"eval_logits/rejected": -1.6691102981567383, |
|
"eval_logps/chosen": -821.126953125, |
|
"eval_logps/rejected": -993.9129638671875, |
|
"eval_loss": 0.011498243547976017, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -5.640872955322266, |
|
"eval_rewards/margins": 1.724724292755127, |
|
"eval_rewards/rejected": -7.365597248077393, |
|
"eval_runtime": 53.4958, |
|
"eval_samples_per_second": 37.386, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.454221161554999e-07, |
|
"logits/chosen": -1.7031192779541016, |
|
"logits/rejected": -1.6780424118041992, |
|
"logps/chosen": -852.6624145507812, |
|
"logps/rejected": -1154.143310546875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.397541522979736, |
|
"rewards/margins": 3.214184284210205, |
|
"rewards/rejected": -8.611724853515625, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.4211688390283156e-07, |
|
"logits/chosen": -1.6724073886871338, |
|
"logits/rejected": -1.6677013635635376, |
|
"logps/chosen": -755.3863525390625, |
|
"logps/rejected": -1074.7703857421875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.910325050354004, |
|
"rewards/margins": 3.2198894023895264, |
|
"rewards/rejected": -8.130215644836426, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.388346641970648e-07, |
|
"logits/chosen": -1.6422914266586304, |
|
"logits/rejected": -1.6382758617401123, |
|
"logps/chosen": -762.4996337890625, |
|
"logps/rejected": -1091.12109375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.841983318328857, |
|
"rewards/margins": 3.3933486938476562, |
|
"rewards/rejected": -8.235331535339355, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.3557615716846724e-07, |
|
"logits/chosen": -1.6859352588653564, |
|
"logits/rejected": -1.656206488609314, |
|
"logps/chosen": -798.762939453125, |
|
"logps/rejected": -1134.86474609375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.0587053298950195, |
|
"rewards/margins": 3.6833178997039795, |
|
"rewards/rejected": -8.742023468017578, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.3234205788915503e-07, |
|
"logits/chosen": -1.6807887554168701, |
|
"logits/rejected": -1.6358258724212646, |
|
"logps/chosen": -771.5270385742188, |
|
"logps/rejected": -1112.8138427734375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.122917175292969, |
|
"rewards/margins": 3.5536727905273438, |
|
"rewards/rejected": -8.676589012145996, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.2913305622482773e-07, |
|
"logits/chosen": -1.707622766494751, |
|
"logits/rejected": -1.6326669454574585, |
|
"logps/chosen": -774.7271728515625, |
|
"logps/rejected": -1105.3125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.121769905090332, |
|
"rewards/margins": 3.41931414604187, |
|
"rewards/rejected": -8.541084289550781, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.2594983668761285e-07, |
|
"logits/chosen": -1.6661421060562134, |
|
"logits/rejected": -1.6621348857879639, |
|
"logps/chosen": -807.9115600585938, |
|
"logps/rejected": -1087.288818359375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.170226097106934, |
|
"rewards/margins": 3.2703235149383545, |
|
"rewards/rejected": -8.440549850463867, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.227930782900522e-07, |
|
"logits/chosen": -1.6998815536499023, |
|
"logits/rejected": -1.6739810705184937, |
|
"logps/chosen": -771.7286987304688, |
|
"logps/rejected": -1018.63720703125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.855947017669678, |
|
"rewards/margins": 2.938450336456299, |
|
"rewards/rejected": -7.794397830963135, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.1966345440026236e-07, |
|
"logits/chosen": -1.6453163623809814, |
|
"logits/rejected": -1.64557683467865, |
|
"logps/chosen": -674.52099609375, |
|
"logps/rejected": -1037.59228515625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.500472545623779, |
|
"rewards/margins": 3.383714199066162, |
|
"rewards/rejected": -7.8841872215271, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.1656163259829779e-07, |
|
"logits/chosen": -1.7794278860092163, |
|
"logits/rejected": -1.75070321559906, |
|
"logps/chosen": -760.6216430664062, |
|
"logps/rejected": -1099.56689453125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.979386806488037, |
|
"rewards/margins": 3.331554412841797, |
|
"rewards/rejected": -8.310940742492676, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_logits/chosen": -1.672732949256897, |
|
"eval_logits/rejected": -1.6579583883285522, |
|
"eval_logps/chosen": -825.9682006835938, |
|
"eval_logps/rejected": -1002.9027099609375, |
|
"eval_loss": 0.011379951611161232, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -5.689286231994629, |
|
"eval_rewards/margins": 1.7662086486816406, |
|
"eval_rewards/rejected": -7.4554948806762695, |
|
"eval_runtime": 53.4845, |
|
"eval_samples_per_second": 37.394, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.134882745337504e-07, |
|
"logits/chosen": -1.654252052307129, |
|
"logits/rejected": -1.5575919151306152, |
|
"logps/chosen": -801.1277465820312, |
|
"logps/rejected": -1077.87109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.162522315979004, |
|
"rewards/margins": 3.2653865814208984, |
|
"rewards/rejected": -8.427908897399902, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.1044403578461175e-07, |
|
"logits/chosen": -1.725961446762085, |
|
"logits/rejected": -1.6640355587005615, |
|
"logps/chosen": -797.0972900390625, |
|
"logps/rejected": -1085.770751953125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.192586421966553, |
|
"rewards/margins": 3.1663448810577393, |
|
"rewards/rejected": -8.358930587768555, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.0742956571743292e-07, |
|
"logits/chosen": -1.6532520055770874, |
|
"logits/rejected": -1.6114171743392944, |
|
"logps/chosen": -777.9821166992188, |
|
"logps/rejected": -1123.428955078125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.027825355529785, |
|
"rewards/margins": 3.5974502563476562, |
|
"rewards/rejected": -8.625275611877441, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.0444550734880711e-07, |
|
"logits/chosen": -1.6810929775238037, |
|
"logits/rejected": -1.6335642337799072, |
|
"logps/chosen": -830.5534057617188, |
|
"logps/rejected": -1137.841796875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.260356903076172, |
|
"rewards/margins": 3.5155322551727295, |
|
"rewards/rejected": -8.77588939666748, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.0149249720820869e-07, |
|
"logits/chosen": -1.7187340259552002, |
|
"logits/rejected": -1.6748294830322266, |
|
"logps/chosen": -701.11767578125, |
|
"logps/rejected": -961.9978637695312, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.461955547332764, |
|
"rewards/margins": 2.819507598876953, |
|
"rewards/rejected": -7.281462669372559, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 9.857116520221456e-08, |
|
"logits/chosen": -1.773799180984497, |
|
"logits/rejected": -1.7349733114242554, |
|
"logps/chosen": -776.501953125, |
|
"logps/rejected": -1053.7852783203125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.631603240966797, |
|
"rewards/margins": 3.281996250152588, |
|
"rewards/rejected": -7.913599967956543, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.56821344801387e-08, |
|
"logits/chosen": -1.6899513006210327, |
|
"logits/rejected": -1.6103346347808838, |
|
"logps/chosen": -769.49267578125, |
|
"logps/rejected": -1103.720703125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.78816032409668, |
|
"rewards/margins": 3.4869580268859863, |
|
"rewards/rejected": -8.275118827819824, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 9.282602130110875e-08, |
|
"logits/chosen": -1.7122066020965576, |
|
"logits/rejected": -1.6189101934432983, |
|
"logps/chosen": -811.09619140625, |
|
"logps/rejected": -1047.5286865234375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.191458702087402, |
|
"rewards/margins": 2.880140781402588, |
|
"rewards/rejected": -8.071599960327148, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 9.000343490261095e-08, |
|
"logits/chosen": -1.69900381565094, |
|
"logits/rejected": -1.6344228982925415, |
|
"logps/chosen": -812.76806640625, |
|
"logps/rejected": -1108.335205078125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.10341739654541, |
|
"rewards/margins": 3.4086670875549316, |
|
"rewards/rejected": -8.512084007263184, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 8.721497737053479e-08, |
|
"logits/chosen": -1.6052570343017578, |
|
"logits/rejected": -1.5805437564849854, |
|
"logps/chosen": -735.450439453125, |
|
"logps/rejected": -983.8560791015625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.932892799377441, |
|
"rewards/margins": 2.790759325027466, |
|
"rewards/rejected": -7.723651885986328, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_logits/chosen": -1.6619986295700073, |
|
"eval_logits/rejected": -1.6466898918151855, |
|
"eval_logps/chosen": -832.3765869140625, |
|
"eval_logps/rejected": -1010.2193603515625, |
|
"eval_loss": 0.011269805952906609, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -5.753369331359863, |
|
"eval_rewards/margins": 1.7752916812896729, |
|
"eval_rewards/rejected": -7.528660774230957, |
|
"eval_runtime": 53.4996, |
|
"eval_samples_per_second": 37.383, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 8.446124351074086e-08, |
|
"logits/chosen": -1.7419313192367554, |
|
"logits/rejected": -1.6705381870269775, |
|
"logps/chosen": -745.1817626953125, |
|
"logps/rejected": -1017.2252197265625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.87359619140625, |
|
"rewards/margins": 3.0035781860351562, |
|
"rewards/rejected": -7.877175331115723, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 8.174282072218383e-08, |
|
"logits/chosen": -1.6777490377426147, |
|
"logits/rejected": -1.6540842056274414, |
|
"logps/chosen": -742.0955810546875, |
|
"logps/rejected": -1138.681396484375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.766180515289307, |
|
"rewards/margins": 4.033736705780029, |
|
"rewards/rejected": -8.799917221069336, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 7.906028887161359e-08, |
|
"logits/chosen": -1.7269493341445923, |
|
"logits/rejected": -1.6654075384140015, |
|
"logps/chosen": -784.79638671875, |
|
"logps/rejected": -1125.8931884765625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.131833076477051, |
|
"rewards/margins": 3.476910352706909, |
|
"rewards/rejected": -8.608742713928223, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.641422016988458e-08, |
|
"logits/chosen": -1.6880807876586914, |
|
"logits/rejected": -1.6409103870391846, |
|
"logps/chosen": -846.4464721679688, |
|
"logps/rejected": -1123.95263671875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.053952693939209, |
|
"rewards/margins": 3.477269411087036, |
|
"rewards/rejected": -8.531222343444824, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 7.380517904989697e-08, |
|
"logits/chosen": -1.6753791570663452, |
|
"logits/rejected": -1.6205085515975952, |
|
"logps/chosen": -769.8165283203125, |
|
"logps/rejected": -1126.5391845703125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.965677261352539, |
|
"rewards/margins": 3.6328155994415283, |
|
"rewards/rejected": -8.598493576049805, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 7.123372204619782e-08, |
|
"logits/chosen": -1.6212303638458252, |
|
"logits/rejected": -1.5875134468078613, |
|
"logps/chosen": -775.5635986328125, |
|
"logps/rejected": -1059.912109375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.037333965301514, |
|
"rewards/margins": 3.2719006538391113, |
|
"rewards/rejected": -8.309233665466309, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 6.870039767626628e-08, |
|
"logits/chosen": -1.7232681512832642, |
|
"logits/rejected": -1.6719175577163696, |
|
"logps/chosen": -833.2469482421875, |
|
"logps/rejected": -1081.467529296875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -5.353999137878418, |
|
"rewards/margins": 2.9406702518463135, |
|
"rewards/rejected": -8.294670104980469, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.620574632350992e-08, |
|
"logits/chosen": -1.6346582174301147, |
|
"logits/rejected": -1.6037356853485107, |
|
"logps/chosen": -806.7518310546875, |
|
"logps/rejected": -1104.5587158203125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.127570152282715, |
|
"rewards/margins": 3.4935317039489746, |
|
"rewards/rejected": -8.621101379394531, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 6.375030012199506e-08, |
|
"logits/chosen": -1.675046682357788, |
|
"logits/rejected": -1.6435983180999756, |
|
"logps/chosen": -749.980712890625, |
|
"logps/rejected": -1111.273681640625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.958588123321533, |
|
"rewards/margins": 3.4616618156433105, |
|
"rewards/rejected": -8.420249938964844, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.133458284293738e-08, |
|
"logits/chosen": -1.7064793109893799, |
|
"logits/rejected": -1.6347249746322632, |
|
"logps/chosen": -757.0708618164062, |
|
"logps/rejected": -993.1224365234375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.93691873550415, |
|
"rewards/margins": 2.8626270294189453, |
|
"rewards/rejected": -7.799546241760254, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_logits/chosen": -1.6731221675872803, |
|
"eval_logits/rejected": -1.6581306457519531, |
|
"eval_logps/chosen": -830.1170654296875, |
|
"eval_logps/rejected": -1008.2512817382812, |
|
"eval_loss": 0.011325540952384472, |
|
"eval_rewards/accuracies": 0.72265625, |
|
"eval_rewards/chosen": -5.73077392578125, |
|
"eval_rewards/margins": 1.7782068252563477, |
|
"eval_rewards/rejected": -7.5089802742004395, |
|
"eval_runtime": 53.5068, |
|
"eval_samples_per_second": 37.378, |
|
"eval_steps_per_second": 0.598, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5.8959109782976495e-08, |
|
"logits/chosen": -1.732601523399353, |
|
"logits/rejected": -1.668806791305542, |
|
"logps/chosen": -756.4334716796875, |
|
"logps/rejected": -1063.438720703125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.985089302062988, |
|
"rewards/margins": 3.1089515686035156, |
|
"rewards/rejected": -8.09404182434082, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.6624387654257644e-08, |
|
"logits/chosen": -1.7215986251831055, |
|
"logits/rejected": -1.6772810220718384, |
|
"logps/chosen": -789.0236206054688, |
|
"logps/rejected": -1100.3035888671875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.106588840484619, |
|
"rewards/margins": 3.2181344032287598, |
|
"rewards/rejected": -8.324724197387695, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 5.433091447634536e-08, |
|
"logits/chosen": -1.6453773975372314, |
|
"logits/rejected": -1.6506948471069336, |
|
"logps/chosen": -756.458251953125, |
|
"logps/rejected": -1086.7874755859375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.978815078735352, |
|
"rewards/margins": 3.332310199737549, |
|
"rewards/rejected": -8.311124801635742, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 5.207917946999058e-08, |
|
"logits/chosen": -1.6846460103988647, |
|
"logits/rejected": -1.6364688873291016, |
|
"logps/chosen": -727.8033447265625, |
|
"logps/rejected": -1009.51953125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.658383369445801, |
|
"rewards/margins": 3.1433708667755127, |
|
"rewards/rejected": -7.801754951477051, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.9869662952775224e-08, |
|
"logits/chosen": -1.6443278789520264, |
|
"logits/rejected": -1.6249277591705322, |
|
"logps/chosen": -843.3905029296875, |
|
"logps/rejected": -1076.40771484375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -5.5775251388549805, |
|
"rewards/margins": 2.608987331390381, |
|
"rewards/rejected": -8.18651294708252, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.7702836236655276e-08, |
|
"logits/chosen": -1.700831413269043, |
|
"logits/rejected": -1.674584150314331, |
|
"logps/chosen": -755.4937133789062, |
|
"logps/rejected": -1112.8975830078125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.924407005310059, |
|
"rewards/margins": 3.571908950805664, |
|
"rewards/rejected": -8.496315956115723, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.5579161527425747e-08, |
|
"logits/chosen": -1.6642249822616577, |
|
"logits/rejected": -1.6223475933074951, |
|
"logps/chosen": -798.3468627929688, |
|
"logps/rejected": -1079.736328125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.28649377822876, |
|
"rewards/margins": 3.088961124420166, |
|
"rewards/rejected": -8.375455856323242, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.349909182612724e-08, |
|
"logits/chosen": -1.6570409536361694, |
|
"logits/rejected": -1.6466248035430908, |
|
"logps/chosen": -800.0408325195312, |
|
"logps/rejected": -1084.8062744140625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.0194244384765625, |
|
"rewards/margins": 3.2717928886413574, |
|
"rewards/rejected": -8.291218757629395, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 4.146307083241615e-08, |
|
"logits/chosen": -1.7375593185424805, |
|
"logits/rejected": -1.695077896118164, |
|
"logps/chosen": -764.1077880859375, |
|
"logps/rejected": -1082.352294921875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.059436321258545, |
|
"rewards/margins": 3.311573028564453, |
|
"rewards/rejected": -8.37100887298584, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.947153284991961e-08, |
|
"logits/chosen": -1.7560580968856812, |
|
"logits/rejected": -1.696624994277954, |
|
"logps/chosen": -757.1976318359375, |
|
"logps/rejected": -1076.9356689453125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.949436187744141, |
|
"rewards/margins": 3.531515598297119, |
|
"rewards/rejected": -8.480951309204102, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_logits/chosen": -1.6537894010543823, |
|
"eval_logits/rejected": -1.6381454467773438, |
|
"eval_logps/chosen": -845.908935546875, |
|
"eval_logps/rejected": -1026.5013427734375, |
|
"eval_loss": 0.010896253399550915, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -5.888693332672119, |
|
"eval_rewards/margins": 1.8027865886688232, |
|
"eval_rewards/rejected": -7.6914801597595215, |
|
"eval_runtime": 53.5294, |
|
"eval_samples_per_second": 37.363, |
|
"eval_steps_per_second": 0.598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.7524902693593566e-08, |
|
"logits/chosen": -1.6262800693511963, |
|
"logits/rejected": -1.570649266242981, |
|
"logps/chosen": -791.1529541015625, |
|
"logps/rejected": -1135.0606689453125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.183774471282959, |
|
"rewards/margins": 3.6536669731140137, |
|
"rewards/rejected": -8.837442398071289, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.562359559910605e-08, |
|
"logits/chosen": -1.6874490976333618, |
|
"logits/rejected": -1.6482093334197998, |
|
"logps/chosen": -787.5520629882812, |
|
"logps/rejected": -1107.3006591796875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.030928134918213, |
|
"rewards/margins": 3.5772910118103027, |
|
"rewards/rejected": -8.608219146728516, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.3768017134262945e-08, |
|
"logits/chosen": -1.7088485956192017, |
|
"logits/rejected": -1.6713438034057617, |
|
"logps/chosen": -747.4099731445312, |
|
"logps/rejected": -1096.4776611328125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -5.017291069030762, |
|
"rewards/margins": 3.4673256874084473, |
|
"rewards/rejected": -8.484617233276367, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3.195856311249642e-08, |
|
"logits/chosen": -1.6516939401626587, |
|
"logits/rejected": -1.635688066482544, |
|
"logps/chosen": -776.6866455078125, |
|
"logps/rejected": -1136.5660400390625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.071151256561279, |
|
"rewards/margins": 3.6160826683044434, |
|
"rewards/rejected": -8.687234878540039, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 3.019561950843383e-08, |
|
"logits/chosen": -1.5578938722610474, |
|
"logits/rejected": -1.522306203842163, |
|
"logps/chosen": -787.5445556640625, |
|
"logps/rejected": -1147.2486572265625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.394728660583496, |
|
"rewards/margins": 3.5870540142059326, |
|
"rewards/rejected": -8.981782913208008, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.8479562375565674e-08, |
|
"logits/chosen": -1.6432325839996338, |
|
"logits/rejected": -1.6142183542251587, |
|
"logps/chosen": -776.4083251953125, |
|
"logps/rejected": -1124.1138916015625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.133936405181885, |
|
"rewards/margins": 3.458998203277588, |
|
"rewards/rejected": -8.592934608459473, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.6810757766029644e-08, |
|
"logits/chosen": -1.6703994274139404, |
|
"logits/rejected": -1.6068084239959717, |
|
"logps/chosen": -816.8751220703125, |
|
"logps/rejected": -1083.3255615234375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.362771034240723, |
|
"rewards/margins": 3.151947259902954, |
|
"rewards/rejected": -8.514719009399414, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.5189561652527668e-08, |
|
"logits/chosen": -1.622635841369629, |
|
"logits/rejected": -1.6020011901855469, |
|
"logps/chosen": -796.3896484375, |
|
"logps/rejected": -1119.7279052734375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -5.1902289390563965, |
|
"rewards/margins": 3.441279172897339, |
|
"rewards/rejected": -8.631508827209473, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.3616319852393928e-08, |
|
"logits/chosen": -1.7253414392471313, |
|
"logits/rejected": -1.6721127033233643, |
|
"logps/chosen": -831.2769775390625, |
|
"logps/rejected": -1158.5269775390625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.273201942443848, |
|
"rewards/margins": 3.51367449760437, |
|
"rewards/rejected": -8.78687572479248, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.2091367953828043e-08, |
|
"logits/chosen": -1.6632686853408813, |
|
"logits/rejected": -1.568414330482483, |
|
"logps/chosen": -835.1687622070312, |
|
"logps/rejected": -1123.8179931640625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.474801063537598, |
|
"rewards/margins": 3.423898220062256, |
|
"rewards/rejected": -8.898698806762695, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_logits/chosen": -1.650109052658081, |
|
"eval_logits/rejected": -1.634474754333496, |
|
"eval_logps/chosen": -847.995849609375, |
|
"eval_logps/rejected": -1029.7469482421875, |
|
"eval_loss": 0.010857866145670414, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -5.909562110900879, |
|
"eval_rewards/margins": 1.8143751621246338, |
|
"eval_rewards/rejected": -7.723937034606934, |
|
"eval_runtime": 53.531, |
|
"eval_samples_per_second": 37.362, |
|
"eval_steps_per_second": 0.598, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.061503124431127e-08, |
|
"logits/chosen": -1.6174163818359375, |
|
"logits/rejected": -1.602459192276001, |
|
"logps/chosen": -727.8194580078125, |
|
"logps/rejected": -1064.78759765625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.729340076446533, |
|
"rewards/margins": 3.583315372467041, |
|
"rewards/rejected": -8.312655448913574, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.918762464121887e-08, |
|
"logits/chosen": -1.6917333602905273, |
|
"logits/rejected": -1.6598358154296875, |
|
"logps/chosen": -729.9446411132812, |
|
"logps/rejected": -1037.272216796875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.782055377960205, |
|
"rewards/margins": 3.1830573081970215, |
|
"rewards/rejected": -7.965112209320068, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.7809452624645588e-08, |
|
"logits/chosen": -1.675626516342163, |
|
"logits/rejected": -1.6368210315704346, |
|
"logps/chosen": -813.89794921875, |
|
"logps/rejected": -1105.9638671875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -5.377071380615234, |
|
"rewards/margins": 3.124335765838623, |
|
"rewards/rejected": -8.501407623291016, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.6480809172456645e-08, |
|
"logits/chosen": -1.693802833557129, |
|
"logits/rejected": -1.6782232522964478, |
|
"logps/chosen": -753.5497436523438, |
|
"logps/rejected": -1048.1123046875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -5.177882194519043, |
|
"rewards/margins": 3.0096545219421387, |
|
"rewards/rejected": -8.187536239624023, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.5201977697579528e-08, |
|
"logits/chosen": -1.7007213830947876, |
|
"logits/rejected": -1.6627371311187744, |
|
"logps/chosen": -787.9459838867188, |
|
"logps/rejected": -1116.4107666015625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.116583347320557, |
|
"rewards/margins": 3.5010743141174316, |
|
"rewards/rejected": -8.617658615112305, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.3973230987549128e-08, |
|
"logits/chosen": -1.6622626781463623, |
|
"logits/rejected": -1.5994372367858887, |
|
"logps/chosen": -784.28955078125, |
|
"logps/rejected": -1101.155517578125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.170934677124023, |
|
"rewards/margins": 3.459881544113159, |
|
"rewards/rejected": -8.630815505981445, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.2794831146319463e-08, |
|
"logits/chosen": -1.64266836643219, |
|
"logits/rejected": -1.5961706638336182, |
|
"logps/chosen": -763.9287109375, |
|
"logps/rejected": -1089.2882080078125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.95175838470459, |
|
"rewards/margins": 3.4329380989074707, |
|
"rewards/rejected": -8.384696006774902, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.1667029538354106e-08, |
|
"logits/chosen": -1.6163564920425415, |
|
"logits/rejected": -1.5590986013412476, |
|
"logps/chosen": -793.8729858398438, |
|
"logps/rejected": -1148.1383056640625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.08200740814209, |
|
"rewards/margins": 3.4866957664489746, |
|
"rewards/rejected": -8.568702697753906, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.0590066735007869e-08, |
|
"logits/chosen": -1.7199102640151978, |
|
"logits/rejected": -1.7026830911636353, |
|
"logps/chosen": -778.306640625, |
|
"logps/rejected": -1062.194580078125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.963874816894531, |
|
"rewards/margins": 2.975109338760376, |
|
"rewards/rejected": -7.938983917236328, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 9.564172463210452e-09, |
|
"logits/chosen": -1.6167020797729492, |
|
"logits/rejected": -1.6062465906143188, |
|
"logps/chosen": -788.8961791992188, |
|
"logps/rejected": -1116.4625244140625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.223470211029053, |
|
"rewards/margins": 3.4364757537841797, |
|
"rewards/rejected": -8.659947395324707, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_logits/chosen": -1.6515895128250122, |
|
"eval_logits/rejected": -1.635999321937561, |
|
"eval_logps/chosen": -846.569091796875, |
|
"eval_logps/rejected": -1028.406494140625, |
|
"eval_loss": 0.010899505577981472, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -5.895295143127441, |
|
"eval_rewards/margins": 1.8152374029159546, |
|
"eval_rewards/rejected": -7.710532188415527, |
|
"eval_runtime": 53.5423, |
|
"eval_samples_per_second": 37.354, |
|
"eval_steps_per_second": 0.598, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 8.589565556463312e-09, |
|
"logits/chosen": -1.6802539825439453, |
|
"logits/rejected": -1.611122488975525, |
|
"logps/chosen": -811.3452758789062, |
|
"logps/rejected": -1190.2470703125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.255425453186035, |
|
"rewards/margins": 4.032586574554443, |
|
"rewards/rejected": -9.288012504577637, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.66645390816037e-09, |
|
"logits/chosen": -1.6802237033843994, |
|
"logits/rejected": -1.6471099853515625, |
|
"logps/chosen": -787.4656982421875, |
|
"logps/rejected": -1110.0933837890625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.306075572967529, |
|
"rewards/margins": 3.329667329788208, |
|
"rewards/rejected": -8.635743141174316, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.795034427242219e-09, |
|
"logits/chosen": -1.6536931991577148, |
|
"logits/rejected": -1.6010814905166626, |
|
"logps/chosen": -808.8085327148438, |
|
"logps/rejected": -1184.5263671875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -5.284842491149902, |
|
"rewards/margins": 4.067378044128418, |
|
"rewards/rejected": -9.35222053527832, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 5.975492996193598e-09, |
|
"logits/chosen": -1.682647943496704, |
|
"logits/rejected": -1.6313056945800781, |
|
"logps/chosen": -763.5498657226562, |
|
"logps/rejected": -1127.759033203125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.927932262420654, |
|
"rewards/margins": 3.714057207107544, |
|
"rewards/rejected": -8.641988754272461, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.208004431392632e-09, |
|
"logits/chosen": -1.6226962804794312, |
|
"logits/rejected": -1.6078941822052002, |
|
"logps/chosen": -786.3790283203125, |
|
"logps/rejected": -1102.757568359375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.062457084655762, |
|
"rewards/margins": 3.3659615516662598, |
|
"rewards/rejected": -8.428418159484863, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.492732445820852e-09, |
|
"logits/chosen": -1.635556936264038, |
|
"logits/rejected": -1.6252281665802002, |
|
"logps/chosen": -783.4513549804688, |
|
"logps/rejected": -1165.322998046875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.212643146514893, |
|
"rewards/margins": 3.8157362937927246, |
|
"rewards/rejected": -9.0283784866333, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.829829614141605e-09, |
|
"logits/chosen": -1.6864633560180664, |
|
"logits/rejected": -1.6537306308746338, |
|
"logps/chosen": -787.3248901367188, |
|
"logps/rejected": -1157.3614501953125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.272486209869385, |
|
"rewards/margins": 3.663388729095459, |
|
"rewards/rejected": -8.935874938964844, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 3.21943734015423e-09, |
|
"logits/chosen": -1.6243202686309814, |
|
"logits/rejected": -1.5534226894378662, |
|
"logps/chosen": -760.0283203125, |
|
"logps/rejected": -1035.1715087890625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -5.177143096923828, |
|
"rewards/margins": 2.9509847164154053, |
|
"rewards/rejected": -8.12812614440918, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.6616858266313437e-09, |
|
"logits/chosen": -1.6577612161636353, |
|
"logits/rejected": -1.6137058734893799, |
|
"logps/chosen": -720.5675659179688, |
|
"logps/rejected": -1045.2166748046875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.613186359405518, |
|
"rewards/margins": 3.4953677654266357, |
|
"rewards/rejected": -8.108553886413574, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.15669404754526e-09, |
|
"logits/chosen": -1.667386770248413, |
|
"logits/rejected": -1.6189031600952148, |
|
"logps/chosen": -764.5189819335938, |
|
"logps/rejected": -1097.7879638671875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.066816329956055, |
|
"rewards/margins": 3.255791425704956, |
|
"rewards/rejected": -8.322607040405273, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_logits/chosen": -1.6493228673934937, |
|
"eval_logits/rejected": -1.633424997329712, |
|
"eval_logps/chosen": -848.4521484375, |
|
"eval_logps/rejected": -1030.737060546875, |
|
"eval_loss": 0.010826395824551582, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -5.914124965667725, |
|
"eval_rewards/margins": 1.8197134733200073, |
|
"eval_rewards/rejected": -7.733838081359863, |
|
"eval_runtime": 53.4392, |
|
"eval_samples_per_second": 37.426, |
|
"eval_steps_per_second": 0.599, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.7045697226895916e-09, |
|
"logits/chosen": -1.6693267822265625, |
|
"logits/rejected": -1.634177803993225, |
|
"logps/chosen": -806.8555908203125, |
|
"logps/rejected": -1105.922607421875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.2042694091796875, |
|
"rewards/margins": 3.1718578338623047, |
|
"rewards/rejected": -8.376127243041992, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.3054092947015483e-09, |
|
"logits/chosen": -1.72137451171875, |
|
"logits/rejected": -1.696382761001587, |
|
"logps/chosen": -810.8291625976562, |
|
"logps/rejected": -1051.6142578125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -5.302319526672363, |
|
"rewards/margins": 2.8361294269561768, |
|
"rewards/rejected": -8.138448715209961, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 9.592979084898645e-10, |
|
"logits/chosen": -1.7072471380233765, |
|
"logits/rejected": -1.6153850555419922, |
|
"logps/chosen": -838.7713012695312, |
|
"logps/rejected": -1162.943115234375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.46695613861084, |
|
"rewards/margins": 3.433185577392578, |
|
"rewards/rejected": -8.900141716003418, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 6.663093930724395e-10, |
|
"logits/chosen": -1.62856924533844, |
|
"logits/rejected": -1.5820225477218628, |
|
"logps/chosen": -734.3726806640625, |
|
"logps/rejected": -1025.76171875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.715569972991943, |
|
"rewards/margins": 3.294113874435425, |
|
"rewards/rejected": -8.009683609008789, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 4.2650624582785123e-10, |
|
"logits/chosen": -1.7328513860702515, |
|
"logits/rejected": -1.657863974571228, |
|
"logps/chosen": -791.9859008789062, |
|
"logps/rejected": -1091.558349609375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.1448822021484375, |
|
"rewards/margins": 3.4487979412078857, |
|
"rewards/rejected": -8.593680381774902, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.399396191641589e-10, |
|
"logits/chosen": -1.673937439918518, |
|
"logits/rejected": -1.6486890316009521, |
|
"logps/chosen": -785.8735961914062, |
|
"logps/rejected": -1166.8218994140625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.292018413543701, |
|
"rewards/margins": 3.727411985397339, |
|
"rewards/rejected": -9.019430160522461, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.0664930960743724e-10, |
|
"logits/chosen": -1.6634880304336548, |
|
"logits/rejected": -1.5936485528945923, |
|
"logps/chosen": -855.3974609375, |
|
"logps/rejected": -1172.80810546875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.5183305740356445, |
|
"rewards/margins": 3.509974718093872, |
|
"rewards/rejected": -9.028306007385254, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.6663749312927364e-11, |
|
"logits/chosen": -1.679797887802124, |
|
"logits/rejected": -1.6278190612792969, |
|
"logps/chosen": -793.5391235351562, |
|
"logps/rejected": -1068.722900390625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -5.161255836486816, |
|
"rewards/margins": 3.2344837188720703, |
|
"rewards/rejected": -8.395739555358887, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.7044368982315063, |
|
"logits/rejected": -1.6767940521240234, |
|
"logps/chosen": -832.3815307617188, |
|
"logps/rejected": -1178.121337890625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.397744655609131, |
|
"rewards/margins": 3.5688297748565674, |
|
"rewards/rejected": -8.966574668884277, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2390, |
|
"total_flos": 0.0, |
|
"train_loss": 0.035704591817896675, |
|
"train_runtime": 20238.2555, |
|
"train_samples_per_second": 15.104, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2390, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|