|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2889, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7301038062283737e-09, |
|
"logits/chosen": -2.8762407302856445, |
|
"logits/rejected": -3.173783540725708, |
|
"logps/chosen": -321.2347412109375, |
|
"logps/rejected": -216.31759643554688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.7301038062283738e-08, |
|
"logits/chosen": -2.934302568435669, |
|
"logits/rejected": -2.866340398788452, |
|
"logps/chosen": -282.9244384765625, |
|
"logps/rejected": -240.00062561035156, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.00089820078574121, |
|
"rewards/margins": 0.005787495523691177, |
|
"rewards/rejected": -0.006685695610940456, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.4602076124567476e-08, |
|
"logits/chosen": -3.0233330726623535, |
|
"logits/rejected": -2.6111900806427, |
|
"logps/chosen": -242.19534301757812, |
|
"logps/rejected": -218.64111328125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004412095528095961, |
|
"rewards/margins": 0.008006840944290161, |
|
"rewards/rejected": -0.003594745649024844, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.190311418685121e-08, |
|
"logits/chosen": -2.994711399078369, |
|
"logits/rejected": -2.569889545440674, |
|
"logps/chosen": -289.1451110839844, |
|
"logps/rejected": -205.26162719726562, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.01539570651948452, |
|
"rewards/margins": 0.015161572024226189, |
|
"rewards/rejected": 0.00023413417511619627, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.920415224913495e-08, |
|
"logits/chosen": -2.974442958831787, |
|
"logits/rejected": -2.7724432945251465, |
|
"logps/chosen": -243.93896484375, |
|
"logps/rejected": -226.4752655029297, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0148529764264822, |
|
"rewards/margins": 0.015147365629673004, |
|
"rewards/rejected": -0.0002943886793218553, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.650519031141867e-08, |
|
"logits/chosen": -2.989952802658081, |
|
"logits/rejected": -2.679442882537842, |
|
"logps/chosen": -256.74560546875, |
|
"logps/rejected": -226.18637084960938, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.023135431110858917, |
|
"rewards/margins": 0.03898553177714348, |
|
"rewards/rejected": -0.01585010625422001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0380622837370242e-07, |
|
"logits/chosen": -3.1331353187561035, |
|
"logits/rejected": -2.938162326812744, |
|
"logps/chosen": -353.51629638671875, |
|
"logps/rejected": -251.57357788085938, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.09346623718738556, |
|
"rewards/margins": 0.13534647226333618, |
|
"rewards/rejected": -0.041880227625370026, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2110726643598616e-07, |
|
"logits/chosen": -3.092569351196289, |
|
"logits/rejected": -2.9506618976593018, |
|
"logps/chosen": -269.38006591796875, |
|
"logps/rejected": -253.18295288085938, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.12881746888160706, |
|
"rewards/margins": 0.19464503228664398, |
|
"rewards/rejected": -0.06582757830619812, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.384083044982699e-07, |
|
"logits/chosen": -3.1748270988464355, |
|
"logits/rejected": -2.881019115447998, |
|
"logps/chosen": -318.3662109375, |
|
"logps/rejected": -266.37249755859375, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.20069709420204163, |
|
"rewards/margins": 0.47188153862953186, |
|
"rewards/rejected": -0.2711844742298126, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5570934256055362e-07, |
|
"logits/chosen": -2.9505438804626465, |
|
"logits/rejected": -2.949632167816162, |
|
"logps/chosen": -280.816650390625, |
|
"logps/rejected": -239.5382843017578, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.19082725048065186, |
|
"rewards/margins": 0.41627758741378784, |
|
"rewards/rejected": -0.22545035183429718, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7301038062283734e-07, |
|
"logits/chosen": -2.823728084564209, |
|
"logits/rejected": -3.019169569015503, |
|
"logps/chosen": -284.28936767578125, |
|
"logps/rejected": -280.98675537109375, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.16983866691589355, |
|
"rewards/margins": 0.6204549670219421, |
|
"rewards/rejected": -0.45061635971069336, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -3.043107271194458, |
|
"eval_logits/rejected": -2.7438313961029053, |
|
"eval_logps/chosen": -297.5411682128906, |
|
"eval_logps/rejected": -246.62863159179688, |
|
"eval_loss": 0.5180009603500366, |
|
"eval_rewards/accuracies": 0.7182539701461792, |
|
"eval_rewards/chosen": 0.14754721522331238, |
|
"eval_rewards/margins": 0.5429435968399048, |
|
"eval_rewards/rejected": -0.3953963816165924, |
|
"eval_runtime": 130.1164, |
|
"eval_samples_per_second": 15.371, |
|
"eval_steps_per_second": 0.484, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.903114186851211e-07, |
|
"logits/chosen": -3.098701238632202, |
|
"logits/rejected": -2.781334638595581, |
|
"logps/chosen": -300.07354736328125, |
|
"logps/rejected": -215.4860382080078, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.17965184152126312, |
|
"rewards/margins": 0.6233574748039246, |
|
"rewards/rejected": -0.44370561838150024, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0761245674740483e-07, |
|
"logits/chosen": -3.1609914302825928, |
|
"logits/rejected": -2.6632208824157715, |
|
"logps/chosen": -319.6437072753906, |
|
"logps/rejected": -274.22991943359375, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.04884842038154602, |
|
"rewards/margins": 0.6324900388717651, |
|
"rewards/rejected": -0.5836416482925415, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2491349480968858e-07, |
|
"logits/chosen": -3.015540599822998, |
|
"logits/rejected": -3.109987497329712, |
|
"logps/chosen": -315.15313720703125, |
|
"logps/rejected": -275.33453369140625, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.16350099444389343, |
|
"rewards/margins": 1.0702369213104248, |
|
"rewards/rejected": -0.906735897064209, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.422145328719723e-07, |
|
"logits/chosen": -3.0585391521453857, |
|
"logits/rejected": -2.459653377532959, |
|
"logps/chosen": -303.3382873535156, |
|
"logps/rejected": -253.593017578125, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09562154114246368, |
|
"rewards/margins": 0.9685525894165039, |
|
"rewards/rejected": -1.0641741752624512, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.59515570934256e-07, |
|
"logits/chosen": -3.066427230834961, |
|
"logits/rejected": -2.8052279949188232, |
|
"logps/chosen": -297.28729248046875, |
|
"logps/rejected": -237.7314910888672, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.19543597102165222, |
|
"rewards/margins": 1.5181944370269775, |
|
"rewards/rejected": -1.322758436203003, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.768166089965398e-07, |
|
"logits/chosen": -3.1258957386016846, |
|
"logits/rejected": -2.8570971488952637, |
|
"logps/chosen": -326.80047607421875, |
|
"logps/rejected": -266.1025695800781, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.3090304136276245, |
|
"rewards/margins": 1.3269016742706299, |
|
"rewards/rejected": -1.017871379852295, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -3.1422178745269775, |
|
"logits/rejected": -2.748126268386841, |
|
"logps/chosen": -304.7181091308594, |
|
"logps/rejected": -248.3983612060547, |
|
"loss": 0.4191, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.31797105073928833, |
|
"rewards/margins": 1.5963741540908813, |
|
"rewards/rejected": -1.2784031629562378, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1141868512110725e-07, |
|
"logits/chosen": -3.1268668174743652, |
|
"logits/rejected": -2.895526885986328, |
|
"logps/chosen": -293.18341064453125, |
|
"logps/rejected": -238.168701171875, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.055577002465724945, |
|
"rewards/margins": 1.1816781759262085, |
|
"rewards/rejected": -1.1261012554168701, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.28719723183391e-07, |
|
"logits/chosen": -3.063249111175537, |
|
"logits/rejected": -2.6634368896484375, |
|
"logps/chosen": -279.2200927734375, |
|
"logps/rejected": -234.94046020507812, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3853099048137665, |
|
"rewards/margins": 1.4658257961273193, |
|
"rewards/rejected": -1.0805158615112305, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.460207612456747e-07, |
|
"logits/chosen": -3.158604621887207, |
|
"logits/rejected": -2.772695541381836, |
|
"logps/chosen": -297.8023681640625, |
|
"logps/rejected": -254.5688934326172, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2604522109031677, |
|
"rewards/margins": 1.288292646408081, |
|
"rewards/rejected": -1.027840495109558, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -3.0543432235717773, |
|
"eval_logits/rejected": -2.7577624320983887, |
|
"eval_logps/chosen": -297.6632080078125, |
|
"eval_logps/rejected": -252.20359802246094, |
|
"eval_loss": 0.4374702274799347, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": 0.13534867763519287, |
|
"eval_rewards/margins": 1.0882411003112793, |
|
"eval_rewards/rejected": -0.9528924822807312, |
|
"eval_runtime": 129.9621, |
|
"eval_samples_per_second": 15.389, |
|
"eval_steps_per_second": 0.485, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.633217993079585e-07, |
|
"logits/chosen": -3.1798148155212402, |
|
"logits/rejected": -2.5849387645721436, |
|
"logps/chosen": -361.23822021484375, |
|
"logps/rejected": -268.04400634765625, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.4466512203216553, |
|
"rewards/margins": 1.7230627536773682, |
|
"rewards/rejected": -1.2764114141464233, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.806228373702422e-07, |
|
"logits/chosen": -3.0083553791046143, |
|
"logits/rejected": -2.7477853298187256, |
|
"logps/chosen": -281.7891540527344, |
|
"logps/rejected": -229.61257934570312, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.09162549674510956, |
|
"rewards/margins": 1.5220754146575928, |
|
"rewards/rejected": -1.6137008666992188, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.979238754325259e-07, |
|
"logits/chosen": -3.013458490371704, |
|
"logits/rejected": -3.002962827682495, |
|
"logps/chosen": -330.75360107421875, |
|
"logps/rejected": -287.80621337890625, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.23488721251487732, |
|
"rewards/margins": 1.8438904285430908, |
|
"rewards/rejected": -1.6090030670166016, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1522491349480966e-07, |
|
"logits/chosen": -3.1606154441833496, |
|
"logits/rejected": -2.604207754135132, |
|
"logps/chosen": -273.66851806640625, |
|
"logps/rejected": -227.6764373779297, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22179162502288818, |
|
"rewards/margins": 1.2629636526107788, |
|
"rewards/rejected": -1.484755516052246, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.325259515570934e-07, |
|
"logits/chosen": -3.045926570892334, |
|
"logits/rejected": -2.9288675785064697, |
|
"logps/chosen": -269.4823913574219, |
|
"logps/rejected": -231.5515594482422, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1447521448135376, |
|
"rewards/margins": 1.5314770936965942, |
|
"rewards/rejected": -1.6762291193008423, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4982698961937715e-07, |
|
"logits/chosen": -2.953214168548584, |
|
"logits/rejected": -2.881971597671509, |
|
"logps/chosen": -279.1170349121094, |
|
"logps/rejected": -259.5255126953125, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6336969137191772, |
|
"rewards/margins": 1.3820281028747559, |
|
"rewards/rejected": -2.0157248973846436, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.671280276816609e-07, |
|
"logits/chosen": -2.955901861190796, |
|
"logits/rejected": -2.9216983318328857, |
|
"logps/chosen": -306.7757263183594, |
|
"logps/rejected": -248.06576538085938, |
|
"loss": 0.4471, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4410484731197357, |
|
"rewards/margins": 1.6437104940414429, |
|
"rewards/rejected": -2.08475923538208, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.844290657439446e-07, |
|
"logits/chosen": -3.0967202186584473, |
|
"logits/rejected": -2.701328992843628, |
|
"logps/chosen": -343.8622741699219, |
|
"logps/rejected": -255.25466918945312, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.018992478027939796, |
|
"rewards/margins": 1.7166669368743896, |
|
"rewards/rejected": -1.7356598377227783, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.998076923076923e-07, |
|
"logits/chosen": -3.1110892295837402, |
|
"logits/rejected": -2.551694869995117, |
|
"logps/chosen": -322.7112121582031, |
|
"logps/rejected": -257.21002197265625, |
|
"loss": 0.3876, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.04921429231762886, |
|
"rewards/margins": 1.8386141061782837, |
|
"rewards/rejected": -1.7893998622894287, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.978846153846153e-07, |
|
"logits/chosen": -3.186720371246338, |
|
"logits/rejected": -2.7246062755584717, |
|
"logps/chosen": -291.0187072753906, |
|
"logps/rejected": -258.8619384765625, |
|
"loss": 0.3848, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.12159641087055206, |
|
"rewards/margins": 2.314002275466919, |
|
"rewards/rejected": -2.1924057006835938, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.05080509185791, |
|
"eval_logits/rejected": -2.7592227458953857, |
|
"eval_logps/chosen": -303.83013916015625, |
|
"eval_logps/rejected": -261.59564208984375, |
|
"eval_loss": 0.43010157346725464, |
|
"eval_rewards/accuracies": 0.7301587462425232, |
|
"eval_rewards/chosen": -0.4813474714756012, |
|
"eval_rewards/margins": 1.4107486009597778, |
|
"eval_rewards/rejected": -1.8920958042144775, |
|
"eval_runtime": 129.8338, |
|
"eval_samples_per_second": 15.404, |
|
"eval_steps_per_second": 0.485, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.959615384615384e-07, |
|
"logits/chosen": -3.025524616241455, |
|
"logits/rejected": -2.8964145183563232, |
|
"logps/chosen": -284.9305114746094, |
|
"logps/rejected": -268.2242126464844, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6965316534042358, |
|
"rewards/margins": 1.7200047969818115, |
|
"rewards/rejected": -2.4165360927581787, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.940384615384615e-07, |
|
"logits/chosen": -3.1299521923065186, |
|
"logits/rejected": -3.0084733963012695, |
|
"logps/chosen": -251.54251098632812, |
|
"logps/rejected": -237.68899536132812, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.20220176875591278, |
|
"rewards/margins": 1.855491042137146, |
|
"rewards/rejected": -2.057692766189575, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.921153846153846e-07, |
|
"logits/chosen": -3.0541279315948486, |
|
"logits/rejected": -2.7398934364318848, |
|
"logps/chosen": -311.959228515625, |
|
"logps/rejected": -260.8400573730469, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.2934505343437195, |
|
"rewards/margins": 1.8156064748764038, |
|
"rewards/rejected": -2.1090569496154785, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.901923076923077e-07, |
|
"logits/chosen": -3.0184249877929688, |
|
"logits/rejected": -2.7525899410247803, |
|
"logps/chosen": -261.6094970703125, |
|
"logps/rejected": -280.4756774902344, |
|
"loss": 0.3781, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4810101389884949, |
|
"rewards/margins": 1.5501643419265747, |
|
"rewards/rejected": -2.031174421310425, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.882692307692308e-07, |
|
"logits/chosen": -2.9586663246154785, |
|
"logits/rejected": -2.8382368087768555, |
|
"logps/chosen": -298.5231628417969, |
|
"logps/rejected": -261.07183837890625, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6761707067489624, |
|
"rewards/margins": 1.5082601308822632, |
|
"rewards/rejected": -2.1844308376312256, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.863461538461539e-07, |
|
"logits/chosen": -3.0447680950164795, |
|
"logits/rejected": -2.9561750888824463, |
|
"logps/chosen": -325.26361083984375, |
|
"logps/rejected": -243.9521026611328, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15374720096588135, |
|
"rewards/margins": 2.1689236164093018, |
|
"rewards/rejected": -2.3226709365844727, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.844230769230768e-07, |
|
"logits/chosen": -3.171320676803589, |
|
"logits/rejected": -2.795593023300171, |
|
"logps/chosen": -270.4403076171875, |
|
"logps/rejected": -263.0525207519531, |
|
"loss": 0.4046, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24999003112316132, |
|
"rewards/margins": 1.4972503185272217, |
|
"rewards/rejected": -1.7472403049468994, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.824999999999999e-07, |
|
"logits/chosen": -3.139816999435425, |
|
"logits/rejected": -2.8237149715423584, |
|
"logps/chosen": -276.6899719238281, |
|
"logps/rejected": -280.1494445800781, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.36418357491493225, |
|
"rewards/margins": 2.0872561931610107, |
|
"rewards/rejected": -2.45143985748291, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.80576923076923e-07, |
|
"logits/chosen": -3.0150628089904785, |
|
"logits/rejected": -2.856785535812378, |
|
"logps/chosen": -306.0409240722656, |
|
"logps/rejected": -272.7051086425781, |
|
"loss": 0.3961, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2751230001449585, |
|
"rewards/margins": 2.2792677879333496, |
|
"rewards/rejected": -2.5543906688690186, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.786538461538461e-07, |
|
"logits/chosen": -3.14699649810791, |
|
"logits/rejected": -2.9324729442596436, |
|
"logps/chosen": -299.39520263671875, |
|
"logps/rejected": -281.36590576171875, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.29352888464927673, |
|
"rewards/margins": 2.5090556144714355, |
|
"rewards/rejected": -2.802584171295166, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -3.047362804412842, |
|
"eval_logits/rejected": -2.747648239135742, |
|
"eval_logps/chosen": -307.6138000488281, |
|
"eval_logps/rejected": -267.9804992675781, |
|
"eval_loss": 0.40908658504486084, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -0.8597152233123779, |
|
"eval_rewards/margins": 1.67086660861969, |
|
"eval_rewards/rejected": -2.5305819511413574, |
|
"eval_runtime": 130.4183, |
|
"eval_samples_per_second": 15.335, |
|
"eval_steps_per_second": 0.483, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.767307692307692e-07, |
|
"logits/chosen": -3.156327724456787, |
|
"logits/rejected": -2.8549814224243164, |
|
"logps/chosen": -341.67694091796875, |
|
"logps/rejected": -262.5184326171875, |
|
"loss": 0.3913, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.92992103099823, |
|
"rewards/margins": 1.826056718826294, |
|
"rewards/rejected": -2.7559778690338135, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.748076923076923e-07, |
|
"logits/chosen": -3.062770366668701, |
|
"logits/rejected": -2.763805389404297, |
|
"logps/chosen": -311.7411193847656, |
|
"logps/rejected": -281.5027770996094, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.719129204750061, |
|
"rewards/margins": 2.130154848098755, |
|
"rewards/rejected": -2.8492841720581055, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7288461538461537e-07, |
|
"logits/chosen": -2.8859896659851074, |
|
"logits/rejected": -2.65274715423584, |
|
"logps/chosen": -270.04840087890625, |
|
"logps/rejected": -266.0298156738281, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6378560662269592, |
|
"rewards/margins": 2.142416477203369, |
|
"rewards/rejected": -2.7802722454071045, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7096153846153846e-07, |
|
"logits/chosen": -2.948652982711792, |
|
"logits/rejected": -2.773026704788208, |
|
"logps/chosen": -315.21234130859375, |
|
"logps/rejected": -303.20916748046875, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8244800567626953, |
|
"rewards/margins": 1.7506906986236572, |
|
"rewards/rejected": -2.5751707553863525, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.690384615384615e-07, |
|
"logits/chosen": -2.9108870029449463, |
|
"logits/rejected": -2.7398343086242676, |
|
"logps/chosen": -329.90936279296875, |
|
"logps/rejected": -259.4378967285156, |
|
"loss": 0.3518, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.48125535249710083, |
|
"rewards/margins": 1.989284873008728, |
|
"rewards/rejected": -2.4705405235290527, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.671153846153846e-07, |
|
"logits/chosen": -2.8274548053741455, |
|
"logits/rejected": -2.5006003379821777, |
|
"logps/chosen": -306.48260498046875, |
|
"logps/rejected": -256.0704650878906, |
|
"loss": 0.3857, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5667982697486877, |
|
"rewards/margins": 1.9854114055633545, |
|
"rewards/rejected": -2.5522096157073975, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6519230769230766e-07, |
|
"logits/chosen": -2.7966976165771484, |
|
"logits/rejected": -2.4974465370178223, |
|
"logps/chosen": -282.8033752441406, |
|
"logps/rejected": -232.50863647460938, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0608681440353394, |
|
"rewards/margins": 2.071587085723877, |
|
"rewards/rejected": -3.132455587387085, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6326923076923075e-07, |
|
"logits/chosen": -2.6855077743530273, |
|
"logits/rejected": -2.5066957473754883, |
|
"logps/chosen": -252.94363403320312, |
|
"logps/rejected": -242.0689239501953, |
|
"loss": 0.3789, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.081735372543335, |
|
"rewards/margins": 2.2696421146392822, |
|
"rewards/rejected": -3.3513779640197754, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6134615384615384e-07, |
|
"logits/chosen": -2.599562168121338, |
|
"logits/rejected": -2.308093309402466, |
|
"logps/chosen": -275.1469421386719, |
|
"logps/rejected": -234.97103881835938, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6643257141113281, |
|
"rewards/margins": 2.531264543533325, |
|
"rewards/rejected": -3.1955904960632324, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5942307692307693e-07, |
|
"logits/chosen": -2.674797296524048, |
|
"logits/rejected": -2.2434916496276855, |
|
"logps/chosen": -343.3860778808594, |
|
"logps/rejected": -267.349365234375, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.19713124632835388, |
|
"rewards/margins": 2.5318844318389893, |
|
"rewards/rejected": -2.729015827178955, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.6106228828430176, |
|
"eval_logits/rejected": -2.2959861755371094, |
|
"eval_logps/chosen": -309.4405822753906, |
|
"eval_logps/rejected": -268.6939392089844, |
|
"eval_loss": 0.433157354593277, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -1.0423911809921265, |
|
"eval_rewards/margins": 1.5595366954803467, |
|
"eval_rewards/rejected": -2.6019277572631836, |
|
"eval_runtime": 130.5063, |
|
"eval_samples_per_second": 15.325, |
|
"eval_steps_per_second": 0.483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.575e-07, |
|
"logits/chosen": -2.6816911697387695, |
|
"logits/rejected": -2.3776135444641113, |
|
"logps/chosen": -293.3232421875, |
|
"logps/rejected": -270.6710205078125, |
|
"loss": 0.3898, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5901311039924622, |
|
"rewards/margins": 3.1991240978240967, |
|
"rewards/rejected": -3.7892556190490723, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.555769230769231e-07, |
|
"logits/chosen": -2.821402072906494, |
|
"logits/rejected": -2.4265995025634766, |
|
"logps/chosen": -340.00909423828125, |
|
"logps/rejected": -276.14141845703125, |
|
"loss": 0.4053, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2977416515350342, |
|
"rewards/margins": 1.781804084777832, |
|
"rewards/rejected": -3.079545736312866, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5365384615384614e-07, |
|
"logits/chosen": -2.635298252105713, |
|
"logits/rejected": -2.775653123855591, |
|
"logps/chosen": -278.55889892578125, |
|
"logps/rejected": -281.57659912109375, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3091782331466675, |
|
"rewards/margins": 1.6853997707366943, |
|
"rewards/rejected": -2.9945781230926514, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5173076923076917e-07, |
|
"logits/chosen": -2.858550548553467, |
|
"logits/rejected": -2.5678272247314453, |
|
"logps/chosen": -290.9246826171875, |
|
"logps/rejected": -272.06829833984375, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.6937220096588135, |
|
"rewards/margins": 2.3342771530151367, |
|
"rewards/rejected": -3.02799916267395, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4980769230769226e-07, |
|
"logits/chosen": -2.722504138946533, |
|
"logits/rejected": -2.6339023113250732, |
|
"logps/chosen": -322.2853088378906, |
|
"logps/rejected": -271.86431884765625, |
|
"loss": 0.3568, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8023670315742493, |
|
"rewards/margins": 2.3775863647460938, |
|
"rewards/rejected": -3.1799533367156982, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4788461538461534e-07, |
|
"logits/chosen": -2.910511016845703, |
|
"logits/rejected": -2.699862003326416, |
|
"logps/chosen": -331.2922668457031, |
|
"logps/rejected": -292.11846923828125, |
|
"loss": 0.4005, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6938377618789673, |
|
"rewards/margins": 2.184812068939209, |
|
"rewards/rejected": -2.8786497116088867, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4596153846153843e-07, |
|
"logits/chosen": -2.787506580352783, |
|
"logits/rejected": -2.5669870376586914, |
|
"logps/chosen": -309.0074157714844, |
|
"logps/rejected": -302.73089599609375, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7182657122612, |
|
"rewards/margins": 2.3996009826660156, |
|
"rewards/rejected": -3.1178669929504395, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.440384615384615e-07, |
|
"logits/chosen": -2.7322592735290527, |
|
"logits/rejected": -2.562199831008911, |
|
"logps/chosen": -312.8968505859375, |
|
"logps/rejected": -307.6165771484375, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5363451838493347, |
|
"rewards/margins": 2.4500136375427246, |
|
"rewards/rejected": -2.986358880996704, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.421153846153846e-07, |
|
"logits/chosen": -2.904258966445923, |
|
"logits/rejected": -2.672128200531006, |
|
"logps/chosen": -315.80322265625, |
|
"logps/rejected": -271.91796875, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.42090076208114624, |
|
"rewards/margins": 2.514308214187622, |
|
"rewards/rejected": -2.935209035873413, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.401923076923077e-07, |
|
"logits/chosen": -2.841974973678589, |
|
"logits/rejected": -2.6603472232818604, |
|
"logps/chosen": -285.8795166015625, |
|
"logps/rejected": -252.70083618164062, |
|
"loss": 0.4178, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4464953541755676, |
|
"rewards/margins": 2.317780017852783, |
|
"rewards/rejected": -2.764275550842285, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.8508059978485107, |
|
"eval_logits/rejected": -2.5486865043640137, |
|
"eval_logps/chosen": -305.4503173828125, |
|
"eval_logps/rejected": -267.51214599609375, |
|
"eval_loss": 0.3934193253517151, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -0.6433631181716919, |
|
"eval_rewards/margins": 1.84038507938385, |
|
"eval_rewards/rejected": -2.483747959136963, |
|
"eval_runtime": 130.023, |
|
"eval_samples_per_second": 15.382, |
|
"eval_steps_per_second": 0.485, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3826923076923073e-07, |
|
"logits/chosen": -2.816254138946533, |
|
"logits/rejected": -2.6248648166656494, |
|
"logps/chosen": -279.0022888183594, |
|
"logps/rejected": -250.00253295898438, |
|
"loss": 0.3416, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.754069447517395, |
|
"rewards/margins": 2.3438994884490967, |
|
"rewards/rejected": -3.0979690551757812, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.363461538461538e-07, |
|
"logits/chosen": -2.8594422340393066, |
|
"logits/rejected": -2.776729106903076, |
|
"logps/chosen": -272.91192626953125, |
|
"logps/rejected": -247.15420532226562, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.9311884045600891, |
|
"rewards/margins": 2.104356050491333, |
|
"rewards/rejected": -3.0355446338653564, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.344230769230769e-07, |
|
"logits/chosen": -2.8708813190460205, |
|
"logits/rejected": -2.5840649604797363, |
|
"logps/chosen": -281.8168029785156, |
|
"logps/rejected": -272.81683349609375, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6721941828727722, |
|
"rewards/margins": 2.349375009536743, |
|
"rewards/rejected": -3.021569013595581, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.325e-07, |
|
"logits/chosen": -2.9108986854553223, |
|
"logits/rejected": -2.547652006149292, |
|
"logps/chosen": -326.2723083496094, |
|
"logps/rejected": -333.6148986816406, |
|
"loss": 0.3849, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6807237863540649, |
|
"rewards/margins": 1.8231480121612549, |
|
"rewards/rejected": -2.5038719177246094, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.305769230769231e-07, |
|
"logits/chosen": -2.7768750190734863, |
|
"logits/rejected": -2.7807576656341553, |
|
"logps/chosen": -287.76812744140625, |
|
"logps/rejected": -309.85418701171875, |
|
"loss": 0.3734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5657386183738708, |
|
"rewards/margins": 2.2681524753570557, |
|
"rewards/rejected": -2.8338911533355713, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2865384615384616e-07, |
|
"logits/chosen": -2.9382200241088867, |
|
"logits/rejected": -2.714677333831787, |
|
"logps/chosen": -281.4496154785156, |
|
"logps/rejected": -280.92034912109375, |
|
"loss": 0.4074, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7575949430465698, |
|
"rewards/margins": 2.220801830291748, |
|
"rewards/rejected": -2.9783966541290283, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2673076923076925e-07, |
|
"logits/chosen": -2.9047207832336426, |
|
"logits/rejected": -2.573913097381592, |
|
"logps/chosen": -262.4695739746094, |
|
"logps/rejected": -235.21728515625, |
|
"loss": 0.3538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9275886416435242, |
|
"rewards/margins": 1.9407182931900024, |
|
"rewards/rejected": -2.8683066368103027, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2480769230769234e-07, |
|
"logits/chosen": -2.900038242340088, |
|
"logits/rejected": -2.5582680702209473, |
|
"logps/chosen": -276.6561584472656, |
|
"logps/rejected": -269.5938415527344, |
|
"loss": 0.3675, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9780898094177246, |
|
"rewards/margins": 2.3385136127471924, |
|
"rewards/rejected": -3.316603422164917, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.228846153846153e-07, |
|
"logits/chosen": -2.9137685298919678, |
|
"logits/rejected": -2.5705721378326416, |
|
"logps/chosen": -312.13092041015625, |
|
"logps/rejected": -296.21697998046875, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.045695424079895, |
|
"rewards/margins": 2.037189245223999, |
|
"rewards/rejected": -3.0828847885131836, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.209615384615384e-07, |
|
"logits/chosen": -2.788215398788452, |
|
"logits/rejected": -2.469395160675049, |
|
"logps/chosen": -270.2633056640625, |
|
"logps/rejected": -278.1541748046875, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6475088596343994, |
|
"rewards/margins": 2.1269774436950684, |
|
"rewards/rejected": -3.7744860649108887, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -2.87266206741333, |
|
"eval_logits/rejected": -2.5678770542144775, |
|
"eval_logps/chosen": -313.7168273925781, |
|
"eval_logps/rejected": -277.7876892089844, |
|
"eval_loss": 0.4058150053024292, |
|
"eval_rewards/accuracies": 0.7857142686843872, |
|
"eval_rewards/chosen": -1.4700164794921875, |
|
"eval_rewards/margins": 2.041287660598755, |
|
"eval_rewards/rejected": -3.5113041400909424, |
|
"eval_runtime": 130.4287, |
|
"eval_samples_per_second": 15.334, |
|
"eval_steps_per_second": 0.483, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.190384615384615e-07, |
|
"logits/chosen": -2.757584810256958, |
|
"logits/rejected": -2.639995574951172, |
|
"logps/chosen": -309.24932861328125, |
|
"logps/rejected": -275.01385498046875, |
|
"loss": 0.3832, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.292989730834961, |
|
"rewards/margins": 2.1771950721740723, |
|
"rewards/rejected": -3.4701850414276123, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.171153846153846e-07, |
|
"logits/chosen": -2.867556571960449, |
|
"logits/rejected": -2.7541584968566895, |
|
"logps/chosen": -289.6522521972656, |
|
"logps/rejected": -270.7252197265625, |
|
"loss": 0.374, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1409839391708374, |
|
"rewards/margins": 2.2342498302459717, |
|
"rewards/rejected": -3.3752338886260986, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.1519230769230767e-07, |
|
"logits/chosen": -2.81394362449646, |
|
"logits/rejected": -2.7436118125915527, |
|
"logps/chosen": -259.9161682128906, |
|
"logps/rejected": -304.9470520019531, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0830919742584229, |
|
"rewards/margins": 2.1413207054138184, |
|
"rewards/rejected": -3.224412441253662, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1326923076923075e-07, |
|
"logits/chosen": -2.816218852996826, |
|
"logits/rejected": -2.8035120964050293, |
|
"logps/chosen": -329.23004150390625, |
|
"logps/rejected": -238.5620574951172, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5688218474388123, |
|
"rewards/margins": 2.4970219135284424, |
|
"rewards/rejected": -3.0658438205718994, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1134615384615384e-07, |
|
"logits/chosen": -2.889146089553833, |
|
"logits/rejected": -2.6036853790283203, |
|
"logps/chosen": -312.89276123046875, |
|
"logps/rejected": -251.8223114013672, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1251590251922607, |
|
"rewards/margins": 2.237091302871704, |
|
"rewards/rejected": -3.3622500896453857, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0942307692307693e-07, |
|
"logits/chosen": -2.934772491455078, |
|
"logits/rejected": -2.5234997272491455, |
|
"logps/chosen": -303.24603271484375, |
|
"logps/rejected": -275.3195495605469, |
|
"loss": 0.3925, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4970028400421143, |
|
"rewards/margins": 2.127586603164673, |
|
"rewards/rejected": -3.624589443206787, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0749999999999996e-07, |
|
"logits/chosen": -3.005864143371582, |
|
"logits/rejected": -2.7517616748809814, |
|
"logps/chosen": -339.569091796875, |
|
"logps/rejected": -289.150146484375, |
|
"loss": 0.3449, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.193687081336975, |
|
"rewards/margins": 2.4689764976501465, |
|
"rewards/rejected": -3.662663221359253, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0557692307692305e-07, |
|
"logits/chosen": -2.9409213066101074, |
|
"logits/rejected": -2.467923402786255, |
|
"logps/chosen": -321.47149658203125, |
|
"logps/rejected": -257.263427734375, |
|
"loss": 0.3687, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8053227663040161, |
|
"rewards/margins": 2.886740207672119, |
|
"rewards/rejected": -3.692063093185425, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0365384615384614e-07, |
|
"logits/chosen": -3.0648276805877686, |
|
"logits/rejected": -2.4790384769439697, |
|
"logps/chosen": -272.0107116699219, |
|
"logps/rejected": -259.4451904296875, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2784092426300049, |
|
"rewards/margins": 2.352234363555908, |
|
"rewards/rejected": -3.630643367767334, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.017307692307692e-07, |
|
"logits/chosen": -3.0128531455993652, |
|
"logits/rejected": -2.857410192489624, |
|
"logps/chosen": -282.149169921875, |
|
"logps/rejected": -283.2014465332031, |
|
"loss": 0.4323, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9385299682617188, |
|
"rewards/margins": 1.8846447467803955, |
|
"rewards/rejected": -2.823174476623535, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -2.9202206134796143, |
|
"eval_logits/rejected": -2.6212680339813232, |
|
"eval_logps/chosen": -308.0413818359375, |
|
"eval_logps/rejected": -269.6094970703125, |
|
"eval_loss": 0.3928997218608856, |
|
"eval_rewards/accuracies": 0.7896825671195984, |
|
"eval_rewards/chosen": -0.902471661567688, |
|
"eval_rewards/margins": 1.7910076379776, |
|
"eval_rewards/rejected": -2.693479537963867, |
|
"eval_runtime": 130.5976, |
|
"eval_samples_per_second": 15.314, |
|
"eval_steps_per_second": 0.482, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.998076923076923e-07, |
|
"logits/chosen": -2.789571762084961, |
|
"logits/rejected": -2.89715838432312, |
|
"logps/chosen": -284.51275634765625, |
|
"logps/rejected": -245.26925659179688, |
|
"loss": 0.4012, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.8691709637641907, |
|
"rewards/margins": 1.9500411748886108, |
|
"rewards/rejected": -2.8192124366760254, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.978846153846154e-07, |
|
"logits/chosen": -2.89267635345459, |
|
"logits/rejected": -2.7063088417053223, |
|
"logps/chosen": -335.8846740722656, |
|
"logps/rejected": -306.52471923828125, |
|
"loss": 0.3602, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0393882989883423, |
|
"rewards/margins": 1.985988974571228, |
|
"rewards/rejected": -3.0253777503967285, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9596153846153843e-07, |
|
"logits/chosen": -2.7478280067443848, |
|
"logits/rejected": -2.5870022773742676, |
|
"logps/chosen": -260.309326171875, |
|
"logps/rejected": -247.5229034423828, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8650934100151062, |
|
"rewards/margins": 2.7803313732147217, |
|
"rewards/rejected": -3.6454246044158936, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.940384615384615e-07, |
|
"logits/chosen": -2.7750461101531982, |
|
"logits/rejected": -2.84771990776062, |
|
"logps/chosen": -300.90875244140625, |
|
"logps/rejected": -278.62127685546875, |
|
"loss": 0.3679, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6639699339866638, |
|
"rewards/margins": 2.6008620262145996, |
|
"rewards/rejected": -3.264832019805908, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9211538461538455e-07, |
|
"logits/chosen": -3.0167763233184814, |
|
"logits/rejected": -2.3664677143096924, |
|
"logps/chosen": -319.5389709472656, |
|
"logps/rejected": -274.7938537597656, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3606072664260864, |
|
"rewards/margins": 2.127211093902588, |
|
"rewards/rejected": -3.4878182411193848, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9019230769230764e-07, |
|
"logits/chosen": -2.848731279373169, |
|
"logits/rejected": -2.658998489379883, |
|
"logps/chosen": -264.0252380371094, |
|
"logps/rejected": -246.7176971435547, |
|
"loss": 0.4053, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7886860370635986, |
|
"rewards/margins": 2.0003931522369385, |
|
"rewards/rejected": -3.789078950881958, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8826923076923073e-07, |
|
"logits/chosen": -2.9210238456726074, |
|
"logits/rejected": -2.798243284225464, |
|
"logps/chosen": -290.9345703125, |
|
"logps/rejected": -271.5350646972656, |
|
"loss": 0.3502, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.4627825021743774, |
|
"rewards/margins": 1.9910736083984375, |
|
"rewards/rejected": -3.4538562297821045, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.863461538461538e-07, |
|
"logits/chosen": -2.922494411468506, |
|
"logits/rejected": -2.602292776107788, |
|
"logps/chosen": -267.75848388671875, |
|
"logps/rejected": -272.4598693847656, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7402254343032837, |
|
"rewards/margins": 2.069711208343506, |
|
"rewards/rejected": -3.809937000274658, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.844230769230769e-07, |
|
"logits/chosen": -2.877516269683838, |
|
"logits/rejected": -2.51607084274292, |
|
"logps/chosen": -338.27886962890625, |
|
"logps/rejected": -259.23712158203125, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.178595781326294, |
|
"rewards/margins": 2.4852442741394043, |
|
"rewards/rejected": -3.663839817047119, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.825e-07, |
|
"logits/chosen": -2.664858102798462, |
|
"logits/rejected": -2.485161542892456, |
|
"logps/chosen": -310.84686279296875, |
|
"logps/rejected": -259.01397705078125, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.098044514656067, |
|
"rewards/margins": 2.7692551612854004, |
|
"rewards/rejected": -3.8672993183135986, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -2.8415608406066895, |
|
"eval_logits/rejected": -2.542830467224121, |
|
"eval_logps/chosen": -310.1387939453125, |
|
"eval_logps/rejected": -272.931640625, |
|
"eval_loss": 0.3902662396430969, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -1.1122112274169922, |
|
"eval_rewards/margins": 1.9134843349456787, |
|
"eval_rewards/rejected": -3.025695562362671, |
|
"eval_runtime": 130.5238, |
|
"eval_samples_per_second": 15.323, |
|
"eval_steps_per_second": 0.483, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.805769230769231e-07, |
|
"logits/chosen": -2.7821085453033447, |
|
"logits/rejected": -2.519225597381592, |
|
"logps/chosen": -279.53466796875, |
|
"logps/rejected": -229.2816619873047, |
|
"loss": 0.3435, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7595919370651245, |
|
"rewards/margins": 2.5894370079040527, |
|
"rewards/rejected": -3.3490288257598877, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7865384615384617e-07, |
|
"logits/chosen": -3.0091214179992676, |
|
"logits/rejected": -2.694118022918701, |
|
"logps/chosen": -327.59515380859375, |
|
"logps/rejected": -311.08660888671875, |
|
"loss": 0.3721, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9237958788871765, |
|
"rewards/margins": 2.2765049934387207, |
|
"rewards/rejected": -3.200300693511963, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7673076923076925e-07, |
|
"logits/chosen": -2.812438488006592, |
|
"logits/rejected": -2.4150969982147217, |
|
"logps/chosen": -339.35986328125, |
|
"logps/rejected": -292.71905517578125, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5981228947639465, |
|
"rewards/margins": 3.090789318084717, |
|
"rewards/rejected": -3.6889121532440186, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.748076923076923e-07, |
|
"logits/chosen": -2.8114781379699707, |
|
"logits/rejected": -2.5097975730895996, |
|
"logps/chosen": -309.5245666503906, |
|
"logps/rejected": -285.4954833984375, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9724052548408508, |
|
"rewards/margins": 2.3283073902130127, |
|
"rewards/rejected": -3.3007121086120605, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.728846153846154e-07, |
|
"logits/chosen": -2.870340585708618, |
|
"logits/rejected": -2.547656536102295, |
|
"logps/chosen": -358.37042236328125, |
|
"logps/rejected": -298.05047607421875, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.174553394317627, |
|
"rewards/margins": 2.58784818649292, |
|
"rewards/rejected": -3.762402057647705, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7096153846153846e-07, |
|
"logits/chosen": -2.8550620079040527, |
|
"logits/rejected": -2.5932936668395996, |
|
"logps/chosen": -332.6602478027344, |
|
"logps/rejected": -313.8180236816406, |
|
"loss": 0.3257, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8856698274612427, |
|
"rewards/margins": 2.5121753215789795, |
|
"rewards/rejected": -3.3978450298309326, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.690384615384615e-07, |
|
"logits/chosen": -2.858963966369629, |
|
"logits/rejected": -2.5064713954925537, |
|
"logps/chosen": -332.31402587890625, |
|
"logps/rejected": -266.08477783203125, |
|
"loss": 0.1583, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8705652952194214, |
|
"rewards/margins": 4.396849632263184, |
|
"rewards/rejected": -5.2674150466918945, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.671153846153846e-07, |
|
"logits/chosen": -2.7196831703186035, |
|
"logits/rejected": -2.562572717666626, |
|
"logps/chosen": -274.79473876953125, |
|
"logps/rejected": -267.02587890625, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3285406231880188, |
|
"rewards/margins": 5.019989013671875, |
|
"rewards/rejected": -5.348529815673828, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6519230769230767e-07, |
|
"logits/chosen": -2.827141046524048, |
|
"logits/rejected": -2.4708828926086426, |
|
"logps/chosen": -259.7366027832031, |
|
"logps/rejected": -272.045654296875, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.058406610041856766, |
|
"rewards/margins": 5.3060407638549805, |
|
"rewards/rejected": -5.247633934020996, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6326923076923076e-07, |
|
"logits/chosen": -2.8863983154296875, |
|
"logits/rejected": -2.680694341659546, |
|
"logps/chosen": -307.0455322265625, |
|
"logps/rejected": -273.3485107421875, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03276168182492256, |
|
"rewards/margins": 6.467474937438965, |
|
"rewards/rejected": -6.500236511230469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": -2.8150267601013184, |
|
"eval_logits/rejected": -2.509260654449463, |
|
"eval_logps/chosen": -313.2651062011719, |
|
"eval_logps/rejected": -283.91961669921875, |
|
"eval_loss": 0.3991049826145172, |
|
"eval_rewards/accuracies": 0.8015872836112976, |
|
"eval_rewards/chosen": -1.4248411655426025, |
|
"eval_rewards/margins": 2.699652671813965, |
|
"eval_rewards/rejected": -4.1244940757751465, |
|
"eval_runtime": 137.8475, |
|
"eval_samples_per_second": 14.509, |
|
"eval_steps_per_second": 0.457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6134615384615384e-07, |
|
"logits/chosen": -2.7880311012268066, |
|
"logits/rejected": -2.579087018966675, |
|
"logps/chosen": -271.03375244140625, |
|
"logps/rejected": -290.6522216796875, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.3281337320804596, |
|
"rewards/margins": 6.674640655517578, |
|
"rewards/rejected": -6.3465070724487305, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.594230769230769e-07, |
|
"logits/chosen": -2.804231882095337, |
|
"logits/rejected": -2.3508119583129883, |
|
"logps/chosen": -286.9497375488281, |
|
"logps/rejected": -285.6386413574219, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.19059844315052032, |
|
"rewards/margins": 5.62130880355835, |
|
"rewards/rejected": -5.430710315704346, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5749999999999997e-07, |
|
"logits/chosen": -2.8610713481903076, |
|
"logits/rejected": -2.537271022796631, |
|
"logps/chosen": -323.7139587402344, |
|
"logps/rejected": -295.88519287109375, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10528359562158585, |
|
"rewards/margins": 6.13063907623291, |
|
"rewards/rejected": -6.025355339050293, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5557692307692305e-07, |
|
"logits/chosen": -2.777980327606201, |
|
"logits/rejected": -2.4435582160949707, |
|
"logps/chosen": -268.0248107910156, |
|
"logps/rejected": -285.71185302734375, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2603774666786194, |
|
"rewards/margins": 6.728521823883057, |
|
"rewards/rejected": -6.988899230957031, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5365384615384614e-07, |
|
"logits/chosen": -2.6917247772216797, |
|
"logits/rejected": -2.6877942085266113, |
|
"logps/chosen": -268.5338439941406, |
|
"logps/rejected": -299.14337158203125, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6682311296463013, |
|
"rewards/margins": 4.996738433837891, |
|
"rewards/rejected": -5.664969444274902, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5173076923076923e-07, |
|
"logits/chosen": -2.8935842514038086, |
|
"logits/rejected": -2.620191812515259, |
|
"logps/chosen": -360.71923828125, |
|
"logps/rejected": -382.8384704589844, |
|
"loss": 0.053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1445586234331131, |
|
"rewards/margins": 6.801445007324219, |
|
"rewards/rejected": -6.946004390716553, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.498076923076923e-07, |
|
"logits/chosen": -2.8603906631469727, |
|
"logits/rejected": -2.4893665313720703, |
|
"logps/chosen": -288.63330078125, |
|
"logps/rejected": -276.2620849609375, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.13872453570365906, |
|
"rewards/margins": 5.943129539489746, |
|
"rewards/rejected": -5.80440616607666, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.478846153846154e-07, |
|
"logits/chosen": -2.8175647258758545, |
|
"logits/rejected": -2.600020408630371, |
|
"logps/chosen": -303.94549560546875, |
|
"logps/rejected": -291.47125244140625, |
|
"loss": 0.0507, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.09348030388355255, |
|
"rewards/margins": 6.168398857116699, |
|
"rewards/rejected": -6.074917793273926, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.459615384615385e-07, |
|
"logits/chosen": -2.5639805793762207, |
|
"logits/rejected": -2.6261074542999268, |
|
"logps/chosen": -293.8769836425781, |
|
"logps/rejected": -305.51483154296875, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.5082187652587891, |
|
"rewards/margins": 5.773597240447998, |
|
"rewards/rejected": -6.281815528869629, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.440384615384615e-07, |
|
"logits/chosen": -2.915066957473755, |
|
"logits/rejected": -2.7044992446899414, |
|
"logps/chosen": -264.509033203125, |
|
"logps/rejected": -311.0338134765625, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.5605603456497192, |
|
"rewards/margins": 5.587393760681152, |
|
"rewards/rejected": -6.147953510284424, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -2.8242454528808594, |
|
"eval_logits/rejected": -2.5170044898986816, |
|
"eval_logps/chosen": -317.8056335449219, |
|
"eval_logps/rejected": -287.9914245605469, |
|
"eval_loss": 0.3998846411705017, |
|
"eval_rewards/accuracies": 0.7896825671195984, |
|
"eval_rewards/chosen": -1.8788989782333374, |
|
"eval_rewards/margins": 2.6527750492095947, |
|
"eval_rewards/rejected": -4.531673908233643, |
|
"eval_runtime": 138.2758, |
|
"eval_samples_per_second": 14.464, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.421153846153846e-07, |
|
"logits/chosen": -2.852841377258301, |
|
"logits/rejected": -2.838223934173584, |
|
"logps/chosen": -267.4649658203125, |
|
"logps/rejected": -319.916015625, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.17563840746879578, |
|
"rewards/margins": 6.513575077056885, |
|
"rewards/rejected": -6.689213752746582, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4019230769230764e-07, |
|
"logits/chosen": -2.8269314765930176, |
|
"logits/rejected": -2.4453513622283936, |
|
"logps/chosen": -298.703857421875, |
|
"logps/rejected": -329.9590759277344, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.23525571823120117, |
|
"rewards/margins": 6.723779201507568, |
|
"rewards/rejected": -6.9590349197387695, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3826923076923073e-07, |
|
"logits/chosen": -2.8692283630371094, |
|
"logits/rejected": -2.5266168117523193, |
|
"logps/chosen": -304.0444641113281, |
|
"logps/rejected": -332.7533264160156, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5301819443702698, |
|
"rewards/margins": 6.649426460266113, |
|
"rewards/rejected": -7.179608345031738, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.363461538461538e-07, |
|
"logits/chosen": -2.8497471809387207, |
|
"logits/rejected": -2.7025351524353027, |
|
"logps/chosen": -268.01116943359375, |
|
"logps/rejected": -269.3169250488281, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7645604610443115, |
|
"rewards/margins": 5.691164970397949, |
|
"rewards/rejected": -6.45572566986084, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.344230769230769e-07, |
|
"logits/chosen": -2.7669548988342285, |
|
"logits/rejected": -2.582608699798584, |
|
"logps/chosen": -297.6730651855469, |
|
"logps/rejected": -308.69866943359375, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.13068512082099915, |
|
"rewards/margins": 6.759812355041504, |
|
"rewards/rejected": -6.89049768447876, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.325e-07, |
|
"logits/chosen": -2.880467653274536, |
|
"logits/rejected": -2.482970714569092, |
|
"logps/chosen": -325.6810607910156, |
|
"logps/rejected": -317.48223876953125, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.18659517168998718, |
|
"rewards/margins": 6.584499359130859, |
|
"rewards/rejected": -6.77109432220459, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.305769230769231e-07, |
|
"logits/chosen": -2.910566806793213, |
|
"logits/rejected": -2.390554904937744, |
|
"logps/chosen": -335.02728271484375, |
|
"logps/rejected": -297.4922180175781, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5198079347610474, |
|
"rewards/margins": 6.664822578430176, |
|
"rewards/rejected": -7.184630393981934, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.286538461538461e-07, |
|
"logits/chosen": -2.741680383682251, |
|
"logits/rejected": -2.6448922157287598, |
|
"logps/chosen": -293.5112609863281, |
|
"logps/rejected": -307.81072998046875, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1880362033843994, |
|
"rewards/margins": 6.250551700592041, |
|
"rewards/rejected": -7.4385881423950195, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.267307692307692e-07, |
|
"logits/chosen": -2.8261971473693848, |
|
"logits/rejected": -2.5925064086914062, |
|
"logps/chosen": -341.7809143066406, |
|
"logps/rejected": -322.70599365234375, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.081176519393921, |
|
"rewards/margins": 7.538055419921875, |
|
"rewards/rejected": -8.619232177734375, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.248076923076923e-07, |
|
"logits/chosen": -2.762108325958252, |
|
"logits/rejected": -2.593078136444092, |
|
"logps/chosen": -305.1072692871094, |
|
"logps/rejected": -306.4645690917969, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.4623178243637085, |
|
"rewards/margins": 5.997315406799316, |
|
"rewards/rejected": -7.459633827209473, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_logits/chosen": -2.810943603515625, |
|
"eval_logits/rejected": -2.513878345489502, |
|
"eval_logps/chosen": -325.2280578613281, |
|
"eval_logps/rejected": -297.9687194824219, |
|
"eval_loss": 0.41905704140663147, |
|
"eval_rewards/accuracies": 0.7817460298538208, |
|
"eval_rewards/chosen": -2.6211390495300293, |
|
"eval_rewards/margins": 2.90826416015625, |
|
"eval_rewards/rejected": -5.529403209686279, |
|
"eval_runtime": 138.4959, |
|
"eval_samples_per_second": 14.441, |
|
"eval_steps_per_second": 0.455, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.228846153846154e-07, |
|
"logits/chosen": -2.7453770637512207, |
|
"logits/rejected": -2.585190773010254, |
|
"logps/chosen": -299.1862487792969, |
|
"logps/rejected": -337.72320556640625, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7499379515647888, |
|
"rewards/margins": 6.9477972984313965, |
|
"rewards/rejected": -7.697734832763672, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2096153846153846e-07, |
|
"logits/chosen": -2.8571231365203857, |
|
"logits/rejected": -2.7352118492126465, |
|
"logps/chosen": -290.97100830078125, |
|
"logps/rejected": -344.00579833984375, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5878090858459473, |
|
"rewards/margins": 7.520246982574463, |
|
"rewards/rejected": -8.10805606842041, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1903846153846155e-07, |
|
"logits/chosen": -2.8437607288360596, |
|
"logits/rejected": -2.3721210956573486, |
|
"logps/chosen": -337.2201232910156, |
|
"logps/rejected": -293.2413635253906, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.8207977414131165, |
|
"rewards/margins": 6.183731555938721, |
|
"rewards/rejected": -7.004528999328613, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1711538461538464e-07, |
|
"logits/chosen": -2.7952797412872314, |
|
"logits/rejected": -2.5198020935058594, |
|
"logps/chosen": -278.1673889160156, |
|
"logps/rejected": -294.428955078125, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.46408623456954956, |
|
"rewards/margins": 6.282520771026611, |
|
"rewards/rejected": -6.746606349945068, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.151923076923077e-07, |
|
"logits/chosen": -2.8894572257995605, |
|
"logits/rejected": -2.6253533363342285, |
|
"logps/chosen": -316.9076843261719, |
|
"logps/rejected": -325.4949035644531, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.14270208775997162, |
|
"rewards/margins": 6.009703636169434, |
|
"rewards/rejected": -5.867001533508301, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.132692307692307e-07, |
|
"logits/chosen": -2.8078317642211914, |
|
"logits/rejected": -2.5614686012268066, |
|
"logps/chosen": -298.93621826171875, |
|
"logps/rejected": -320.55828857421875, |
|
"loss": 0.052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.061225999146699905, |
|
"rewards/margins": 6.401114463806152, |
|
"rewards/rejected": -6.462340354919434, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.113461538461538e-07, |
|
"logits/chosen": -2.8679614067077637, |
|
"logits/rejected": -2.5462114810943604, |
|
"logps/chosen": -332.6659240722656, |
|
"logps/rejected": -304.09649658203125, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.08115784078836441, |
|
"rewards/margins": 6.285240173339844, |
|
"rewards/rejected": -6.204082012176514, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.094230769230769e-07, |
|
"logits/chosen": -2.8452160358428955, |
|
"logits/rejected": -2.593921184539795, |
|
"logps/chosen": -278.31298828125, |
|
"logps/rejected": -268.8619689941406, |
|
"loss": 0.051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3518837094306946, |
|
"rewards/margins": 6.064053535461426, |
|
"rewards/rejected": -6.4159369468688965, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0749999999999997e-07, |
|
"logits/chosen": -2.8956944942474365, |
|
"logits/rejected": -2.659951686859131, |
|
"logps/chosen": -315.71759033203125, |
|
"logps/rejected": -329.34552001953125, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.39976391196250916, |
|
"rewards/margins": 6.577657222747803, |
|
"rewards/rejected": -6.977420806884766, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0557692307692305e-07, |
|
"logits/chosen": -2.820831060409546, |
|
"logits/rejected": -2.7252230644226074, |
|
"logps/chosen": -275.63775634765625, |
|
"logps/rejected": -314.5214538574219, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7111703157424927, |
|
"rewards/margins": 6.398756980895996, |
|
"rewards/rejected": -7.109927177429199, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_logits/chosen": -2.8120782375335693, |
|
"eval_logits/rejected": -2.515331506729126, |
|
"eval_logps/chosen": -319.6214294433594, |
|
"eval_logps/rejected": -293.1344909667969, |
|
"eval_loss": 0.40695273876190186, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -2.0604758262634277, |
|
"eval_rewards/margins": 2.985503911972046, |
|
"eval_rewards/rejected": -5.045979976654053, |
|
"eval_runtime": 138.1372, |
|
"eval_samples_per_second": 14.478, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0365384615384614e-07, |
|
"logits/chosen": -2.879749298095703, |
|
"logits/rejected": -2.674367666244507, |
|
"logps/chosen": -315.0393371582031, |
|
"logps/rejected": -354.39971923828125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.607972264289856, |
|
"rewards/margins": 6.823029518127441, |
|
"rewards/rejected": -7.431002616882324, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0173076923076923e-07, |
|
"logits/chosen": -2.8352997303009033, |
|
"logits/rejected": -2.622284412384033, |
|
"logps/chosen": -293.62371826171875, |
|
"logps/rejected": -313.57244873046875, |
|
"loss": 0.0372, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08940573781728745, |
|
"rewards/margins": 7.263546943664551, |
|
"rewards/rejected": -7.352952003479004, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.998076923076923e-07, |
|
"logits/chosen": -2.805799961090088, |
|
"logits/rejected": -2.57133150100708, |
|
"logps/chosen": -323.6988220214844, |
|
"logps/rejected": -278.4544677734375, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.618446409702301, |
|
"rewards/margins": 6.439663887023926, |
|
"rewards/rejected": -7.058110237121582, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9788461538461535e-07, |
|
"logits/chosen": -2.750667095184326, |
|
"logits/rejected": -2.553912878036499, |
|
"logps/chosen": -262.938232421875, |
|
"logps/rejected": -265.05987548828125, |
|
"loss": 0.057, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6424404978752136, |
|
"rewards/margins": 6.154696464538574, |
|
"rewards/rejected": -6.7971367835998535, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9596153846153844e-07, |
|
"logits/chosen": -2.74922776222229, |
|
"logits/rejected": -2.522984743118286, |
|
"logps/chosen": -296.0598449707031, |
|
"logps/rejected": -295.513916015625, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.9643446207046509, |
|
"rewards/margins": 6.230860233306885, |
|
"rewards/rejected": -7.195204734802246, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.940384615384615e-07, |
|
"logits/chosen": -2.760007381439209, |
|
"logits/rejected": -2.3878226280212402, |
|
"logps/chosen": -271.11651611328125, |
|
"logps/rejected": -298.1424560546875, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8373205065727234, |
|
"rewards/margins": 6.260037422180176, |
|
"rewards/rejected": -7.097357749938965, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.921153846153846e-07, |
|
"logits/chosen": -2.7579071521759033, |
|
"logits/rejected": -2.446126937866211, |
|
"logps/chosen": -356.7608337402344, |
|
"logps/rejected": -328.30108642578125, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24769195914268494, |
|
"rewards/margins": 7.452914237976074, |
|
"rewards/rejected": -7.700606346130371, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.901923076923077e-07, |
|
"logits/chosen": -2.6069226264953613, |
|
"logits/rejected": -2.6243488788604736, |
|
"logps/chosen": -317.1035461425781, |
|
"logps/rejected": -319.14581298828125, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2916959822177887, |
|
"rewards/margins": 6.393536567687988, |
|
"rewards/rejected": -6.685232639312744, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.882692307692308e-07, |
|
"logits/chosen": -2.725607395172119, |
|
"logits/rejected": -2.3066391944885254, |
|
"logps/chosen": -319.43157958984375, |
|
"logps/rejected": -315.57080078125, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6996710896492004, |
|
"rewards/margins": 6.387073040008545, |
|
"rewards/rejected": -7.0867438316345215, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.863461538461539e-07, |
|
"logits/chosen": -2.8061482906341553, |
|
"logits/rejected": -2.6063475608825684, |
|
"logps/chosen": -319.04803466796875, |
|
"logps/rejected": -313.13494873046875, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17195379734039307, |
|
"rewards/margins": 7.243444919586182, |
|
"rewards/rejected": -7.41540002822876, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_logits/chosen": -2.7388460636138916, |
|
"eval_logits/rejected": -2.4383180141448975, |
|
"eval_logps/chosen": -321.46142578125, |
|
"eval_logps/rejected": -293.61639404296875, |
|
"eval_loss": 0.400052547454834, |
|
"eval_rewards/accuracies": 0.7936508059501648, |
|
"eval_rewards/chosen": -2.244476795196533, |
|
"eval_rewards/margins": 2.8496947288513184, |
|
"eval_rewards/rejected": -5.094171047210693, |
|
"eval_runtime": 137.973, |
|
"eval_samples_per_second": 14.496, |
|
"eval_steps_per_second": 0.457, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.844230769230769e-07, |
|
"logits/chosen": -2.73895001411438, |
|
"logits/rejected": -2.6003918647766113, |
|
"logps/chosen": -286.24420166015625, |
|
"logps/rejected": -312.2796325683594, |
|
"loss": 0.086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7323522567749023, |
|
"rewards/margins": 6.541958808898926, |
|
"rewards/rejected": -7.274311065673828, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8249999999999994e-07, |
|
"logits/chosen": -2.805447816848755, |
|
"logits/rejected": -2.335646867752075, |
|
"logps/chosen": -316.35821533203125, |
|
"logps/rejected": -295.689453125, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.7643243670463562, |
|
"rewards/margins": 6.689816474914551, |
|
"rewards/rejected": -7.454140663146973, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8057692307692303e-07, |
|
"logits/chosen": -2.6269233226776123, |
|
"logits/rejected": -2.334346294403076, |
|
"logps/chosen": -292.73297119140625, |
|
"logps/rejected": -307.1222229003906, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6479349732398987, |
|
"rewards/margins": 6.831157684326172, |
|
"rewards/rejected": -7.479092597961426, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.786538461538461e-07, |
|
"logits/chosen": -2.6912832260131836, |
|
"logits/rejected": -2.435011625289917, |
|
"logps/chosen": -319.80609130859375, |
|
"logps/rejected": -309.6658630371094, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33509448170661926, |
|
"rewards/margins": 7.094533443450928, |
|
"rewards/rejected": -7.429627895355225, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.767307692307692e-07, |
|
"logits/chosen": -2.567253589630127, |
|
"logits/rejected": -2.4747424125671387, |
|
"logps/chosen": -294.86395263671875, |
|
"logps/rejected": -295.82183837890625, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.19534453749656677, |
|
"rewards/margins": 6.722334861755371, |
|
"rewards/rejected": -6.917679786682129, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.748076923076923e-07, |
|
"logits/chosen": -2.699418783187866, |
|
"logits/rejected": -2.5728678703308105, |
|
"logps/chosen": -329.30322265625, |
|
"logps/rejected": -334.52044677734375, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.20032933354377747, |
|
"rewards/margins": 6.842965602874756, |
|
"rewards/rejected": -7.043294429779053, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.728846153846154e-07, |
|
"logits/chosen": -2.674626588821411, |
|
"logits/rejected": -2.435716152191162, |
|
"logps/chosen": -337.5685729980469, |
|
"logps/rejected": -332.3224182128906, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.4591027796268463, |
|
"rewards/margins": 6.688274383544922, |
|
"rewards/rejected": -7.147377014160156, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7096153846153847e-07, |
|
"logits/chosen": -2.704498767852783, |
|
"logits/rejected": -2.261159896850586, |
|
"logps/chosen": -288.1585693359375, |
|
"logps/rejected": -305.39166259765625, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.35019350051879883, |
|
"rewards/margins": 6.569688320159912, |
|
"rewards/rejected": -6.919882297515869, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6903846153846155e-07, |
|
"logits/chosen": -2.7152137756347656, |
|
"logits/rejected": -2.2341487407684326, |
|
"logps/chosen": -273.33880615234375, |
|
"logps/rejected": -277.2101745605469, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8817023038864136, |
|
"rewards/margins": 5.90345573425293, |
|
"rewards/rejected": -6.785158634185791, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.671153846153846e-07, |
|
"logits/chosen": -2.6211445331573486, |
|
"logits/rejected": -2.372669219970703, |
|
"logps/chosen": -328.92657470703125, |
|
"logps/rejected": -282.3219299316406, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.9412908554077148, |
|
"rewards/margins": 6.654592990875244, |
|
"rewards/rejected": -7.595883846282959, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/chosen": -2.696347236633301, |
|
"eval_logits/rejected": -2.3906466960906982, |
|
"eval_logps/chosen": -322.5152587890625, |
|
"eval_logps/rejected": -295.14263916015625, |
|
"eval_loss": 0.4066043198108673, |
|
"eval_rewards/accuracies": 0.8015872836112976, |
|
"eval_rewards/chosen": -2.3498613834381104, |
|
"eval_rewards/margins": 2.8969383239746094, |
|
"eval_rewards/rejected": -5.246799945831299, |
|
"eval_runtime": 138.1634, |
|
"eval_samples_per_second": 14.476, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.651923076923077e-07, |
|
"logits/chosen": -2.6930763721466064, |
|
"logits/rejected": -2.4558794498443604, |
|
"logps/chosen": -297.29498291015625, |
|
"logps/rejected": -310.0185546875, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.2887471914291382, |
|
"rewards/margins": 6.140270709991455, |
|
"rewards/rejected": -7.429017543792725, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6326923076923076e-07, |
|
"logits/chosen": -2.5517020225524902, |
|
"logits/rejected": -2.2331695556640625, |
|
"logps/chosen": -306.2018127441406, |
|
"logps/rejected": -289.97216796875, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4713718295097351, |
|
"rewards/margins": 6.912310600280762, |
|
"rewards/rejected": -7.3836822509765625, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6134615384615385e-07, |
|
"logits/chosen": -2.5724055767059326, |
|
"logits/rejected": -2.276639461517334, |
|
"logps/chosen": -289.84307861328125, |
|
"logps/rejected": -308.78851318359375, |
|
"loss": 0.047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45345011353492737, |
|
"rewards/margins": 7.111723899841309, |
|
"rewards/rejected": -7.565174102783203, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5942307692307694e-07, |
|
"logits/chosen": -2.6910042762756348, |
|
"logits/rejected": -2.519693374633789, |
|
"logps/chosen": -274.81695556640625, |
|
"logps/rejected": -293.1619873046875, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8246002197265625, |
|
"rewards/margins": 6.030448913574219, |
|
"rewards/rejected": -6.855049133300781, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5749999999999997e-07, |
|
"logits/chosen": -2.648815631866455, |
|
"logits/rejected": -2.505194902420044, |
|
"logps/chosen": -251.701416015625, |
|
"logps/rejected": -303.21319580078125, |
|
"loss": 0.0427, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.43140754103660583, |
|
"rewards/margins": 6.856492519378662, |
|
"rewards/rejected": -7.287900447845459, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5557692307692306e-07, |
|
"logits/chosen": -2.5452957153320312, |
|
"logits/rejected": -2.3541762828826904, |
|
"logps/chosen": -237.29531860351562, |
|
"logps/rejected": -246.6190643310547, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.9847968816757202, |
|
"rewards/margins": 5.875502109527588, |
|
"rewards/rejected": -6.860299587249756, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5365384615384614e-07, |
|
"logits/chosen": -2.754185914993286, |
|
"logits/rejected": -2.343951940536499, |
|
"logps/chosen": -326.2292175292969, |
|
"logps/rejected": -300.82843017578125, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6876034736633301, |
|
"rewards/margins": 7.04247522354126, |
|
"rewards/rejected": -7.73007869720459, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5173076923076923e-07, |
|
"logits/chosen": -2.7579290866851807, |
|
"logits/rejected": -2.269740104675293, |
|
"logps/chosen": -305.60357666015625, |
|
"logps/rejected": -292.82257080078125, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6316174268722534, |
|
"rewards/margins": 7.171195983886719, |
|
"rewards/rejected": -7.802813529968262, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.498076923076923e-07, |
|
"logits/chosen": -2.599862575531006, |
|
"logits/rejected": -2.4854395389556885, |
|
"logps/chosen": -308.61041259765625, |
|
"logps/rejected": -350.6466979980469, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18435214459896088, |
|
"rewards/margins": 6.852914333343506, |
|
"rewards/rejected": -7.037265777587891, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478846153846154e-07, |
|
"logits/chosen": -2.491466522216797, |
|
"logits/rejected": -2.2569782733917236, |
|
"logps/chosen": -288.4465637207031, |
|
"logps/rejected": -276.3277587890625, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8335868716239929, |
|
"rewards/margins": 6.33389949798584, |
|
"rewards/rejected": -7.16748571395874, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_logits/chosen": -2.646865129470825, |
|
"eval_logits/rejected": -2.3390119075775146, |
|
"eval_logps/chosen": -319.6136474609375, |
|
"eval_logps/rejected": -291.59014892578125, |
|
"eval_loss": 0.3962344229221344, |
|
"eval_rewards/accuracies": 0.8015872836112976, |
|
"eval_rewards/chosen": -2.059701681137085, |
|
"eval_rewards/margins": 2.83184552192688, |
|
"eval_rewards/rejected": -4.891546726226807, |
|
"eval_runtime": 138.1446, |
|
"eval_samples_per_second": 14.478, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4596153846153844e-07, |
|
"logits/chosen": -2.6766560077667236, |
|
"logits/rejected": -2.3890440464019775, |
|
"logps/chosen": -320.79901123046875, |
|
"logps/rejected": -327.23553466796875, |
|
"loss": 0.052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5842215418815613, |
|
"rewards/margins": 6.163203716278076, |
|
"rewards/rejected": -6.747425079345703, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4403846153846153e-07, |
|
"logits/chosen": -2.553154706954956, |
|
"logits/rejected": -2.430248737335205, |
|
"logps/chosen": -278.9110107421875, |
|
"logps/rejected": -300.0827331542969, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6223709583282471, |
|
"rewards/margins": 6.104709625244141, |
|
"rewards/rejected": -6.727081298828125, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421153846153846e-07, |
|
"logits/chosen": -2.514636993408203, |
|
"logits/rejected": -2.49489164352417, |
|
"logps/chosen": -264.50677490234375, |
|
"logps/rejected": -279.73504638671875, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.4848340153694153, |
|
"rewards/margins": 6.083920478820801, |
|
"rewards/rejected": -6.56875467300415, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.401923076923077e-07, |
|
"logits/chosen": -2.6115622520446777, |
|
"logits/rejected": -2.155635356903076, |
|
"logps/chosen": -317.70391845703125, |
|
"logps/rejected": -309.3824768066406, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.23578333854675293, |
|
"rewards/margins": 6.9274091720581055, |
|
"rewards/rejected": -7.1631927490234375, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3826923076923076e-07, |
|
"logits/chosen": -2.519632577896118, |
|
"logits/rejected": -2.1128008365631104, |
|
"logps/chosen": -304.7782287597656, |
|
"logps/rejected": -318.24578857421875, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.653046727180481, |
|
"rewards/margins": 7.262368202209473, |
|
"rewards/rejected": -7.915414333343506, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3634615384615382e-07, |
|
"logits/chosen": -2.471069812774658, |
|
"logits/rejected": -2.2706027030944824, |
|
"logps/chosen": -321.79833984375, |
|
"logps/rejected": -329.3187255859375, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35187727212905884, |
|
"rewards/margins": 7.088311195373535, |
|
"rewards/rejected": -7.440188407897949, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.344230769230769e-07, |
|
"logits/chosen": -2.7176525592803955, |
|
"logits/rejected": -2.316293478012085, |
|
"logps/chosen": -301.23724365234375, |
|
"logps/rejected": -292.47515869140625, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00272883172146976, |
|
"rewards/margins": 6.684892177581787, |
|
"rewards/rejected": -6.687620639801025, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.325e-07, |
|
"logits/chosen": -2.51845383644104, |
|
"logits/rejected": -2.6053450107574463, |
|
"logps/chosen": -260.30267333984375, |
|
"logps/rejected": -285.4233703613281, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8373669385910034, |
|
"rewards/margins": 6.566694736480713, |
|
"rewards/rejected": -7.404061794281006, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3057692307692306e-07, |
|
"logits/chosen": -2.6345601081848145, |
|
"logits/rejected": -2.0915791988372803, |
|
"logps/chosen": -293.4105529785156, |
|
"logps/rejected": -283.9396057128906, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.2911089062690735, |
|
"rewards/margins": 6.6237335205078125, |
|
"rewards/rejected": -6.914842128753662, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2865384615384615e-07, |
|
"logits/chosen": -2.5609567165374756, |
|
"logits/rejected": -2.4597666263580322, |
|
"logps/chosen": -283.018798828125, |
|
"logps/rejected": -314.49749755859375, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.472573459148407, |
|
"rewards/margins": 6.736408233642578, |
|
"rewards/rejected": -7.208981990814209, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_logits/chosen": -2.6606547832489014, |
|
"eval_logits/rejected": -2.353179454803467, |
|
"eval_logps/chosen": -317.9099426269531, |
|
"eval_logps/rejected": -288.78173828125, |
|
"eval_loss": 0.3941672146320343, |
|
"eval_rewards/accuracies": 0.8134920597076416, |
|
"eval_rewards/chosen": -1.889327049255371, |
|
"eval_rewards/margins": 2.721378803253174, |
|
"eval_rewards/rejected": -4.610706329345703, |
|
"eval_runtime": 137.7756, |
|
"eval_samples_per_second": 14.516, |
|
"eval_steps_per_second": 0.457, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2673076923076923e-07, |
|
"logits/chosen": -2.5994811058044434, |
|
"logits/rejected": -2.188270092010498, |
|
"logps/chosen": -293.59429931640625, |
|
"logps/rejected": -287.1206359863281, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.12811625003814697, |
|
"rewards/margins": 6.462324619293213, |
|
"rewards/rejected": -6.5904412269592285, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2480769230769232e-07, |
|
"logits/chosen": -2.701138496398926, |
|
"logits/rejected": -2.504826307296753, |
|
"logps/chosen": -334.2485046386719, |
|
"logps/rejected": -309.32330322265625, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11231260001659393, |
|
"rewards/margins": 6.020442485809326, |
|
"rewards/rejected": -5.908129692077637, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2288461538461535e-07, |
|
"logits/chosen": -2.6742324829101562, |
|
"logits/rejected": -2.340301275253296, |
|
"logps/chosen": -266.2740478515625, |
|
"logps/rejected": -298.72637939453125, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3445238471031189, |
|
"rewards/margins": 6.339863300323486, |
|
"rewards/rejected": -6.68438720703125, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2096153846153844e-07, |
|
"logits/chosen": -2.706620693206787, |
|
"logits/rejected": -2.4908840656280518, |
|
"logps/chosen": -304.2544860839844, |
|
"logps/rejected": -328.808349609375, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4945853352546692, |
|
"rewards/margins": 6.773747444152832, |
|
"rewards/rejected": -7.268332481384277, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1903846153846153e-07, |
|
"logits/chosen": -2.5668416023254395, |
|
"logits/rejected": -2.1745359897613525, |
|
"logps/chosen": -283.23101806640625, |
|
"logps/rejected": -289.1015625, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.4894266128540039, |
|
"rewards/margins": 6.333164691925049, |
|
"rewards/rejected": -6.8225908279418945, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1711538461538462e-07, |
|
"logits/chosen": -2.4643194675445557, |
|
"logits/rejected": -2.2983975410461426, |
|
"logps/chosen": -276.0385437011719, |
|
"logps/rejected": -261.1243591308594, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7288832068443298, |
|
"rewards/margins": 5.606365203857422, |
|
"rewards/rejected": -6.3352484703063965, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1519230769230768e-07, |
|
"logits/chosen": -2.552398681640625, |
|
"logits/rejected": -2.3484368324279785, |
|
"logps/chosen": -356.1564636230469, |
|
"logps/rejected": -358.2718811035156, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.34922653436660767, |
|
"rewards/margins": 6.843531608581543, |
|
"rewards/rejected": -6.494305610656738, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1326923076923076e-07, |
|
"logits/chosen": -2.5096826553344727, |
|
"logits/rejected": -2.1927523612976074, |
|
"logps/chosen": -311.0741882324219, |
|
"logps/rejected": -288.48016357421875, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.4201888144016266, |
|
"rewards/margins": 6.081272602081299, |
|
"rewards/rejected": -6.501461029052734, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1134615384615385e-07, |
|
"logits/chosen": -2.5754733085632324, |
|
"logits/rejected": -2.302943468093872, |
|
"logps/chosen": -319.63763427734375, |
|
"logps/rejected": -306.80706787109375, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2358233481645584, |
|
"rewards/margins": 6.327439308166504, |
|
"rewards/rejected": -6.563262939453125, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.094230769230769e-07, |
|
"logits/chosen": -2.5756051540374756, |
|
"logits/rejected": -2.2062323093414307, |
|
"logps/chosen": -303.8487548828125, |
|
"logps/rejected": -291.21343994140625, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.1234605461359024, |
|
"rewards/margins": 6.422218322753906, |
|
"rewards/rejected": -6.545678615570068, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_logits/chosen": -2.5907535552978516, |
|
"eval_logits/rejected": -2.2728497982025146, |
|
"eval_logps/chosen": -317.79052734375, |
|
"eval_logps/rejected": -289.8961486816406, |
|
"eval_loss": 0.39897972345352173, |
|
"eval_rewards/accuracies": 0.817460298538208, |
|
"eval_rewards/chosen": -1.8773850202560425, |
|
"eval_rewards/margins": 2.8447630405426025, |
|
"eval_rewards/rejected": -4.7221479415893555, |
|
"eval_runtime": 138.1312, |
|
"eval_samples_per_second": 14.479, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0749999999999997e-07, |
|
"logits/chosen": -2.529545545578003, |
|
"logits/rejected": -2.288567304611206, |
|
"logps/chosen": -281.3018798828125, |
|
"logps/rejected": -325.45086669921875, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7181668877601624, |
|
"rewards/margins": 6.582350730895996, |
|
"rewards/rejected": -7.3005170822143555, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0557692307692306e-07, |
|
"logits/chosen": -2.677251100540161, |
|
"logits/rejected": -2.332127571105957, |
|
"logps/chosen": -365.18426513671875, |
|
"logps/rejected": -319.1354064941406, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.353956401348114, |
|
"rewards/margins": 6.727086544036865, |
|
"rewards/rejected": -6.373129844665527, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0365384615384615e-07, |
|
"logits/chosen": -2.4742937088012695, |
|
"logits/rejected": -2.227391004562378, |
|
"logps/chosen": -290.05865478515625, |
|
"logps/rejected": -287.7528991699219, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4824981689453125, |
|
"rewards/margins": 6.790109157562256, |
|
"rewards/rejected": -7.272607326507568, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0173076923076923e-07, |
|
"logits/chosen": -2.45625901222229, |
|
"logits/rejected": -2.1974668502807617, |
|
"logps/chosen": -286.39642333984375, |
|
"logps/rejected": -305.8327941894531, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8668807744979858, |
|
"rewards/margins": 6.204115867614746, |
|
"rewards/rejected": -7.0709967613220215, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.998076923076923e-07, |
|
"logits/chosen": -2.4767487049102783, |
|
"logits/rejected": -2.2565927505493164, |
|
"logps/chosen": -312.245849609375, |
|
"logps/rejected": -325.4161071777344, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.23089423775672913, |
|
"rewards/margins": 6.206521987915039, |
|
"rewards/rejected": -6.437416076660156, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9788461538461538e-07, |
|
"logits/chosen": -2.4799299240112305, |
|
"logits/rejected": -2.1780128479003906, |
|
"logps/chosen": -337.66900634765625, |
|
"logps/rejected": -293.83551025390625, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.772417426109314, |
|
"rewards/margins": 5.717872619628906, |
|
"rewards/rejected": -6.490289211273193, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9596153846153847e-07, |
|
"logits/chosen": -2.6148133277893066, |
|
"logits/rejected": -2.4374372959136963, |
|
"logps/chosen": -324.8668212890625, |
|
"logps/rejected": -370.79241943359375, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.5282601118087769, |
|
"rewards/margins": 6.644644260406494, |
|
"rewards/rejected": -7.172904014587402, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9403846153846153e-07, |
|
"logits/chosen": -2.567094326019287, |
|
"logits/rejected": -2.3295035362243652, |
|
"logps/chosen": -265.49188232421875, |
|
"logps/rejected": -281.04522705078125, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3976692259311676, |
|
"rewards/margins": 5.820679664611816, |
|
"rewards/rejected": -6.218348503112793, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921153846153846e-07, |
|
"logits/chosen": -2.454172372817993, |
|
"logits/rejected": -2.171295166015625, |
|
"logps/chosen": -271.5619201660156, |
|
"logps/rejected": -311.7131042480469, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.039119429886341095, |
|
"rewards/margins": 6.767712593078613, |
|
"rewards/rejected": -6.806832790374756, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9019230769230768e-07, |
|
"logits/chosen": -2.5482497215270996, |
|
"logits/rejected": -2.401669979095459, |
|
"logps/chosen": -307.3960266113281, |
|
"logps/rejected": -318.3688659667969, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.022620106115937233, |
|
"rewards/margins": 6.840855598449707, |
|
"rewards/rejected": -6.818235874176025, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_logits/chosen": -2.5658326148986816, |
|
"eval_logits/rejected": -2.255352258682251, |
|
"eval_logps/chosen": -317.76165771484375, |
|
"eval_logps/rejected": -289.4820861816406, |
|
"eval_loss": 0.39241212606430054, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -1.8745005130767822, |
|
"eval_rewards/margins": 2.806236505508423, |
|
"eval_rewards/rejected": -4.680737495422363, |
|
"eval_runtime": 138.2704, |
|
"eval_samples_per_second": 14.464, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8826923076923076e-07, |
|
"logits/chosen": -2.465681552886963, |
|
"logits/rejected": -2.326996326446533, |
|
"logps/chosen": -274.81573486328125, |
|
"logps/rejected": -284.34405517578125, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.3031781315803528, |
|
"rewards/margins": 6.008488178253174, |
|
"rewards/rejected": -6.311666488647461, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8634615384615385e-07, |
|
"logits/chosen": -2.532928943634033, |
|
"logits/rejected": -2.366983652114868, |
|
"logps/chosen": -390.73223876953125, |
|
"logps/rejected": -315.1780700683594, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.22381691634655, |
|
"rewards/margins": 6.888078212738037, |
|
"rewards/rejected": -7.1118950843811035, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.844230769230769e-07, |
|
"logits/chosen": -2.545893669128418, |
|
"logits/rejected": -2.114891529083252, |
|
"logps/chosen": -240.4969940185547, |
|
"logps/rejected": -269.58819580078125, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.3161620497703552, |
|
"rewards/margins": 7.036616325378418, |
|
"rewards/rejected": -6.720454216003418, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.825e-07, |
|
"logits/chosen": -2.4374775886535645, |
|
"logits/rejected": -2.175703525543213, |
|
"logps/chosen": -272.4288024902344, |
|
"logps/rejected": -296.0552673339844, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.035207681357860565, |
|
"rewards/margins": 6.330394744873047, |
|
"rewards/rejected": -6.365602493286133, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8057692307692306e-07, |
|
"logits/chosen": -2.3066885471343994, |
|
"logits/rejected": -2.355970859527588, |
|
"logps/chosen": -294.2997131347656, |
|
"logps/rejected": -333.3860778808594, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04359764978289604, |
|
"rewards/margins": 9.196569442749023, |
|
"rewards/rejected": -9.152971267700195, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7865384615384615e-07, |
|
"logits/chosen": -2.4848382472991943, |
|
"logits/rejected": -2.32381010055542, |
|
"logps/chosen": -319.75018310546875, |
|
"logps/rejected": -326.4351501464844, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.26576751470565796, |
|
"rewards/margins": 6.980968475341797, |
|
"rewards/rejected": -7.2467360496521, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.767307692307692e-07, |
|
"logits/chosen": -2.5374231338500977, |
|
"logits/rejected": -2.332153797149658, |
|
"logps/chosen": -284.25946044921875, |
|
"logps/rejected": -338.75006103515625, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31498903036117554, |
|
"rewards/margins": 8.299997329711914, |
|
"rewards/rejected": -7.985008239746094, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.748076923076923e-07, |
|
"logits/chosen": -2.487070083618164, |
|
"logits/rejected": -2.249248504638672, |
|
"logps/chosen": -291.6103820800781, |
|
"logps/rejected": -322.8634948730469, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5083962082862854, |
|
"rewards/margins": 7.409769535064697, |
|
"rewards/rejected": -7.918165683746338, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7288461538461538e-07, |
|
"logits/chosen": -2.5644381046295166, |
|
"logits/rejected": -2.2769272327423096, |
|
"logps/chosen": -264.1942138671875, |
|
"logps/rejected": -300.13299560546875, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47016096115112305, |
|
"rewards/margins": 7.2947282791137695, |
|
"rewards/rejected": -7.764888763427734, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7096153846153847e-07, |
|
"logits/chosen": -2.40687894821167, |
|
"logits/rejected": -2.2331576347351074, |
|
"logps/chosen": -330.56280517578125, |
|
"logps/rejected": -339.08087158203125, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6595343947410583, |
|
"rewards/margins": 7.104054927825928, |
|
"rewards/rejected": -7.763589382171631, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_logits/chosen": -2.534712553024292, |
|
"eval_logits/rejected": -2.2297492027282715, |
|
"eval_logps/chosen": -323.70367431640625, |
|
"eval_logps/rejected": -299.8647155761719, |
|
"eval_loss": 0.426045686006546, |
|
"eval_rewards/accuracies": 0.7936508059501648, |
|
"eval_rewards/chosen": -2.468703508377075, |
|
"eval_rewards/margins": 3.2502999305725098, |
|
"eval_rewards/rejected": -5.719003677368164, |
|
"eval_runtime": 130.9272, |
|
"eval_samples_per_second": 15.276, |
|
"eval_steps_per_second": 0.481, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6903846153846153e-07, |
|
"logits/chosen": -2.53375506401062, |
|
"logits/rejected": -2.4274983406066895, |
|
"logps/chosen": -317.34210205078125, |
|
"logps/rejected": -329.56561279296875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4780617356300354, |
|
"rewards/margins": 7.658169746398926, |
|
"rewards/rejected": -8.13623046875, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.671153846153846e-07, |
|
"logits/chosen": -2.3600234985351562, |
|
"logits/rejected": -2.2371509075164795, |
|
"logps/chosen": -238.1803741455078, |
|
"logps/rejected": -311.7017517089844, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.047269344329834, |
|
"rewards/margins": 7.484587669372559, |
|
"rewards/rejected": -8.53185749053955, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6519230769230768e-07, |
|
"logits/chosen": -2.5380847454071045, |
|
"logits/rejected": -2.3048927783966064, |
|
"logps/chosen": -294.823486328125, |
|
"logps/rejected": -327.47802734375, |
|
"loss": 0.014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.806699275970459, |
|
"rewards/margins": 7.580696105957031, |
|
"rewards/rejected": -8.387394905090332, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6326923076923077e-07, |
|
"logits/chosen": -2.3959903717041016, |
|
"logits/rejected": -2.3024260997772217, |
|
"logps/chosen": -309.2193298339844, |
|
"logps/rejected": -318.51409912109375, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8904142379760742, |
|
"rewards/margins": 7.762335300445557, |
|
"rewards/rejected": -8.652750015258789, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6134615384615383e-07, |
|
"logits/chosen": -2.4456429481506348, |
|
"logits/rejected": -2.5010082721710205, |
|
"logps/chosen": -293.930908203125, |
|
"logps/rejected": -353.22210693359375, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8601284027099609, |
|
"rewards/margins": 8.212738990783691, |
|
"rewards/rejected": -9.072866439819336, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5942307692307691e-07, |
|
"logits/chosen": -2.4511656761169434, |
|
"logits/rejected": -2.115886926651001, |
|
"logps/chosen": -305.025390625, |
|
"logps/rejected": -289.225830078125, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.959731936454773, |
|
"rewards/margins": 7.51929235458374, |
|
"rewards/rejected": -8.479024887084961, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.575e-07, |
|
"logits/chosen": -2.4998791217803955, |
|
"logits/rejected": -2.253462314605713, |
|
"logps/chosen": -289.7786560058594, |
|
"logps/rejected": -333.38641357421875, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2887849807739258, |
|
"rewards/margins": 7.364945411682129, |
|
"rewards/rejected": -8.653730392456055, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.555769230769231e-07, |
|
"logits/chosen": -2.535858631134033, |
|
"logits/rejected": -2.194636583328247, |
|
"logps/chosen": -300.7867736816406, |
|
"logps/rejected": -307.16937255859375, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0742636919021606, |
|
"rewards/margins": 7.198313236236572, |
|
"rewards/rejected": -8.272576332092285, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5365384615384612e-07, |
|
"logits/chosen": -2.508073329925537, |
|
"logits/rejected": -2.2332749366760254, |
|
"logps/chosen": -277.55584716796875, |
|
"logps/rejected": -303.4755554199219, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.042938232421875, |
|
"rewards/margins": 7.390425682067871, |
|
"rewards/rejected": -8.43336296081543, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.517307692307692e-07, |
|
"logits/chosen": -2.4870352745056152, |
|
"logits/rejected": -2.193904161453247, |
|
"logps/chosen": -278.8507385253906, |
|
"logps/rejected": -316.874755859375, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.6408751010894775, |
|
"rewards/margins": 7.6175031661987305, |
|
"rewards/rejected": -9.258378982543945, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_logits/chosen": -2.521868944168091, |
|
"eval_logits/rejected": -2.218470573425293, |
|
"eval_logps/chosen": -327.2823486328125, |
|
"eval_logps/rejected": -306.3802490234375, |
|
"eval_loss": 0.4518626630306244, |
|
"eval_rewards/accuracies": 0.7976190447807312, |
|
"eval_rewards/chosen": -2.8265669345855713, |
|
"eval_rewards/margins": 3.5439889430999756, |
|
"eval_rewards/rejected": -6.370555400848389, |
|
"eval_runtime": 130.6688, |
|
"eval_samples_per_second": 15.306, |
|
"eval_steps_per_second": 0.482, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.498076923076923e-07, |
|
"logits/chosen": -2.4707891941070557, |
|
"logits/rejected": -2.3319196701049805, |
|
"logps/chosen": -299.81610107421875, |
|
"logps/rejected": -340.24267578125, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4307548999786377, |
|
"rewards/margins": 8.416878700256348, |
|
"rewards/rejected": -9.847633361816406, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4788461538461538e-07, |
|
"logits/chosen": -2.4089057445526123, |
|
"logits/rejected": -2.055510997772217, |
|
"logps/chosen": -286.984375, |
|
"logps/rejected": -311.11590576171875, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9583197832107544, |
|
"rewards/margins": 8.543428421020508, |
|
"rewards/rejected": -9.501748085021973, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4596153846153844e-07, |
|
"logits/chosen": -2.5453710556030273, |
|
"logits/rejected": -2.289592981338501, |
|
"logps/chosen": -301.62762451171875, |
|
"logps/rejected": -309.4609069824219, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8477941751480103, |
|
"rewards/margins": 7.631629943847656, |
|
"rewards/rejected": -8.479424476623535, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4403846153846153e-07, |
|
"logits/chosen": -2.5285215377807617, |
|
"logits/rejected": -2.17012095451355, |
|
"logps/chosen": -317.62713623046875, |
|
"logps/rejected": -300.0318298339844, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4716318845748901, |
|
"rewards/margins": 7.3103203773498535, |
|
"rewards/rejected": -8.781950950622559, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4211538461538462e-07, |
|
"logits/chosen": -2.333951473236084, |
|
"logits/rejected": -2.1678061485290527, |
|
"logps/chosen": -298.3710021972656, |
|
"logps/rejected": -300.546630859375, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9506980180740356, |
|
"rewards/margins": 7.881108283996582, |
|
"rewards/rejected": -8.831806182861328, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.401923076923077e-07, |
|
"logits/chosen": -2.359285354614258, |
|
"logits/rejected": -2.172787666320801, |
|
"logps/chosen": -273.92962646484375, |
|
"logps/rejected": -312.80499267578125, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1501799821853638, |
|
"rewards/margins": 7.2449822425842285, |
|
"rewards/rejected": -8.395162582397461, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3826923076923074e-07, |
|
"logits/chosen": -2.4522316455841064, |
|
"logits/rejected": -2.1035475730895996, |
|
"logps/chosen": -273.8643493652344, |
|
"logps/rejected": -292.98089599609375, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4371745586395264, |
|
"rewards/margins": 8.328310012817383, |
|
"rewards/rejected": -9.765485763549805, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3634615384615383e-07, |
|
"logits/chosen": -2.516073703765869, |
|
"logits/rejected": -2.171781539916992, |
|
"logps/chosen": -346.954833984375, |
|
"logps/rejected": -358.9801940917969, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9951909780502319, |
|
"rewards/margins": 8.938260078430176, |
|
"rewards/rejected": -9.933451652526855, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3442307692307691e-07, |
|
"logits/chosen": -2.3622655868530273, |
|
"logits/rejected": -2.100069761276245, |
|
"logps/chosen": -321.36663818359375, |
|
"logps/rejected": -338.2700500488281, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5687800049781799, |
|
"rewards/margins": 8.284536361694336, |
|
"rewards/rejected": -8.853315353393555, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.325e-07, |
|
"logits/chosen": -2.537001848220825, |
|
"logits/rejected": -2.2472758293151855, |
|
"logps/chosen": -326.72735595703125, |
|
"logps/rejected": -396.21026611328125, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1825602054595947, |
|
"rewards/margins": 8.144112586975098, |
|
"rewards/rejected": -9.326672554016113, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_logits/chosen": -2.512586832046509, |
|
"eval_logits/rejected": -2.2102549076080322, |
|
"eval_logps/chosen": -328.43841552734375, |
|
"eval_logps/rejected": -308.2383728027344, |
|
"eval_loss": 0.4562525749206543, |
|
"eval_rewards/accuracies": 0.8015872836112976, |
|
"eval_rewards/chosen": -2.9421753883361816, |
|
"eval_rewards/margins": 3.614194869995117, |
|
"eval_rewards/rejected": -6.556369781494141, |
|
"eval_runtime": 130.4834, |
|
"eval_samples_per_second": 15.328, |
|
"eval_steps_per_second": 0.483, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.305769230769231e-07, |
|
"logits/chosen": -2.5175909996032715, |
|
"logits/rejected": -2.245337963104248, |
|
"logps/chosen": -322.72509765625, |
|
"logps/rejected": -354.58154296875, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8563722372055054, |
|
"rewards/margins": 8.002213478088379, |
|
"rewards/rejected": -8.858585357666016, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2865384615384615e-07, |
|
"logits/chosen": -2.554388999938965, |
|
"logits/rejected": -2.2140610218048096, |
|
"logps/chosen": -298.94482421875, |
|
"logps/rejected": -368.2962341308594, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3428957164287567, |
|
"rewards/margins": 8.27833080291748, |
|
"rewards/rejected": -8.62122631072998, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2673076923076924e-07, |
|
"logits/chosen": -2.54005765914917, |
|
"logits/rejected": -2.136395215988159, |
|
"logps/chosen": -334.72747802734375, |
|
"logps/rejected": -328.95953369140625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.920019805431366, |
|
"rewards/margins": 8.396950721740723, |
|
"rewards/rejected": -9.316969871520996, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.248076923076923e-07, |
|
"logits/chosen": -2.515563726425171, |
|
"logits/rejected": -2.100313425064087, |
|
"logps/chosen": -304.0508117675781, |
|
"logps/rejected": -331.5612487792969, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7162388563156128, |
|
"rewards/margins": 8.973624229431152, |
|
"rewards/rejected": -9.689862251281738, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2288461538461538e-07, |
|
"logits/chosen": -2.5252881050109863, |
|
"logits/rejected": -2.220909595489502, |
|
"logps/chosen": -353.8068542480469, |
|
"logps/rejected": -327.639892578125, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5056819915771484, |
|
"rewards/margins": 8.935028076171875, |
|
"rewards/rejected": -9.440710067749023, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.2096153846153847e-07, |
|
"logits/chosen": -2.464665412902832, |
|
"logits/rejected": -2.2449238300323486, |
|
"logps/chosen": -337.83270263671875, |
|
"logps/rejected": -318.53338623046875, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0805784463882446, |
|
"rewards/margins": 7.592961311340332, |
|
"rewards/rejected": -8.673539161682129, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1903846153846153e-07, |
|
"logits/chosen": -2.430225133895874, |
|
"logits/rejected": -2.233182191848755, |
|
"logps/chosen": -288.67864990234375, |
|
"logps/rejected": -340.03253173828125, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9145647883415222, |
|
"rewards/margins": 8.35079288482666, |
|
"rewards/rejected": -9.26535701751709, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1711538461538462e-07, |
|
"logits/chosen": -2.484591245651245, |
|
"logits/rejected": -2.2712762355804443, |
|
"logps/chosen": -304.85809326171875, |
|
"logps/rejected": -307.70098876953125, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0181230306625366, |
|
"rewards/margins": 8.104196548461914, |
|
"rewards/rejected": -9.122319221496582, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1519230769230768e-07, |
|
"logits/chosen": -2.600891590118408, |
|
"logits/rejected": -2.2220699787139893, |
|
"logps/chosen": -368.4697570800781, |
|
"logps/rejected": -334.89471435546875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5690144300460815, |
|
"rewards/margins": 8.74263858795166, |
|
"rewards/rejected": -9.311653137207031, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1326923076923077e-07, |
|
"logits/chosen": -2.51472544670105, |
|
"logits/rejected": -2.2086689472198486, |
|
"logps/chosen": -311.0439453125, |
|
"logps/rejected": -318.931396484375, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5231196880340576, |
|
"rewards/margins": 8.030439376831055, |
|
"rewards/rejected": -9.553558349609375, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_logits/chosen": -2.5081491470336914, |
|
"eval_logits/rejected": -2.205916404724121, |
|
"eval_logps/chosen": -332.2627868652344, |
|
"eval_logps/rejected": -313.21649169921875, |
|
"eval_loss": 0.4636384844779968, |
|
"eval_rewards/accuracies": 0.8015872836112976, |
|
"eval_rewards/chosen": -3.3246116638183594, |
|
"eval_rewards/margins": 3.7295706272125244, |
|
"eval_rewards/rejected": -7.054182052612305, |
|
"eval_runtime": 130.8912, |
|
"eval_samples_per_second": 15.28, |
|
"eval_steps_per_second": 0.481, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1134615384615384e-07, |
|
"logits/chosen": -2.4573376178741455, |
|
"logits/rejected": -2.0740745067596436, |
|
"logps/chosen": -303.64959716796875, |
|
"logps/rejected": -306.9479064941406, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2850396633148193, |
|
"rewards/margins": 8.17421817779541, |
|
"rewards/rejected": -9.459257125854492, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0942307692307693e-07, |
|
"logits/chosen": -2.4275412559509277, |
|
"logits/rejected": -2.2551393508911133, |
|
"logps/chosen": -307.6458740234375, |
|
"logps/rejected": -377.93280029296875, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.249359369277954, |
|
"rewards/margins": 8.990339279174805, |
|
"rewards/rejected": -10.23969841003418, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0749999999999999e-07, |
|
"logits/chosen": -2.548478126525879, |
|
"logits/rejected": -2.3526339530944824, |
|
"logps/chosen": -295.4488220214844, |
|
"logps/rejected": -376.52154541015625, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1120712757110596, |
|
"rewards/margins": 8.262701988220215, |
|
"rewards/rejected": -9.374773979187012, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0557692307692308e-07, |
|
"logits/chosen": -2.5474133491516113, |
|
"logits/rejected": -2.391606092453003, |
|
"logps/chosen": -334.513671875, |
|
"logps/rejected": -345.8397521972656, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.15473210811615, |
|
"rewards/margins": 8.487726211547852, |
|
"rewards/rejected": -9.64245891571045, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0365384615384615e-07, |
|
"logits/chosen": -2.438559055328369, |
|
"logits/rejected": -2.14943528175354, |
|
"logps/chosen": -308.0134582519531, |
|
"logps/rejected": -321.0350341796875, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2040348052978516, |
|
"rewards/margins": 8.601288795471191, |
|
"rewards/rejected": -9.805322647094727, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0173076923076922e-07, |
|
"logits/chosen": -2.4214491844177246, |
|
"logits/rejected": -2.230001211166382, |
|
"logps/chosen": -316.44677734375, |
|
"logps/rejected": -337.9944763183594, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.9195982813835144, |
|
"rewards/margins": 8.974394798278809, |
|
"rewards/rejected": -9.89399242401123, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.98076923076923e-08, |
|
"logits/chosen": -2.442124605178833, |
|
"logits/rejected": -2.130152702331543, |
|
"logps/chosen": -352.26678466796875, |
|
"logps/rejected": -360.30853271484375, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2404325008392334, |
|
"rewards/margins": 8.4083890914917, |
|
"rewards/rejected": -9.648821830749512, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.788461538461539e-08, |
|
"logits/chosen": -2.4774975776672363, |
|
"logits/rejected": -2.3399832248687744, |
|
"logps/chosen": -281.23297119140625, |
|
"logps/rejected": -328.12054443359375, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.779415488243103, |
|
"rewards/margins": 8.644756317138672, |
|
"rewards/rejected": -10.424173355102539, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.596153846153846e-08, |
|
"logits/chosen": -2.320403575897217, |
|
"logits/rejected": -2.106785297393799, |
|
"logps/chosen": -303.82781982421875, |
|
"logps/rejected": -352.65887451171875, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.5107743740081787, |
|
"rewards/margins": 9.153158187866211, |
|
"rewards/rejected": -10.663931846618652, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.403846153846153e-08, |
|
"logits/chosen": -2.466916561126709, |
|
"logits/rejected": -2.1457412242889404, |
|
"logps/chosen": -302.87255859375, |
|
"logps/rejected": -329.4913024902344, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.343062400817871, |
|
"rewards/margins": 8.397542953491211, |
|
"rewards/rejected": -9.74060344696045, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_logits/chosen": -2.4943490028381348, |
|
"eval_logits/rejected": -2.194486379623413, |
|
"eval_logps/chosen": -332.6160583496094, |
|
"eval_logps/rejected": -314.3266296386719, |
|
"eval_loss": 0.4745297133922577, |
|
"eval_rewards/accuracies": 0.7976190447807312, |
|
"eval_rewards/chosen": -3.3599419593811035, |
|
"eval_rewards/margins": 3.805250883102417, |
|
"eval_rewards/rejected": -7.1651930809021, |
|
"eval_runtime": 131.3732, |
|
"eval_samples_per_second": 15.224, |
|
"eval_steps_per_second": 0.48, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.211538461538461e-08, |
|
"logits/chosen": -2.34041690826416, |
|
"logits/rejected": -2.142360210418701, |
|
"logps/chosen": -270.6195373535156, |
|
"logps/rejected": -337.4690856933594, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4843101501464844, |
|
"rewards/margins": 8.575994491577148, |
|
"rewards/rejected": -10.060304641723633, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.01923076923077e-08, |
|
"logits/chosen": -2.4574790000915527, |
|
"logits/rejected": -2.0679962635040283, |
|
"logps/chosen": -332.03607177734375, |
|
"logps/rejected": -336.3428955078125, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.087881565093994, |
|
"rewards/margins": 8.020088195800781, |
|
"rewards/rejected": -10.107969284057617, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.826923076923077e-08, |
|
"logits/chosen": -2.3383586406707764, |
|
"logits/rejected": -2.243995189666748, |
|
"logps/chosen": -283.6031188964844, |
|
"logps/rejected": -319.84686279296875, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3011380434036255, |
|
"rewards/margins": 8.05530834197998, |
|
"rewards/rejected": -9.3564453125, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.634615384615384e-08, |
|
"logits/chosen": -2.3765769004821777, |
|
"logits/rejected": -2.2421634197235107, |
|
"logps/chosen": -298.99407958984375, |
|
"logps/rejected": -322.83148193359375, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1525708436965942, |
|
"rewards/margins": 8.4398832321167, |
|
"rewards/rejected": -9.592453956604004, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.442307692307692e-08, |
|
"logits/chosen": -2.4655745029449463, |
|
"logits/rejected": -2.1042861938476562, |
|
"logps/chosen": -335.84503173828125, |
|
"logps/rejected": -348.6551818847656, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3728059530258179, |
|
"rewards/margins": 8.628170013427734, |
|
"rewards/rejected": -10.000975608825684, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.25e-08, |
|
"logits/chosen": -2.4270904064178467, |
|
"logits/rejected": -2.292043447494507, |
|
"logps/chosen": -336.81292724609375, |
|
"logps/rejected": -339.5079345703125, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4841727018356323, |
|
"rewards/margins": 8.56916332244873, |
|
"rewards/rejected": -10.053335189819336, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.057692307692306e-08, |
|
"logits/chosen": -2.425565004348755, |
|
"logits/rejected": -2.2447338104248047, |
|
"logps/chosen": -312.3524475097656, |
|
"logps/rejected": -347.9653015136719, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.970663070678711, |
|
"rewards/margins": 9.28294849395752, |
|
"rewards/rejected": -11.25361156463623, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.865384615384615e-08, |
|
"logits/chosen": -2.592557191848755, |
|
"logits/rejected": -2.1757986545562744, |
|
"logps/chosen": -331.1345520019531, |
|
"logps/rejected": -357.92486572265625, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4531742334365845, |
|
"rewards/margins": 8.858552932739258, |
|
"rewards/rejected": -10.311727523803711, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.673076923076923e-08, |
|
"logits/chosen": -2.4398257732391357, |
|
"logits/rejected": -2.263611316680908, |
|
"logps/chosen": -326.148193359375, |
|
"logps/rejected": -340.4607238769531, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3447742462158203, |
|
"rewards/margins": 8.881135940551758, |
|
"rewards/rejected": -10.225909233093262, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.480769230769231e-08, |
|
"logits/chosen": -2.3727784156799316, |
|
"logits/rejected": -2.0641674995422363, |
|
"logps/chosen": -314.0769348144531, |
|
"logps/rejected": -375.218017578125, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9205458164215088, |
|
"rewards/margins": 8.602018356323242, |
|
"rewards/rejected": -10.522564888000488, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -2.4880807399749756, |
|
"eval_logits/rejected": -2.188831090927124, |
|
"eval_logps/chosen": -333.9322204589844, |
|
"eval_logps/rejected": -316.0655517578125, |
|
"eval_loss": 0.48117944598197937, |
|
"eval_rewards/accuracies": 0.7976190447807312, |
|
"eval_rewards/chosen": -3.4915573596954346, |
|
"eval_rewards/margins": 3.8475303649902344, |
|
"eval_rewards/rejected": -7.339086532592773, |
|
"eval_runtime": 130.8113, |
|
"eval_samples_per_second": 15.289, |
|
"eval_steps_per_second": 0.482, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.288461538461537e-08, |
|
"logits/chosen": -2.321307897567749, |
|
"logits/rejected": -2.3813178539276123, |
|
"logps/chosen": -296.61102294921875, |
|
"logps/rejected": -338.35260009765625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7034938335418701, |
|
"rewards/margins": 8.526666641235352, |
|
"rewards/rejected": -10.2301607131958, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.096153846153846e-08, |
|
"logits/chosen": -2.401418685913086, |
|
"logits/rejected": -1.986132264137268, |
|
"logps/chosen": -320.1055603027344, |
|
"logps/rejected": -354.8368225097656, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.6911952495574951, |
|
"rewards/margins": 8.266457557678223, |
|
"rewards/rejected": -9.957651138305664, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.903846153846154e-08, |
|
"logits/chosen": -2.4205477237701416, |
|
"logits/rejected": -2.201981782913208, |
|
"logps/chosen": -280.1654052734375, |
|
"logps/rejected": -321.9850158691406, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0640054941177368, |
|
"rewards/margins": 8.131551742553711, |
|
"rewards/rejected": -9.195556640625, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.711538461538461e-08, |
|
"logits/chosen": -2.4457507133483887, |
|
"logits/rejected": -2.2051494121551514, |
|
"logps/chosen": -301.8743591308594, |
|
"logps/rejected": -312.6191711425781, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5744168758392334, |
|
"rewards/margins": 7.840787410736084, |
|
"rewards/rejected": -9.415205001831055, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.519230769230768e-08, |
|
"logits/chosen": -2.3616795539855957, |
|
"logits/rejected": -2.280289888381958, |
|
"logps/chosen": -294.35308837890625, |
|
"logps/rejected": -342.05218505859375, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.15314781665802, |
|
"rewards/margins": 8.7235107421875, |
|
"rewards/rejected": -9.87665843963623, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.326923076923077e-08, |
|
"logits/chosen": -2.3353302478790283, |
|
"logits/rejected": -2.244074821472168, |
|
"logps/chosen": -258.412353515625, |
|
"logps/rejected": -297.08795166015625, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2376277446746826, |
|
"rewards/margins": 8.651219367980957, |
|
"rewards/rejected": -9.888847351074219, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.134615384615384e-08, |
|
"logits/chosen": -2.478097438812256, |
|
"logits/rejected": -2.2386653423309326, |
|
"logps/chosen": -348.7337341308594, |
|
"logps/rejected": -403.3638610839844, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3783025741577148, |
|
"rewards/margins": 9.295442581176758, |
|
"rewards/rejected": -10.673746109008789, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.9423076923076925e-08, |
|
"logits/chosen": -2.4538445472717285, |
|
"logits/rejected": -2.183901071548462, |
|
"logps/chosen": -297.26519775390625, |
|
"logps/rejected": -336.0912170410156, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6643871068954468, |
|
"rewards/margins": 8.495319366455078, |
|
"rewards/rejected": -10.159708023071289, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.75e-08, |
|
"logits/chosen": -2.4051222801208496, |
|
"logits/rejected": -2.143523931503296, |
|
"logps/chosen": -306.6567077636719, |
|
"logps/rejected": -343.2184143066406, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2398414611816406, |
|
"rewards/margins": 9.57859992980957, |
|
"rewards/rejected": -10.818441390991211, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.557692307692308e-08, |
|
"logits/chosen": -2.491457462310791, |
|
"logits/rejected": -2.118168354034424, |
|
"logps/chosen": -327.12127685546875, |
|
"logps/rejected": -316.92608642578125, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2390878200531006, |
|
"rewards/margins": 8.092336654663086, |
|
"rewards/rejected": -9.331425666809082, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_logits/chosen": -2.455951690673828, |
|
"eval_logits/rejected": -2.164350748062134, |
|
"eval_logps/chosen": -331.2425231933594, |
|
"eval_logps/rejected": -312.561279296875, |
|
"eval_loss": 0.4678390920162201, |
|
"eval_rewards/accuracies": 0.7976190447807312, |
|
"eval_rewards/chosen": -3.2225840091705322, |
|
"eval_rewards/margins": 3.7660768032073975, |
|
"eval_rewards/rejected": -6.9886603355407715, |
|
"eval_runtime": 130.8661, |
|
"eval_samples_per_second": 15.283, |
|
"eval_steps_per_second": 0.481, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.365384615384615e-08, |
|
"logits/chosen": -2.4199442863464355, |
|
"logits/rejected": -2.348403215408325, |
|
"logps/chosen": -288.18865966796875, |
|
"logps/rejected": -321.6029968261719, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4005610942840576, |
|
"rewards/margins": 8.314815521240234, |
|
"rewards/rejected": -9.715375900268555, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1730769230769234e-08, |
|
"logits/chosen": -2.3591227531433105, |
|
"logits/rejected": -2.2464001178741455, |
|
"logps/chosen": -338.2710876464844, |
|
"logps/rejected": -352.8677673339844, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3274883031845093, |
|
"rewards/margins": 8.12449836730957, |
|
"rewards/rejected": -9.451985359191895, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.980769230769231e-08, |
|
"logits/chosen": -2.3736279010772705, |
|
"logits/rejected": -2.245208978652954, |
|
"logps/chosen": -286.21038818359375, |
|
"logps/rejected": -339.1667175292969, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6096528768539429, |
|
"rewards/margins": 8.592547416687012, |
|
"rewards/rejected": -9.202199935913086, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.788461538461538e-08, |
|
"logits/chosen": -2.3171088695526123, |
|
"logits/rejected": -2.2612321376800537, |
|
"logps/chosen": -266.6427001953125, |
|
"logps/rejected": -303.3582458496094, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0699584484100342, |
|
"rewards/margins": 8.39510440826416, |
|
"rewards/rejected": -9.465063095092773, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.596153846153846e-08, |
|
"logits/chosen": -2.322035551071167, |
|
"logits/rejected": -2.1794607639312744, |
|
"logps/chosen": -310.55230712890625, |
|
"logps/rejected": -354.242431640625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9702370762825012, |
|
"rewards/margins": 8.775087356567383, |
|
"rewards/rejected": -9.745325088500977, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.4038461538461536e-08, |
|
"logits/chosen": -2.3565049171447754, |
|
"logits/rejected": -2.1641526222229004, |
|
"logps/chosen": -284.02880859375, |
|
"logps/rejected": -341.21258544921875, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5019583702087402, |
|
"rewards/margins": 8.348108291625977, |
|
"rewards/rejected": -9.850065231323242, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.211538461538462e-08, |
|
"logits/chosen": -2.457857608795166, |
|
"logits/rejected": -2.1471848487854004, |
|
"logps/chosen": -322.2108459472656, |
|
"logps/rejected": -342.0418395996094, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7656872272491455, |
|
"rewards/margins": 8.020166397094727, |
|
"rewards/rejected": -9.785853385925293, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.019230769230769e-08, |
|
"logits/chosen": -2.314117670059204, |
|
"logits/rejected": -2.1793768405914307, |
|
"logps/chosen": -333.1905822753906, |
|
"logps/rejected": -368.11749267578125, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2259396314620972, |
|
"rewards/margins": 8.503653526306152, |
|
"rewards/rejected": -9.729593276977539, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.826923076923077e-08, |
|
"logits/chosen": -2.349207878112793, |
|
"logits/rejected": -2.1943373680114746, |
|
"logps/chosen": -265.8515625, |
|
"logps/rejected": -290.029541015625, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5206115245819092, |
|
"rewards/margins": 8.009859085083008, |
|
"rewards/rejected": -9.530470848083496, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6346153846153845e-08, |
|
"logits/chosen": -2.2535085678100586, |
|
"logits/rejected": -2.2148807048797607, |
|
"logps/chosen": -296.49786376953125, |
|
"logps/rejected": -332.1768798828125, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0893945693969727, |
|
"rewards/margins": 8.603468894958496, |
|
"rewards/rejected": -10.692864418029785, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_logits/chosen": -2.4482967853546143, |
|
"eval_logits/rejected": -2.157228708267212, |
|
"eval_logps/chosen": -333.32342529296875, |
|
"eval_logps/rejected": -315.1584167480469, |
|
"eval_loss": 0.4693792760372162, |
|
"eval_rewards/accuracies": 0.7976190447807312, |
|
"eval_rewards/chosen": -3.4306719303131104, |
|
"eval_rewards/margins": 3.8177053928375244, |
|
"eval_rewards/rejected": -7.248377323150635, |
|
"eval_runtime": 130.9189, |
|
"eval_samples_per_second": 15.277, |
|
"eval_steps_per_second": 0.481, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.4423076923076926e-08, |
|
"logits/chosen": -2.188178300857544, |
|
"logits/rejected": -2.098421335220337, |
|
"logps/chosen": -260.811279296875, |
|
"logps/rejected": -316.5066223144531, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3799587488174438, |
|
"rewards/margins": 8.445514678955078, |
|
"rewards/rejected": -9.825471878051758, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.25e-08, |
|
"logits/chosen": -2.4772255420684814, |
|
"logits/rejected": -2.2826056480407715, |
|
"logps/chosen": -332.14886474609375, |
|
"logps/rejected": -351.8216857910156, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2810627222061157, |
|
"rewards/margins": 8.637121200561523, |
|
"rewards/rejected": -9.918184280395508, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.0576923076923074e-08, |
|
"logits/chosen": -2.433171510696411, |
|
"logits/rejected": -2.2717080116271973, |
|
"logps/chosen": -310.1302795410156, |
|
"logps/rejected": -352.16851806640625, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.9453051090240479, |
|
"rewards/margins": 8.546197891235352, |
|
"rewards/rejected": -10.491501808166504, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8653846153846154e-08, |
|
"logits/chosen": -2.4593279361724854, |
|
"logits/rejected": -2.2902705669403076, |
|
"logps/chosen": -298.08258056640625, |
|
"logps/rejected": -338.76300048828125, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3809144496917725, |
|
"rewards/margins": 8.587820053100586, |
|
"rewards/rejected": -9.968733787536621, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.673076923076923e-08, |
|
"logits/chosen": -2.4502007961273193, |
|
"logits/rejected": -2.133526563644409, |
|
"logps/chosen": -303.839111328125, |
|
"logps/rejected": -360.4519958496094, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9425987005233765, |
|
"rewards/margins": 8.856470108032227, |
|
"rewards/rejected": -9.799068450927734, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4807692307692306e-08, |
|
"logits/chosen": -2.3859500885009766, |
|
"logits/rejected": -2.0461232662200928, |
|
"logps/chosen": -288.9315490722656, |
|
"logps/rejected": -340.39483642578125, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.621050238609314, |
|
"rewards/margins": 8.439092636108398, |
|
"rewards/rejected": -10.060144424438477, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2884615384615383e-08, |
|
"logits/chosen": -2.399702787399292, |
|
"logits/rejected": -2.210979700088501, |
|
"logps/chosen": -303.9853515625, |
|
"logps/rejected": -327.6991271972656, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2347537279129028, |
|
"rewards/margins": 8.255434036254883, |
|
"rewards/rejected": -9.490187644958496, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.096153846153846e-08, |
|
"logits/chosen": -2.4862875938415527, |
|
"logits/rejected": -1.9970000982284546, |
|
"logps/chosen": -348.6851806640625, |
|
"logps/rejected": -346.09869384765625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.210662841796875, |
|
"rewards/margins": 8.414986610412598, |
|
"rewards/rejected": -9.625649452209473, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9038461538461537e-08, |
|
"logits/chosen": -2.2640528678894043, |
|
"logits/rejected": -2.353923797607422, |
|
"logps/chosen": -350.0566711425781, |
|
"logps/rejected": -368.6754150390625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4119244813919067, |
|
"rewards/margins": 8.232645034790039, |
|
"rewards/rejected": -9.644569396972656, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7115384615384615e-08, |
|
"logits/chosen": -2.500624895095825, |
|
"logits/rejected": -2.284454107284546, |
|
"logps/chosen": -296.8025817871094, |
|
"logps/rejected": -343.0960998535156, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.786973237991333, |
|
"rewards/margins": 8.618185043334961, |
|
"rewards/rejected": -9.405158996582031, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_logits/chosen": -2.449103832244873, |
|
"eval_logits/rejected": -2.1575379371643066, |
|
"eval_logps/chosen": -333.975830078125, |
|
"eval_logps/rejected": -315.9576416015625, |
|
"eval_loss": 0.47073113918304443, |
|
"eval_rewards/accuracies": 0.8055555820465088, |
|
"eval_rewards/chosen": -3.4959194660186768, |
|
"eval_rewards/margins": 3.832378625869751, |
|
"eval_rewards/rejected": -7.3282976150512695, |
|
"eval_runtime": 131.1669, |
|
"eval_samples_per_second": 15.248, |
|
"eval_steps_per_second": 0.48, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5192307692307692e-08, |
|
"logits/chosen": -2.2975189685821533, |
|
"logits/rejected": -2.093742847442627, |
|
"logps/chosen": -296.9509582519531, |
|
"logps/rejected": -362.02008056640625, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4728249311447144, |
|
"rewards/margins": 9.322419166564941, |
|
"rewards/rejected": -10.795244216918945, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.326923076923077e-08, |
|
"logits/chosen": -2.4062585830688477, |
|
"logits/rejected": -2.2824714183807373, |
|
"logps/chosen": -352.0885314941406, |
|
"logps/rejected": -359.2705383300781, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8912947177886963, |
|
"rewards/margins": 8.833779335021973, |
|
"rewards/rejected": -10.725072860717773, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1346153846153845e-08, |
|
"logits/chosen": -2.436295509338379, |
|
"logits/rejected": -2.212411880493164, |
|
"logps/chosen": -306.6794738769531, |
|
"logps/rejected": -373.9212341308594, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6317508220672607, |
|
"rewards/margins": 9.109142303466797, |
|
"rewards/rejected": -10.74089241027832, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.423076923076922e-09, |
|
"logits/chosen": -2.5206334590911865, |
|
"logits/rejected": -2.230746030807495, |
|
"logps/chosen": -374.86334228515625, |
|
"logps/rejected": -365.6614685058594, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1036624908447266, |
|
"rewards/margins": 9.229228019714355, |
|
"rewards/rejected": -10.332891464233398, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.5e-09, |
|
"logits/chosen": -2.4916155338287354, |
|
"logits/rejected": -2.3132541179656982, |
|
"logps/chosen": -284.1214599609375, |
|
"logps/rejected": -332.753662109375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7909221649169922, |
|
"rewards/margins": 7.715114593505859, |
|
"rewards/rejected": -9.506036758422852, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.576923076923077e-09, |
|
"logits/chosen": -2.560082197189331, |
|
"logits/rejected": -2.2312729358673096, |
|
"logps/chosen": -309.1767272949219, |
|
"logps/rejected": -349.9689025878906, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8000606298446655, |
|
"rewards/margins": 8.335681915283203, |
|
"rewards/rejected": -10.135743141174316, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.6538461538461534e-09, |
|
"logits/chosen": -2.233848810195923, |
|
"logits/rejected": -2.199557304382324, |
|
"logps/chosen": -336.6528015136719, |
|
"logps/rejected": -391.2978820800781, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6596148014068604, |
|
"rewards/margins": 8.436461448669434, |
|
"rewards/rejected": -10.096076011657715, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.7307692307692307e-09, |
|
"logits/chosen": -2.3136038780212402, |
|
"logits/rejected": -2.09283447265625, |
|
"logps/chosen": -289.9815368652344, |
|
"logps/rejected": -315.29498291015625, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.966369867324829, |
|
"rewards/margins": 7.501823425292969, |
|
"rewards/rejected": -9.468193054199219, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2889, |
|
"total_flos": 0.0, |
|
"train_loss": 0.16336456995732068, |
|
"train_runtime": 43274.0653, |
|
"train_samples_per_second": 4.272, |
|
"train_steps_per_second": 0.067 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2889, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|