|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 100, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": 0.01849743165075779, |
|
"logits/rejected": 0.013860300183296204, |
|
"logps/chosen": -318.92303466796875, |
|
"logps/rejected": -327.4117126464844, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": 0.0165844839066267, |
|
"logits/rejected": 0.029045505449175835, |
|
"logps/chosen": -380.119384765625, |
|
"logps/rejected": -372.70452880859375, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.4930555522441864, |
|
"rewards/chosen": 0.00031676876824349165, |
|
"rewards/margins": 0.0008045767317526042, |
|
"rewards/rejected": -0.00048780813813209534, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.01443287543952465, |
|
"logits/rejected": 0.01765434443950653, |
|
"logps/chosen": -396.4976501464844, |
|
"logps/rejected": -366.0671691894531, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.000257034320384264, |
|
"rewards/margins": 0.0013006285298615694, |
|
"rewards/rejected": -0.0010435942094773054, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 0.037671297788619995, |
|
"logits/rejected": 0.06698160618543625, |
|
"logps/chosen": -374.0677795410156, |
|
"logps/rejected": -360.3742370605469, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0024321433156728745, |
|
"rewards/margins": 0.003862987505272031, |
|
"rewards/rejected": -0.006295130588114262, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.016021814197301865, |
|
"logits/rejected": 0.040130265057086945, |
|
"logps/chosen": -384.62115478515625, |
|
"logps/rejected": -369.37591552734375, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.005014514084905386, |
|
"rewards/margins": 0.00654798885807395, |
|
"rewards/rejected": -0.01156250387430191, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999731868769026e-07, |
|
"logits/chosen": 0.021576542407274246, |
|
"logits/rejected": 0.04092331975698471, |
|
"logps/chosen": -395.0044860839844, |
|
"logps/rejected": -385.6026306152344, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.011928597465157509, |
|
"rewards/margins": 0.01728428527712822, |
|
"rewards/rejected": -0.02921288087964058, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": 0.09396852552890778, |
|
"logits/rejected": 0.177364319562912, |
|
"logps/chosen": -373.46978759765625, |
|
"logps/rejected": -350.2561950683594, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.022122707217931747, |
|
"rewards/margins": 0.04510267823934555, |
|
"rewards/rejected": -0.067225381731987, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967625656594781e-07, |
|
"logits/chosen": 0.09231746941804886, |
|
"logits/rejected": 0.10504136979579926, |
|
"logps/chosen": -380.4566955566406, |
|
"logps/rejected": -384.76495361328125, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.020214151591062546, |
|
"rewards/margins": 0.044125162065029144, |
|
"rewards/rejected": -0.06433931738138199, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93167072587771e-07, |
|
"logits/chosen": 0.1812177449464798, |
|
"logits/rejected": 0.2344866693019867, |
|
"logps/chosen": -373.54779052734375, |
|
"logps/rejected": -344.9815673828125, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.003499386366456747, |
|
"rewards/margins": 0.11121924966573715, |
|
"rewards/rejected": -0.11471863090991974, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": 0.23078179359436035, |
|
"logits/rejected": 0.3160688281059265, |
|
"logps/chosen": -398.22735595703125, |
|
"logps/rejected": -354.7359619140625, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03982505947351456, |
|
"rewards/margins": 0.12135788053274155, |
|
"rewards/rejected": -0.1611829400062561, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.820919832540181e-07, |
|
"logits/chosen": 0.33522385358810425, |
|
"logits/rejected": 0.34693339467048645, |
|
"logps/chosen": -373.6068115234375, |
|
"logps/rejected": -393.63311767578125, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05229802802205086, |
|
"rewards/margins": 0.1304590255022049, |
|
"rewards/rejected": -0.18275703489780426, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": 0.49261584877967834, |
|
"eval_logits/rejected": 0.5302599668502808, |
|
"eval_logps/chosen": -392.5748291015625, |
|
"eval_logps/rejected": -418.8423767089844, |
|
"eval_loss": 0.08443526923656464, |
|
"eval_rewards/accuracies": 0.69921875, |
|
"eval_rewards/chosen": -0.09445539116859436, |
|
"eval_rewards/margins": 0.20123936235904694, |
|
"eval_rewards/rejected": -0.2956947684288025, |
|
"eval_runtime": 75.5045, |
|
"eval_samples_per_second": 26.488, |
|
"eval_steps_per_second": 0.424, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7467175306295647e-07, |
|
"logits/chosen": 0.5233359336853027, |
|
"logits/rejected": 0.5924205780029297, |
|
"logps/chosen": -409.8135681152344, |
|
"logps/rejected": -400.6418151855469, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.17791931331157684, |
|
"rewards/margins": 0.2254853993654251, |
|
"rewards/rejected": -0.40340471267700195, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": 0.6610409021377563, |
|
"logits/rejected": 0.8009072542190552, |
|
"logps/chosen": -459.3719787597656, |
|
"logps/rejected": -480.128662109375, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2562519609928131, |
|
"rewards/margins": 0.2973101735115051, |
|
"rewards/rejected": -0.5535621643066406, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5626458262912735e-07, |
|
"logits/chosen": 0.8142817616462708, |
|
"logits/rejected": 1.0136159658432007, |
|
"logps/chosen": -453.57037353515625, |
|
"logps/rejected": -438.6094665527344, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.45035696029663086, |
|
"rewards/margins": 0.2075636386871338, |
|
"rewards/rejected": -0.6579206585884094, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453763107901675e-07, |
|
"logits/chosen": 0.9267638325691223, |
|
"logits/rejected": 0.9543718099594116, |
|
"logps/chosen": -426.4134826660156, |
|
"logps/rejected": -436.49261474609375, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.33891427516937256, |
|
"rewards/margins": 0.302972674369812, |
|
"rewards/rejected": -0.6418868899345398, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": 0.834929347038269, |
|
"logits/rejected": 1.0096248388290405, |
|
"logps/chosen": -383.9637756347656, |
|
"logps/rejected": -392.84912109375, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2390960454940796, |
|
"rewards/margins": 0.35297515988349915, |
|
"rewards/rejected": -0.5920711755752563, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2052190435769554e-07, |
|
"logits/chosen": 1.0894076824188232, |
|
"logits/rejected": 1.2157137393951416, |
|
"logps/chosen": -429.09857177734375, |
|
"logps/rejected": -461.9745178222656, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5552287101745605, |
|
"rewards/margins": 0.3786623775959015, |
|
"rewards/rejected": -0.9338911175727844, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0668899744407567e-07, |
|
"logits/chosen": 0.9078506231307983, |
|
"logits/rejected": 1.0372017621994019, |
|
"logps/chosen": -482.3373107910156, |
|
"logps/rejected": -479.88916015625, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.505352795124054, |
|
"rewards/margins": 0.26132458448410034, |
|
"rewards/rejected": -0.7666773796081543, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": 0.80833500623703, |
|
"logits/rejected": 0.8488121032714844, |
|
"logps/chosen": -413.3409118652344, |
|
"logps/rejected": -438.3705139160156, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4049296975135803, |
|
"rewards/margins": 0.3900560736656189, |
|
"rewards/rejected": -0.7949857115745544, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765821230985757e-07, |
|
"logits/chosen": 0.9091412425041199, |
|
"logits/rejected": 1.0051593780517578, |
|
"logps/chosen": -395.74383544921875, |
|
"logps/rejected": -402.8367919921875, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.4961649775505066, |
|
"rewards/margins": 0.3637959361076355, |
|
"rewards/rejected": -0.8599609136581421, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604695382782159e-07, |
|
"logits/chosen": 1.0421111583709717, |
|
"logits/rejected": 1.1686071157455444, |
|
"logps/chosen": -422.24224853515625, |
|
"logps/rejected": -469.1251525878906, |
|
"loss": 0.0405, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7005800008773804, |
|
"rewards/margins": 0.46449971199035645, |
|
"rewards/rejected": -1.1650797128677368, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": 1.1859312057495117, |
|
"eval_logits/rejected": 1.2733540534973145, |
|
"eval_logps/chosen": -449.3788757324219, |
|
"eval_logps/rejected": -505.84661865234375, |
|
"eval_loss": 0.045209601521492004, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -0.6624964475631714, |
|
"eval_rewards/margins": 0.5032405257225037, |
|
"eval_rewards/rejected": -1.1657369136810303, |
|
"eval_runtime": 75.0855, |
|
"eval_samples_per_second": 26.636, |
|
"eval_steps_per_second": 0.426, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": 0.9289053082466125, |
|
"logits/rejected": 1.0322377681732178, |
|
"logps/chosen": -454.09521484375, |
|
"logps/rejected": -484.48956298828125, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5330354571342468, |
|
"rewards/margins": 0.47441625595092773, |
|
"rewards/rejected": -1.0074517726898193, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.265574537815398e-07, |
|
"logits/chosen": 0.6325788497924805, |
|
"logits/rejected": 0.8454742431640625, |
|
"logps/chosen": -443.6888732910156, |
|
"logps/rejected": -444.2510681152344, |
|
"loss": 0.051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4299241006374359, |
|
"rewards/margins": 0.41193485260009766, |
|
"rewards/rejected": -0.8418590426445007, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0893973387735683e-07, |
|
"logits/chosen": 0.8997888565063477, |
|
"logits/rejected": 0.9853512048721313, |
|
"logps/chosen": -413.89520263671875, |
|
"logps/rejected": -458.99676513671875, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5308324694633484, |
|
"rewards/margins": 0.4597201943397522, |
|
"rewards/rejected": -0.9905527830123901, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": 1.0547417402267456, |
|
"logits/rejected": 1.1306800842285156, |
|
"logps/chosen": -493.91790771484375, |
|
"logps/rejected": -539.1799926757812, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6113244295120239, |
|
"rewards/margins": 0.5182110667228699, |
|
"rewards/rejected": -1.1295355558395386, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7285261601056697e-07, |
|
"logits/chosen": 1.2281643152236938, |
|
"logits/rejected": 1.359076976776123, |
|
"logps/chosen": -466.77001953125, |
|
"logps/rejected": -483.91259765625, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7258759140968323, |
|
"rewards/margins": 0.42711353302001953, |
|
"rewards/rejected": -1.152989387512207, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5457665670441937e-07, |
|
"logits/chosen": 1.2255347967147827, |
|
"logits/rejected": 1.462003469467163, |
|
"logps/chosen": -491.76190185546875, |
|
"logps/rejected": -505.47161865234375, |
|
"loss": 0.0451, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7059242725372314, |
|
"rewards/margins": 0.6359472274780273, |
|
"rewards/rejected": -1.3418715000152588, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": 1.3674428462982178, |
|
"logits/rejected": 1.578064203262329, |
|
"logps/chosen": -486.397216796875, |
|
"logps/rejected": -492.1827087402344, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7773429155349731, |
|
"rewards/margins": 0.38945746421813965, |
|
"rewards/rejected": -1.1668003797531128, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1804923757009882e-07, |
|
"logits/chosen": 1.366081953048706, |
|
"logits/rejected": 1.5207383632659912, |
|
"logps/chosen": -477.0743103027344, |
|
"logps/rejected": -530.8953857421875, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8941423296928406, |
|
"rewards/margins": 0.4790104925632477, |
|
"rewards/rejected": -1.3731528520584106, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9999357655598891e-07, |
|
"logits/chosen": 1.2689809799194336, |
|
"logits/rejected": 1.4011085033416748, |
|
"logps/chosen": -438.982421875, |
|
"logps/rejected": -469.45703125, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.62468022108078, |
|
"rewards/margins": 0.513271689414978, |
|
"rewards/rejected": -1.1379519701004028, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": 1.1505718231201172, |
|
"logits/rejected": 1.4240622520446777, |
|
"logps/chosen": -458.03631591796875, |
|
"logps/rejected": -443.11712646484375, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6710134148597717, |
|
"rewards/margins": 0.39567166566848755, |
|
"rewards/rejected": -1.0666849613189697, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 1.2982094287872314, |
|
"eval_logits/rejected": 1.409311056137085, |
|
"eval_logps/chosen": -435.2132568359375, |
|
"eval_logps/rejected": -501.30841064453125, |
|
"eval_loss": 0.047696553170681, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -0.5208398699760437, |
|
"eval_rewards/margins": 0.5995149612426758, |
|
"eval_rewards/rejected": -1.1203548908233643, |
|
"eval_runtime": 75.296, |
|
"eval_samples_per_second": 26.562, |
|
"eval_steps_per_second": 0.425, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.647817538357072e-07, |
|
"logits/chosen": 1.2780801057815552, |
|
"logits/rejected": 1.3399560451507568, |
|
"logps/chosen": -475.42413330078125, |
|
"logps/rejected": -517.4520263671875, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7529923319816589, |
|
"rewards/margins": 0.4368392825126648, |
|
"rewards/rejected": -1.1898316144943237, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.478143389201113e-07, |
|
"logits/chosen": 1.198677897453308, |
|
"logits/rejected": 1.4085700511932373, |
|
"logps/chosen": -498.35711669921875, |
|
"logps/rejected": -497.4380798339844, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6401562690734863, |
|
"rewards/margins": 0.48012202978134155, |
|
"rewards/rejected": -1.1202783584594727, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": 1.2183105945587158, |
|
"logits/rejected": 1.2747819423675537, |
|
"logps/chosen": -442.5284118652344, |
|
"logps/rejected": -533.216796875, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5910875201225281, |
|
"rewards/margins": 0.5799761414527893, |
|
"rewards/rejected": -1.1710636615753174, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1561076868822755e-07, |
|
"logits/chosen": 1.203604817390442, |
|
"logits/rejected": 1.1832085847854614, |
|
"logps/chosen": -441.4521484375, |
|
"logps/rejected": -512.8982543945312, |
|
"loss": 0.0428, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7571262121200562, |
|
"rewards/margins": 0.4640630781650543, |
|
"rewards/rejected": -1.221189260482788, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0054723495346482e-07, |
|
"logits/chosen": 1.3052194118499756, |
|
"logits/rejected": 1.382683515548706, |
|
"logps/chosen": -465.3661193847656, |
|
"logps/rejected": -528.7847290039062, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7757940292358398, |
|
"rewards/margins": 0.4744884967803955, |
|
"rewards/rejected": -1.2502825260162354, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": 1.1976938247680664, |
|
"logits/rejected": 1.432969331741333, |
|
"logps/chosen": -491.15771484375, |
|
"logps/rejected": -515.0520629882812, |
|
"loss": 0.0446, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6491819620132446, |
|
"rewards/margins": 0.6244359612464905, |
|
"rewards/rejected": -1.2736178636550903, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.289996455765748e-08, |
|
"logits/chosen": 1.192779541015625, |
|
"logits/rejected": 1.324210524559021, |
|
"logps/chosen": -504.5486755371094, |
|
"logps/rejected": -508.7030334472656, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7155844569206238, |
|
"rewards/margins": 0.5151349306106567, |
|
"rewards/rejected": -1.2307194471359253, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.046442623320145e-08, |
|
"logits/chosen": 1.223356008529663, |
|
"logits/rejected": 1.4434764385223389, |
|
"logps/chosen": -474.7169494628906, |
|
"logps/rejected": -518.0782470703125, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6729675531387329, |
|
"rewards/margins": 0.6387326717376709, |
|
"rewards/rejected": -1.3117002248764038, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": 1.3060978651046753, |
|
"logits/rejected": 1.4896109104156494, |
|
"logps/chosen": -470.46661376953125, |
|
"logps/rejected": -502.4981384277344, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6783354878425598, |
|
"rewards/margins": 0.5045996904373169, |
|
"rewards/rejected": -1.1829349994659424, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8702478614051345e-08, |
|
"logits/chosen": 1.3413165807724, |
|
"logits/rejected": 1.4800562858581543, |
|
"logps/chosen": -450.84844970703125, |
|
"logps/rejected": -509.7266540527344, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7674819231033325, |
|
"rewards/margins": 0.4173991084098816, |
|
"rewards/rejected": -1.1848809719085693, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 1.491492748260498, |
|
"eval_logits/rejected": 1.6154029369354248, |
|
"eval_logps/chosen": -448.419677734375, |
|
"eval_logps/rejected": -518.3443603515625, |
|
"eval_loss": 0.044891636818647385, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -0.6529037952423096, |
|
"eval_rewards/margins": 0.6378109455108643, |
|
"eval_rewards/rejected": -1.2907147407531738, |
|
"eval_runtime": 74.6873, |
|
"eval_samples_per_second": 26.778, |
|
"eval_steps_per_second": 0.428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9492720416985e-08, |
|
"logits/chosen": 1.3658090829849243, |
|
"logits/rejected": 1.523946762084961, |
|
"logps/chosen": -461.0426330566406, |
|
"logps/rejected": -491.6429138183594, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6857269406318665, |
|
"rewards/margins": 0.5137700438499451, |
|
"rewards/rejected": -1.1994969844818115, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": 1.3796783685684204, |
|
"logits/rejected": 1.5178402662277222, |
|
"logps/chosen": -454.60455322265625, |
|
"logps/rejected": -483.65704345703125, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6705530285835266, |
|
"rewards/margins": 0.604373574256897, |
|
"rewards/rejected": -1.2749265432357788, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4662207078575684e-08, |
|
"logits/chosen": 1.334680199623108, |
|
"logits/rejected": 1.4741976261138916, |
|
"logps/chosen": -504.280029296875, |
|
"logps/rejected": -529.8871459960938, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7305961847305298, |
|
"rewards/margins": 0.5881385207176208, |
|
"rewards/rejected": -1.3187347650527954, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.12094829893642e-09, |
|
"logits/chosen": 1.3827157020568848, |
|
"logits/rejected": 1.5478546619415283, |
|
"logps/chosen": -453.01171875, |
|
"logps/rejected": -480.3030700683594, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7755357027053833, |
|
"rewards/margins": 0.5378071069717407, |
|
"rewards/rejected": -1.313342809677124, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": 1.2280631065368652, |
|
"logits/rejected": 1.454526662826538, |
|
"logps/chosen": -487.4305114746094, |
|
"logps/rejected": -500.71087646484375, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6377500295639038, |
|
"rewards/margins": 0.5590785145759583, |
|
"rewards/rejected": -1.1968284845352173, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9347820230782295e-09, |
|
"logits/chosen": 1.336721658706665, |
|
"logits/rejected": 1.4986612796783447, |
|
"logps/chosen": -455.5997619628906, |
|
"logps/rejected": -474.46038818359375, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7689257264137268, |
|
"rewards/margins": 0.4747004508972168, |
|
"rewards/rejected": -1.243626356124878, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.2839470889836627e-10, |
|
"logits/chosen": 1.2109500169754028, |
|
"logits/rejected": 1.3351854085922241, |
|
"logps/chosen": -490.6439514160156, |
|
"logps/rejected": -541.4273681640625, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7248164415359497, |
|
"rewards/margins": 0.548802375793457, |
|
"rewards/rejected": -1.2736186981201172, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0564979040210352, |
|
"train_runtime": 4410.0999, |
|
"train_samples_per_second": 13.862, |
|
"train_steps_per_second": 0.108 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|