zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
df30c76 verified
raw history blame
No virus
25.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 100,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": 0.01849743165075779,
"logits/rejected": 0.013860300183296204,
"logps/chosen": -318.92303466796875,
"logps/rejected": -327.4117126464844,
"loss": 0.0872,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": 0.0165844839066267,
"logits/rejected": 0.029045505449175835,
"logps/chosen": -380.119384765625,
"logps/rejected": -372.70452880859375,
"loss": 0.0916,
"rewards/accuracies": 0.4930555522441864,
"rewards/chosen": 0.00031676876824349165,
"rewards/margins": 0.0008045767317526042,
"rewards/rejected": -0.00048780813813209534,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.01443287543952465,
"logits/rejected": 0.01765434443950653,
"logps/chosen": -396.4976501464844,
"logps/rejected": -366.0671691894531,
"loss": 0.0929,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.000257034320384264,
"rewards/margins": 0.0013006285298615694,
"rewards/rejected": -0.0010435942094773054,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": 0.037671297788619995,
"logits/rejected": 0.06698160618543625,
"logps/chosen": -374.0677795410156,
"logps/rejected": -360.3742370605469,
"loss": 0.0849,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.0024321433156728745,
"rewards/margins": 0.003862987505272031,
"rewards/rejected": -0.006295130588114262,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.016021814197301865,
"logits/rejected": 0.040130265057086945,
"logps/chosen": -384.62115478515625,
"logps/rejected": -369.37591552734375,
"loss": 0.0899,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.005014514084905386,
"rewards/margins": 0.00654798885807395,
"rewards/rejected": -0.01156250387430191,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": 0.021576542407274246,
"logits/rejected": 0.04092331975698471,
"logps/chosen": -395.0044860839844,
"logps/rejected": -385.6026306152344,
"loss": 0.0905,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.011928597465157509,
"rewards/margins": 0.01728428527712822,
"rewards/rejected": -0.02921288087964058,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": 0.09396852552890778,
"logits/rejected": 0.177364319562912,
"logps/chosen": -373.46978759765625,
"logps/rejected": -350.2561950683594,
"loss": 0.0896,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.022122707217931747,
"rewards/margins": 0.04510267823934555,
"rewards/rejected": -0.067225381731987,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": 0.09231746941804886,
"logits/rejected": 0.10504136979579926,
"logps/chosen": -380.4566955566406,
"logps/rejected": -384.76495361328125,
"loss": 0.0895,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.020214151591062546,
"rewards/margins": 0.044125162065029144,
"rewards/rejected": -0.06433931738138199,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": 0.1812177449464798,
"logits/rejected": 0.2344866693019867,
"logps/chosen": -373.54779052734375,
"logps/rejected": -344.9815673828125,
"loss": 0.0887,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.003499386366456747,
"rewards/margins": 0.11121924966573715,
"rewards/rejected": -0.11471863090991974,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": 0.23078179359436035,
"logits/rejected": 0.3160688281059265,
"logps/chosen": -398.22735595703125,
"logps/rejected": -354.7359619140625,
"loss": 0.0854,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.03982505947351456,
"rewards/margins": 0.12135788053274155,
"rewards/rejected": -0.1611829400062561,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.33522385358810425,
"logits/rejected": 0.34693339467048645,
"logps/chosen": -373.6068115234375,
"logps/rejected": -393.63311767578125,
"loss": 0.09,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.05229802802205086,
"rewards/margins": 0.1304590255022049,
"rewards/rejected": -0.18275703489780426,
"step": 100
},
{
"epoch": 0.21,
"eval_logits/chosen": 0.49261584877967834,
"eval_logits/rejected": 0.5302599668502808,
"eval_logps/chosen": -392.5748291015625,
"eval_logps/rejected": -418.8423767089844,
"eval_loss": 0.08443526923656464,
"eval_rewards/accuracies": 0.69921875,
"eval_rewards/chosen": -0.09445539116859436,
"eval_rewards/margins": 0.20123936235904694,
"eval_rewards/rejected": -0.2956947684288025,
"eval_runtime": 75.5045,
"eval_samples_per_second": 26.488,
"eval_steps_per_second": 0.424,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": 0.5233359336853027,
"logits/rejected": 0.5924205780029297,
"logps/chosen": -409.8135681152344,
"logps/rejected": -400.6418151855469,
"loss": 0.0775,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.17791931331157684,
"rewards/margins": 0.2254853993654251,
"rewards/rejected": -0.40340471267700195,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": 0.6610409021377563,
"logits/rejected": 0.8009072542190552,
"logps/chosen": -459.3719787597656,
"logps/rejected": -480.128662109375,
"loss": 0.0697,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.2562519609928131,
"rewards/margins": 0.2973101735115051,
"rewards/rejected": -0.5535621643066406,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": 0.8142817616462708,
"logits/rejected": 1.0136159658432007,
"logps/chosen": -453.57037353515625,
"logps/rejected": -438.6094665527344,
"loss": 0.0557,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.45035696029663086,
"rewards/margins": 0.2075636386871338,
"rewards/rejected": -0.6579206585884094,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": 0.9267638325691223,
"logits/rejected": 0.9543718099594116,
"logps/chosen": -426.4134826660156,
"logps/rejected": -436.49261474609375,
"loss": 0.06,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.33891427516937256,
"rewards/margins": 0.302972674369812,
"rewards/rejected": -0.6418868899345398,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": 0.834929347038269,
"logits/rejected": 1.0096248388290405,
"logps/chosen": -383.9637756347656,
"logps/rejected": -392.84912109375,
"loss": 0.0588,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.2390960454940796,
"rewards/margins": 0.35297515988349915,
"rewards/rejected": -0.5920711755752563,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": 1.0894076824188232,
"logits/rejected": 1.2157137393951416,
"logps/chosen": -429.09857177734375,
"logps/rejected": -461.9745178222656,
"loss": 0.0509,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.5552287101745605,
"rewards/margins": 0.3786623775959015,
"rewards/rejected": -0.9338911175727844,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": 0.9078506231307983,
"logits/rejected": 1.0372017621994019,
"logps/chosen": -482.3373107910156,
"logps/rejected": -479.88916015625,
"loss": 0.0479,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.505352795124054,
"rewards/margins": 0.26132458448410034,
"rewards/rejected": -0.7666773796081543,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": 0.80833500623703,
"logits/rejected": 0.8488121032714844,
"logps/chosen": -413.3409118652344,
"logps/rejected": -438.3705139160156,
"loss": 0.0476,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4049296975135803,
"rewards/margins": 0.3900560736656189,
"rewards/rejected": -0.7949857115745544,
"step": 180
},
{
"epoch": 0.4,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": 0.9091412425041199,
"logits/rejected": 1.0051593780517578,
"logps/chosen": -395.74383544921875,
"logps/rejected": -402.8367919921875,
"loss": 0.0478,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.4961649775505066,
"rewards/margins": 0.3637959361076355,
"rewards/rejected": -0.8599609136581421,
"step": 190
},
{
"epoch": 0.42,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": 1.0421111583709717,
"logits/rejected": 1.1686071157455444,
"logps/chosen": -422.24224853515625,
"logps/rejected": -469.1251525878906,
"loss": 0.0405,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.7005800008773804,
"rewards/margins": 0.46449971199035645,
"rewards/rejected": -1.1650797128677368,
"step": 200
},
{
"epoch": 0.42,
"eval_logits/chosen": 1.1859312057495117,
"eval_logits/rejected": 1.2733540534973145,
"eval_logps/chosen": -449.3788757324219,
"eval_logps/rejected": -505.84661865234375,
"eval_loss": 0.045209601521492004,
"eval_rewards/accuracies": 0.75390625,
"eval_rewards/chosen": -0.6624964475631714,
"eval_rewards/margins": 0.5032405257225037,
"eval_rewards/rejected": -1.1657369136810303,
"eval_runtime": 75.0855,
"eval_samples_per_second": 26.636,
"eval_steps_per_second": 0.426,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": 0.9289053082466125,
"logits/rejected": 1.0322377681732178,
"logps/chosen": -454.09521484375,
"logps/rejected": -484.48956298828125,
"loss": 0.0428,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.5330354571342468,
"rewards/margins": 0.47441625595092773,
"rewards/rejected": -1.0074517726898193,
"step": 210
},
{
"epoch": 0.46,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": 0.6325788497924805,
"logits/rejected": 0.8454742431640625,
"logps/chosen": -443.6888732910156,
"logps/rejected": -444.2510681152344,
"loss": 0.051,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.4299241006374359,
"rewards/margins": 0.41193485260009766,
"rewards/rejected": -0.8418590426445007,
"step": 220
},
{
"epoch": 0.48,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.8997888565063477,
"logits/rejected": 0.9853512048721313,
"logps/chosen": -413.89520263671875,
"logps/rejected": -458.99676513671875,
"loss": 0.0525,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.5308324694633484,
"rewards/margins": 0.4597201943397522,
"rewards/rejected": -0.9905527830123901,
"step": 230
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": 1.0547417402267456,
"logits/rejected": 1.1306800842285156,
"logps/chosen": -493.91790771484375,
"logps/rejected": -539.1799926757812,
"loss": 0.0471,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.6113244295120239,
"rewards/margins": 0.5182110667228699,
"rewards/rejected": -1.1295355558395386,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": 1.2281643152236938,
"logits/rejected": 1.359076976776123,
"logps/chosen": -466.77001953125,
"logps/rejected": -483.91259765625,
"loss": 0.0419,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.7258759140968323,
"rewards/margins": 0.42711353302001953,
"rewards/rejected": -1.152989387512207,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": 1.2255347967147827,
"logits/rejected": 1.462003469467163,
"logps/chosen": -491.76190185546875,
"logps/rejected": -505.47161865234375,
"loss": 0.0451,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.7059242725372314,
"rewards/margins": 0.6359472274780273,
"rewards/rejected": -1.3418715000152588,
"step": 260
},
{
"epoch": 0.57,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": 1.3674428462982178,
"logits/rejected": 1.578064203262329,
"logps/chosen": -486.397216796875,
"logps/rejected": -492.1827087402344,
"loss": 0.0472,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.7773429155349731,
"rewards/margins": 0.38945746421813965,
"rewards/rejected": -1.1668003797531128,
"step": 270
},
{
"epoch": 0.59,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": 1.366081953048706,
"logits/rejected": 1.5207383632659912,
"logps/chosen": -477.0743103027344,
"logps/rejected": -530.8953857421875,
"loss": 0.0445,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.8941423296928406,
"rewards/margins": 0.4790104925632477,
"rewards/rejected": -1.3731528520584106,
"step": 280
},
{
"epoch": 0.61,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": 1.2689809799194336,
"logits/rejected": 1.4011085033416748,
"logps/chosen": -438.982421875,
"logps/rejected": -469.45703125,
"loss": 0.0464,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.62468022108078,
"rewards/margins": 0.513271689414978,
"rewards/rejected": -1.1379519701004028,
"step": 290
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": 1.1505718231201172,
"logits/rejected": 1.4240622520446777,
"logps/chosen": -458.03631591796875,
"logps/rejected": -443.11712646484375,
"loss": 0.0479,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.6710134148597717,
"rewards/margins": 0.39567166566848755,
"rewards/rejected": -1.0666849613189697,
"step": 300
},
{
"epoch": 0.63,
"eval_logits/chosen": 1.2982094287872314,
"eval_logits/rejected": 1.409311056137085,
"eval_logps/chosen": -435.2132568359375,
"eval_logps/rejected": -501.30841064453125,
"eval_loss": 0.047696553170681,
"eval_rewards/accuracies": 0.73828125,
"eval_rewards/chosen": -0.5208398699760437,
"eval_rewards/margins": 0.5995149612426758,
"eval_rewards/rejected": -1.1203548908233643,
"eval_runtime": 75.296,
"eval_samples_per_second": 26.562,
"eval_steps_per_second": 0.425,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": 1.2780801057815552,
"logits/rejected": 1.3399560451507568,
"logps/chosen": -475.42413330078125,
"logps/rejected": -517.4520263671875,
"loss": 0.0478,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.7529923319816589,
"rewards/margins": 0.4368392825126648,
"rewards/rejected": -1.1898316144943237,
"step": 310
},
{
"epoch": 0.67,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": 1.198677897453308,
"logits/rejected": 1.4085700511932373,
"logps/chosen": -498.35711669921875,
"logps/rejected": -497.4380798339844,
"loss": 0.0424,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.6401562690734863,
"rewards/margins": 0.48012202978134155,
"rewards/rejected": -1.1202783584594727,
"step": 320
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": 1.2183105945587158,
"logits/rejected": 1.2747819423675537,
"logps/chosen": -442.5284118652344,
"logps/rejected": -533.216796875,
"loss": 0.0454,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.5910875201225281,
"rewards/margins": 0.5799761414527893,
"rewards/rejected": -1.1710636615753174,
"step": 330
},
{
"epoch": 0.71,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": 1.203604817390442,
"logits/rejected": 1.1832085847854614,
"logps/chosen": -441.4521484375,
"logps/rejected": -512.8982543945312,
"loss": 0.0428,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7571262121200562,
"rewards/margins": 0.4640630781650543,
"rewards/rejected": -1.221189260482788,
"step": 340
},
{
"epoch": 0.73,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": 1.3052194118499756,
"logits/rejected": 1.382683515548706,
"logps/chosen": -465.3661193847656,
"logps/rejected": -528.7847290039062,
"loss": 0.0412,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.7757940292358398,
"rewards/margins": 0.4744884967803955,
"rewards/rejected": -1.2502825260162354,
"step": 350
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 1.1976938247680664,
"logits/rejected": 1.432969331741333,
"logps/chosen": -491.15771484375,
"logps/rejected": -515.0520629882812,
"loss": 0.0446,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.6491819620132446,
"rewards/margins": 0.6244359612464905,
"rewards/rejected": -1.2736178636550903,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": 1.192779541015625,
"logits/rejected": 1.324210524559021,
"logps/chosen": -504.5486755371094,
"logps/rejected": -508.7030334472656,
"loss": 0.0435,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7155844569206238,
"rewards/margins": 0.5151349306106567,
"rewards/rejected": -1.2307194471359253,
"step": 370
},
{
"epoch": 0.8,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": 1.223356008529663,
"logits/rejected": 1.4434764385223389,
"logps/chosen": -474.7169494628906,
"logps/rejected": -518.0782470703125,
"loss": 0.0476,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.6729675531387329,
"rewards/margins": 0.6387326717376709,
"rewards/rejected": -1.3117002248764038,
"step": 380
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": 1.3060978651046753,
"logits/rejected": 1.4896109104156494,
"logps/chosen": -470.46661376953125,
"logps/rejected": -502.4981384277344,
"loss": 0.0482,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.6783354878425598,
"rewards/margins": 0.5045996904373169,
"rewards/rejected": -1.1829349994659424,
"step": 390
},
{
"epoch": 0.84,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": 1.3413165807724,
"logits/rejected": 1.4800562858581543,
"logps/chosen": -450.84844970703125,
"logps/rejected": -509.7266540527344,
"loss": 0.0457,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.7674819231033325,
"rewards/margins": 0.4173991084098816,
"rewards/rejected": -1.1848809719085693,
"step": 400
},
{
"epoch": 0.84,
"eval_logits/chosen": 1.491492748260498,
"eval_logits/rejected": 1.6154029369354248,
"eval_logps/chosen": -448.419677734375,
"eval_logps/rejected": -518.3443603515625,
"eval_loss": 0.044891636818647385,
"eval_rewards/accuracies": 0.73828125,
"eval_rewards/chosen": -0.6529037952423096,
"eval_rewards/margins": 0.6378109455108643,
"eval_rewards/rejected": -1.2907147407531738,
"eval_runtime": 74.6873,
"eval_samples_per_second": 26.778,
"eval_steps_per_second": 0.428,
"step": 400
},
{
"epoch": 0.86,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": 1.3658090829849243,
"logits/rejected": 1.523946762084961,
"logps/chosen": -461.0426330566406,
"logps/rejected": -491.6429138183594,
"loss": 0.045,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.6857269406318665,
"rewards/margins": 0.5137700438499451,
"rewards/rejected": -1.1994969844818115,
"step": 410
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": 1.3796783685684204,
"logits/rejected": 1.5178402662277222,
"logps/chosen": -454.60455322265625,
"logps/rejected": -483.65704345703125,
"loss": 0.0418,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.6705530285835266,
"rewards/margins": 0.604373574256897,
"rewards/rejected": -1.2749265432357788,
"step": 420
},
{
"epoch": 0.9,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": 1.334680199623108,
"logits/rejected": 1.4741976261138916,
"logps/chosen": -504.280029296875,
"logps/rejected": -529.8871459960938,
"loss": 0.0453,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7305961847305298,
"rewards/margins": 0.5881385207176208,
"rewards/rejected": -1.3187347650527954,
"step": 430
},
{
"epoch": 0.92,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": 1.3827157020568848,
"logits/rejected": 1.5478546619415283,
"logps/chosen": -453.01171875,
"logps/rejected": -480.3030700683594,
"loss": 0.0414,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7755357027053833,
"rewards/margins": 0.5378071069717407,
"rewards/rejected": -1.313342809677124,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": 1.2280631065368652,
"logits/rejected": 1.454526662826538,
"logps/chosen": -487.4305114746094,
"logps/rejected": -500.71087646484375,
"loss": 0.0425,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.6377500295639038,
"rewards/margins": 0.5590785145759583,
"rewards/rejected": -1.1968284845352173,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": 1.336721658706665,
"logits/rejected": 1.4986612796783447,
"logps/chosen": -455.5997619628906,
"logps/rejected": -474.46038818359375,
"loss": 0.0425,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.7689257264137268,
"rewards/margins": 0.4747004508972168,
"rewards/rejected": -1.243626356124878,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": 1.2109500169754028,
"logits/rejected": 1.3351854085922241,
"logps/chosen": -490.6439514160156,
"logps/rejected": -541.4273681640625,
"loss": 0.0421,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.7248164415359497,
"rewards/margins": 0.548802375793457,
"rewards/rejected": -1.2736186981201172,
"step": 470
},
{
"epoch": 1.0,
"step": 477,
"total_flos": 0.0,
"train_loss": 0.0564979040210352,
"train_runtime": 4410.0999,
"train_samples_per_second": 13.862,
"train_steps_per_second": 0.108
}
],
"logging_steps": 10,
"max_steps": 477,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}