|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.847970962524414, |
|
"logits/rejected": -2.79160213470459, |
|
"logps/chosen": -284.9612731933594, |
|
"logps/rejected": -276.45928955078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.754901647567749, |
|
"logits/rejected": -2.7529661655426025, |
|
"logps/chosen": -249.956298828125, |
|
"logps/rejected": -223.05245971679688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3958333432674408, |
|
"rewards/chosen": -8.542059367755428e-05, |
|
"rewards/margins": -4.0294162317877635e-05, |
|
"rewards/rejected": -4.512643499765545e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.7449066638946533, |
|
"logits/rejected": -2.745481014251709, |
|
"logps/chosen": -257.4268493652344, |
|
"logps/rejected": -247.520751953125, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.00028673160704784095, |
|
"rewards/margins": 0.0011877163778990507, |
|
"rewards/rejected": -0.0009009848581627011, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.8009085655212402, |
|
"logits/rejected": -2.7534918785095215, |
|
"logps/chosen": -300.4103088378906, |
|
"logps/rejected": -261.89532470703125, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0016673363279551268, |
|
"rewards/margins": 0.009702490642666817, |
|
"rewards/rejected": -0.008035155013203621, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.7635364532470703, |
|
"logits/rejected": -2.751422882080078, |
|
"logps/chosen": -256.6298522949219, |
|
"logps/rejected": -274.86297607421875, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0019601243548095226, |
|
"rewards/margins": 0.025836413726210594, |
|
"rewards/rejected": -0.027796542271971703, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.7672626972198486, |
|
"logits/rejected": -2.7396867275238037, |
|
"logps/chosen": -284.4268798828125, |
|
"logps/rejected": -256.52667236328125, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.023474793881177902, |
|
"rewards/margins": 0.06475953012704849, |
|
"rewards/rejected": -0.0882343202829361, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.7358150482177734, |
|
"logits/rejected": -2.724313259124756, |
|
"logps/chosen": -281.9308166503906, |
|
"logps/rejected": -256.6224670410156, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.060463108122348785, |
|
"rewards/margins": 0.1052827388048172, |
|
"rewards/rejected": -0.1657458394765854, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.781935453414917, |
|
"logits/rejected": -2.739537000656128, |
|
"logps/chosen": -291.1555480957031, |
|
"logps/rejected": -273.9505920410156, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.24020154774188995, |
|
"rewards/margins": 0.17989788949489594, |
|
"rewards/rejected": -0.4200994074344635, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.782163143157959, |
|
"logits/rejected": -2.7544727325439453, |
|
"logps/chosen": -290.7063903808594, |
|
"logps/rejected": -333.33160400390625, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.30353400111198425, |
|
"rewards/margins": 0.3068069517612457, |
|
"rewards/rejected": -0.61034095287323, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.8103935718536377, |
|
"logits/rejected": -2.7860381603240967, |
|
"logps/chosen": -309.4369201660156, |
|
"logps/rejected": -328.04937744140625, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.34070074558258057, |
|
"rewards/margins": 0.4278062880039215, |
|
"rewards/rejected": -0.7685070037841797, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.775650978088379, |
|
"logits/rejected": -2.742344379425049, |
|
"logps/chosen": -354.2271423339844, |
|
"logps/rejected": -372.828369140625, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6526215672492981, |
|
"rewards/margins": 0.4535134732723236, |
|
"rewards/rejected": -1.1061351299285889, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.7190756797790527, |
|
"eval_logits/rejected": -2.702101707458496, |
|
"eval_logps/chosen": -322.6109924316406, |
|
"eval_logps/rejected": -376.20880126953125, |
|
"eval_loss": 0.5829024910926819, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -0.6557134985923767, |
|
"eval_rewards/margins": 0.5328419208526611, |
|
"eval_rewards/rejected": -1.188555359840393, |
|
"eval_runtime": 53.0851, |
|
"eval_samples_per_second": 37.675, |
|
"eval_steps_per_second": 0.603, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.667227268218994, |
|
"logits/rejected": -2.6603758335113525, |
|
"logps/chosen": -321.42108154296875, |
|
"logps/rejected": -396.7526550292969, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5715780258178711, |
|
"rewards/margins": 0.6688358187675476, |
|
"rewards/rejected": -1.2404139041900635, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.622821807861328, |
|
"logits/rejected": -2.583700656890869, |
|
"logps/chosen": -340.69219970703125, |
|
"logps/rejected": -375.4017333984375, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7315243482589722, |
|
"rewards/margins": 0.5486994981765747, |
|
"rewards/rejected": -1.2802238464355469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.5822339057922363, |
|
"logits/rejected": -2.547309398651123, |
|
"logps/chosen": -359.7410583496094, |
|
"logps/rejected": -351.17999267578125, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6760958433151245, |
|
"rewards/margins": 0.4332718849182129, |
|
"rewards/rejected": -1.1093676090240479, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.458064079284668, |
|
"logits/rejected": -2.434985637664795, |
|
"logps/chosen": -344.94622802734375, |
|
"logps/rejected": -373.15277099609375, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9658713340759277, |
|
"rewards/margins": 0.568038821220398, |
|
"rewards/rejected": -1.5339101552963257, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.430382490158081, |
|
"logits/rejected": -2.411181926727295, |
|
"logps/chosen": -362.24664306640625, |
|
"logps/rejected": -394.7173767089844, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.970133900642395, |
|
"rewards/margins": 0.5773912668228149, |
|
"rewards/rejected": -1.5475252866744995, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.3705012798309326, |
|
"logits/rejected": -2.3451476097106934, |
|
"logps/chosen": -340.9483947753906, |
|
"logps/rejected": -381.2392883300781, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7996856570243835, |
|
"rewards/margins": 0.6973718404769897, |
|
"rewards/rejected": -1.497057557106018, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.298063278198242, |
|
"logits/rejected": -2.2643802165985107, |
|
"logps/chosen": -356.18292236328125, |
|
"logps/rejected": -401.3460998535156, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8752641677856445, |
|
"rewards/margins": 0.6319175958633423, |
|
"rewards/rejected": -1.5071817636489868, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.3489673137664795, |
|
"logits/rejected": -2.294405937194824, |
|
"logps/chosen": -366.259765625, |
|
"logps/rejected": -413.059326171875, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8981040716171265, |
|
"rewards/margins": 0.7530413866043091, |
|
"rewards/rejected": -1.651145339012146, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.22472882270813, |
|
"logits/rejected": -2.1942319869995117, |
|
"logps/chosen": -390.96893310546875, |
|
"logps/rejected": -435.68634033203125, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3375661373138428, |
|
"rewards/margins": 0.6510864496231079, |
|
"rewards/rejected": -1.9886524677276611, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.330658197402954, |
|
"logits/rejected": -2.253397226333618, |
|
"logps/chosen": -424.68511962890625, |
|
"logps/rejected": -460.4125061035156, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9010859727859497, |
|
"rewards/margins": 0.9040181040763855, |
|
"rewards/rejected": -1.8051040172576904, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.273806571960449, |
|
"eval_logits/rejected": -2.2433524131774902, |
|
"eval_logps/chosen": -338.0599365234375, |
|
"eval_logps/rejected": -420.1078186035156, |
|
"eval_loss": 0.5300609469413757, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.810202956199646, |
|
"eval_rewards/margins": 0.8173429369926453, |
|
"eval_rewards/rejected": -1.6275460720062256, |
|
"eval_runtime": 53.0552, |
|
"eval_samples_per_second": 37.697, |
|
"eval_steps_per_second": 0.603, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.26928448677063, |
|
"logits/rejected": -2.201911449432373, |
|
"logps/chosen": -353.4331970214844, |
|
"logps/rejected": -383.96044921875, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8622655868530273, |
|
"rewards/margins": 0.5730525255203247, |
|
"rewards/rejected": -1.4353179931640625, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.2439053058624268, |
|
"logits/rejected": -2.206618070602417, |
|
"logps/chosen": -370.7458190917969, |
|
"logps/rejected": -391.848388671875, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.7618538737297058, |
|
"rewards/margins": 0.7462855577468872, |
|
"rewards/rejected": -1.5081393718719482, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.1762518882751465, |
|
"logits/rejected": -2.1476693153381348, |
|
"logps/chosen": -382.38946533203125, |
|
"logps/rejected": -465.69561767578125, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.005793571472168, |
|
"rewards/margins": 0.7425030469894409, |
|
"rewards/rejected": -1.7482967376708984, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.1192374229431152, |
|
"logits/rejected": -2.0674259662628174, |
|
"logps/chosen": -391.3011474609375, |
|
"logps/rejected": -484.4254455566406, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.375982642173767, |
|
"rewards/margins": 0.8829982876777649, |
|
"rewards/rejected": -2.2589809894561768, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.1064059734344482, |
|
"logits/rejected": -2.0222904682159424, |
|
"logps/chosen": -397.3945007324219, |
|
"logps/rejected": -454.42340087890625, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3846924304962158, |
|
"rewards/margins": 0.8052938580513, |
|
"rewards/rejected": -2.18998646736145, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.0734519958496094, |
|
"logits/rejected": -2.041645050048828, |
|
"logps/chosen": -403.8518371582031, |
|
"logps/rejected": -443.9764099121094, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.352430820465088, |
|
"rewards/margins": 0.6026407480239868, |
|
"rewards/rejected": -1.9550716876983643, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.0378193855285645, |
|
"logits/rejected": -2.006934881210327, |
|
"logps/chosen": -402.4918518066406, |
|
"logps/rejected": -457.62811279296875, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2763839960098267, |
|
"rewards/margins": 0.744287371635437, |
|
"rewards/rejected": -2.0206713676452637, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.033855438232422, |
|
"logits/rejected": -1.9725334644317627, |
|
"logps/chosen": -366.2498474121094, |
|
"logps/rejected": -433.2369079589844, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2484426498413086, |
|
"rewards/margins": 0.7818558812141418, |
|
"rewards/rejected": -2.0302984714508057, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.018475294113159, |
|
"logits/rejected": -1.949302077293396, |
|
"logps/chosen": -367.6812438964844, |
|
"logps/rejected": -429.4832458496094, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.210700273513794, |
|
"rewards/margins": 0.7417057752609253, |
|
"rewards/rejected": -1.9524061679840088, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.01090145111084, |
|
"logits/rejected": -1.9497419595718384, |
|
"logps/chosen": -396.8717956542969, |
|
"logps/rejected": -473.7056579589844, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.406031847000122, |
|
"rewards/margins": 0.7575126886367798, |
|
"rewards/rejected": -2.1635446548461914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.0371742248535156, |
|
"eval_logits/rejected": -1.9920138120651245, |
|
"eval_logps/chosen": -394.5289611816406, |
|
"eval_logps/rejected": -488.7168884277344, |
|
"eval_loss": 0.514569878578186, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -1.374893307685852, |
|
"eval_rewards/margins": 0.9387427568435669, |
|
"eval_rewards/rejected": -2.313636064529419, |
|
"eval_runtime": 53.0256, |
|
"eval_samples_per_second": 37.718, |
|
"eval_steps_per_second": 0.603, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -1.9743964672088623, |
|
"logits/rejected": -1.8795156478881836, |
|
"logps/chosen": -401.95098876953125, |
|
"logps/rejected": -473.07586669921875, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.2877211570739746, |
|
"rewards/margins": 0.9713341593742371, |
|
"rewards/rejected": -2.2590553760528564, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.009753704071045, |
|
"logits/rejected": -1.9591827392578125, |
|
"logps/chosen": -424.99468994140625, |
|
"logps/rejected": -447.1941833496094, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.286123275756836, |
|
"rewards/margins": 0.8317530751228333, |
|
"rewards/rejected": -2.1178765296936035, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.021080493927002, |
|
"logits/rejected": -1.9558074474334717, |
|
"logps/chosen": -386.18670654296875, |
|
"logps/rejected": -441.7825622558594, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3397352695465088, |
|
"rewards/margins": 0.7417815923690796, |
|
"rewards/rejected": -2.081516742706299, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -1.9722802639007568, |
|
"logits/rejected": -1.9194387197494507, |
|
"logps/chosen": -390.5426330566406, |
|
"logps/rejected": -470.82958984375, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3929929733276367, |
|
"rewards/margins": 0.8960745930671692, |
|
"rewards/rejected": -2.2890677452087402, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -1.9723567962646484, |
|
"logits/rejected": -1.9255586862564087, |
|
"logps/chosen": -353.8846740722656, |
|
"logps/rejected": -450.743408203125, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.279344916343689, |
|
"rewards/margins": 0.8211178779602051, |
|
"rewards/rejected": -2.1004626750946045, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -1.9666541814804077, |
|
"logits/rejected": -1.8845767974853516, |
|
"logps/chosen": -398.8426818847656, |
|
"logps/rejected": -476.284912109375, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3157447576522827, |
|
"rewards/margins": 1.0537182092666626, |
|
"rewards/rejected": -2.3694632053375244, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -1.922550916671753, |
|
"logits/rejected": -1.8920552730560303, |
|
"logps/chosen": -385.36676025390625, |
|
"logps/rejected": -481.94219970703125, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3011926412582397, |
|
"rewards/margins": 0.9755498766899109, |
|
"rewards/rejected": -2.2767422199249268, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -1.964270830154419, |
|
"logits/rejected": -1.9201478958129883, |
|
"logps/chosen": -422.5608825683594, |
|
"logps/rejected": -470.6983337402344, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5673155784606934, |
|
"rewards/margins": 0.7714017629623413, |
|
"rewards/rejected": -2.338717460632324, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -1.9478000402450562, |
|
"logits/rejected": -1.9133468866348267, |
|
"logps/chosen": -396.41827392578125, |
|
"logps/rejected": -474.91168212890625, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.392188549041748, |
|
"rewards/margins": 0.9372695684432983, |
|
"rewards/rejected": -2.329457998275757, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.014727830886841, |
|
"logits/rejected": -1.9725955724716187, |
|
"logps/chosen": -433.93402099609375, |
|
"logps/rejected": -496.03948974609375, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.337740182876587, |
|
"rewards/margins": 0.89354008436203, |
|
"rewards/rejected": -2.231280565261841, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.0268406867980957, |
|
"eval_logits/rejected": -1.9826929569244385, |
|
"eval_logps/chosen": -388.0500183105469, |
|
"eval_logps/rejected": -484.0532531738281, |
|
"eval_loss": 0.5034094452857971, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -1.3101037740707397, |
|
"eval_rewards/margins": 0.9568960070610046, |
|
"eval_rewards/rejected": -2.2669999599456787, |
|
"eval_runtime": 53.0612, |
|
"eval_samples_per_second": 37.692, |
|
"eval_steps_per_second": 0.603, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -1.9932317733764648, |
|
"logits/rejected": -1.9669653177261353, |
|
"logps/chosen": -391.12274169921875, |
|
"logps/rejected": -434.02191162109375, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3643336296081543, |
|
"rewards/margins": 0.6512311100959778, |
|
"rewards/rejected": -2.0155646800994873, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.064518928527832, |
|
"logits/rejected": -1.9801286458969116, |
|
"logps/chosen": -398.71868896484375, |
|
"logps/rejected": -479.0596618652344, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2102010250091553, |
|
"rewards/margins": 1.0837668180465698, |
|
"rewards/rejected": -2.2939677238464355, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.049975633621216, |
|
"logits/rejected": -1.996206521987915, |
|
"logps/chosen": -411.322509765625, |
|
"logps/rejected": -459.893798828125, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.363693356513977, |
|
"rewards/margins": 0.7787196636199951, |
|
"rewards/rejected": -2.1424131393432617, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -1.9763036966323853, |
|
"logits/rejected": -1.950627326965332, |
|
"logps/chosen": -419.8603515625, |
|
"logps/rejected": -458.17822265625, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4159471988677979, |
|
"rewards/margins": 0.7630717158317566, |
|
"rewards/rejected": -2.179018974304199, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.0032382011413574, |
|
"logits/rejected": -1.9466326236724854, |
|
"logps/chosen": -413.5555114746094, |
|
"logps/rejected": -492.5790100097656, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3693794012069702, |
|
"rewards/margins": 0.907731831073761, |
|
"rewards/rejected": -2.277111291885376, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -1.964643120765686, |
|
"logits/rejected": -1.9253301620483398, |
|
"logps/chosen": -396.19683837890625, |
|
"logps/rejected": -466.6449279785156, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.3089487552642822, |
|
"rewards/margins": 0.8880389332771301, |
|
"rewards/rejected": -2.1969876289367676, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.0099263191223145, |
|
"logits/rejected": -1.9355924129486084, |
|
"logps/chosen": -420.68408203125, |
|
"logps/rejected": -471.353515625, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.3482139110565186, |
|
"rewards/margins": 1.0080922842025757, |
|
"rewards/rejected": -2.356306314468384, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5420855548092511, |
|
"train_runtime": 4282.9885, |
|
"train_samples_per_second": 14.274, |
|
"train_steps_per_second": 0.112 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|