|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 10000, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": 0.17655496299266815, |
|
"logits/rejected": 0.2531452775001526, |
|
"logps/chosen": -354.29669189453125, |
|
"logps/rejected": -305.259765625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 0.0010361697059124708, |
|
"rewards/margins": 0.0014542521676048636, |
|
"rewards/rejected": -0.00041808263631537557, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": 0.07140998542308807, |
|
"logits/rejected": 0.19915328919887543, |
|
"logps/chosen": -316.61407470703125, |
|
"logps/rejected": -276.1783142089844, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001211934955790639, |
|
"rewards/margins": 0.00264042429625988, |
|
"rewards/rejected": -0.0014284893404692411, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 0.1830858290195465, |
|
"logits/rejected": 0.25493288040161133, |
|
"logps/chosen": -294.3023376464844, |
|
"logps/rejected": -298.47430419921875, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.00664560217410326, |
|
"rewards/margins": 0.008408578112721443, |
|
"rewards/rejected": -0.0017629768699407578, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": 0.1198926791548729, |
|
"logits/rejected": 0.2388772964477539, |
|
"logps/chosen": -343.3688659667969, |
|
"logps/rejected": -318.56866455078125, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.04203338176012039, |
|
"rewards/margins": 0.023049216717481613, |
|
"rewards/rejected": 0.01898416317999363, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999463737538052e-07, |
|
"logits/chosen": 0.19016575813293457, |
|
"logits/rejected": 0.2768324613571167, |
|
"logps/chosen": -305.9139709472656, |
|
"logps/rejected": -285.70263671875, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0463864728808403, |
|
"rewards/margins": 0.06659023463726044, |
|
"rewards/rejected": -0.02020375430583954, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.980706626858607e-07, |
|
"logits/chosen": 0.1583642065525055, |
|
"logits/rejected": 0.2964373230934143, |
|
"logps/chosen": -292.2091979980469, |
|
"logps/rejected": -283.33062744140625, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.006695735268294811, |
|
"rewards/margins": 0.08554854989051819, |
|
"rewards/rejected": -0.0788528248667717, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.935251313189563e-07, |
|
"logits/chosen": 0.1668189913034439, |
|
"logits/rejected": 0.25383955240249634, |
|
"logps/chosen": -330.51483154296875, |
|
"logps/rejected": -332.74249267578125, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.007911129854619503, |
|
"rewards/margins": 0.13003569841384888, |
|
"rewards/rejected": -0.13794682919979095, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.86334145175542e-07, |
|
"logits/chosen": 0.22892770171165466, |
|
"logits/rejected": 0.32262876629829407, |
|
"logps/chosen": -326.62847900390625, |
|
"logps/rejected": -321.47064208984375, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07964827120304108, |
|
"rewards/margins": 0.2643834054470062, |
|
"rewards/rejected": -0.3440317213535309, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.765362502737097e-07, |
|
"logits/chosen": 0.12489993870258331, |
|
"logits/rejected": 0.2657889425754547, |
|
"logps/chosen": -358.5821838378906, |
|
"logps/rejected": -333.71466064453125, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.21640650928020477, |
|
"rewards/margins": 0.4499947130680084, |
|
"rewards/rejected": -0.6664012670516968, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.641839665080363e-07, |
|
"logits/chosen": 0.2374851256608963, |
|
"logits/rejected": 0.4098134934902191, |
|
"logps/chosen": -378.7792053222656, |
|
"logps/rejected": -408.1399841308594, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.44217753410339355, |
|
"rewards/margins": 0.715401291847229, |
|
"rewards/rejected": -1.157578706741333, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.493435061259129e-07, |
|
"logits/chosen": 0.29897215962409973, |
|
"logits/rejected": 0.34014248847961426, |
|
"logps/chosen": -395.0293884277344, |
|
"logps/rejected": -461.2764587402344, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8240998983383179, |
|
"rewards/margins": 0.7941638231277466, |
|
"rewards/rejected": -1.618263602256775, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.320944188084241e-07, |
|
"logits/chosen": 0.18543429672718048, |
|
"logits/rejected": 0.282682329416275, |
|
"logps/chosen": -440.6853942871094, |
|
"logps/rejected": -526.3844604492188, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.201317548751831, |
|
"rewards/margins": 0.8505627512931824, |
|
"rewards/rejected": -2.051880359649658, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.125291652582547e-07, |
|
"logits/chosen": 0.10988249629735947, |
|
"logits/rejected": 0.2532512843608856, |
|
"logps/chosen": -429.30322265625, |
|
"logps/rejected": -460.0655822753906, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9080715179443359, |
|
"rewards/margins": 0.8440803289413452, |
|
"rewards/rejected": -1.7521518468856812, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.90752621580335e-07, |
|
"logits/chosen": 0.05259154364466667, |
|
"logits/rejected": 0.20351815223693848, |
|
"logps/chosen": -478.1226501464844, |
|
"logps/rejected": -552.33154296875, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4607925415039062, |
|
"rewards/margins": 1.3634538650512695, |
|
"rewards/rejected": -2.8242461681365967, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.668815171119019e-07, |
|
"logits/chosen": 0.1267194300889969, |
|
"logits/rejected": 0.16065822541713715, |
|
"logps/chosen": -432.47418212890625, |
|
"logps/rejected": -556.4413452148438, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9717355966567993, |
|
"rewards/margins": 1.443182110786438, |
|
"rewards/rejected": -2.4149177074432373, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.410438087153911e-07, |
|
"logits/chosen": 0.05742305517196655, |
|
"logits/rejected": 0.03335579112172127, |
|
"logps/chosen": -386.4638366699219, |
|
"logps/rejected": -537.6171264648438, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8440232276916504, |
|
"rewards/margins": 1.7251598834991455, |
|
"rewards/rejected": -2.569182872772217, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.133779948881513e-07, |
|
"logits/chosen": 0.04388447850942612, |
|
"logits/rejected": 0.06478340178728104, |
|
"logps/chosen": -450.94049072265625, |
|
"logps/rejected": -571.2717895507812, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1457209587097168, |
|
"rewards/margins": 1.4885038137435913, |
|
"rewards/rejected": -2.6342251300811768, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.840323733655778e-07, |
|
"logits/chosen": 0.03801240772008896, |
|
"logits/rejected": 0.0668804943561554, |
|
"logps/chosen": -415.9105529785156, |
|
"logps/rejected": -594.4246826171875, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8987852931022644, |
|
"rewards/margins": 2.0467095375061035, |
|
"rewards/rejected": -2.9454948902130127, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.531642461971514e-07, |
|
"logits/chosen": 0.12394122779369354, |
|
"logits/rejected": 0.07622597366571426, |
|
"logps/chosen": -482.99774169921875, |
|
"logps/rejected": -617.9317626953125, |
|
"loss": 0.4148, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4833831787109375, |
|
"rewards/margins": 1.5686841011047363, |
|
"rewards/rejected": -3.052067279815674, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.209390765564318e-07, |
|
"logits/chosen": 0.12547728419303894, |
|
"logits/rejected": 0.039741553366184235, |
|
"logps/chosen": -470.0662536621094, |
|
"logps/rejected": -810.3030395507812, |
|
"loss": 0.4152, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2957651615142822, |
|
"rewards/margins": 3.8659985065460205, |
|
"rewards/rejected": -5.1617631912231445, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.875296018047809e-07, |
|
"logits/chosen": 0.20153549313545227, |
|
"logits/rejected": 0.1317548155784607, |
|
"logps/chosen": -447.82562255859375, |
|
"logps/rejected": -725.8985595703125, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3366836309432983, |
|
"rewards/margins": 3.223564863204956, |
|
"rewards/rejected": -4.560248374938965, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.531149075630796e-07, |
|
"logits/chosen": -0.017775116488337517, |
|
"logits/rejected": 0.05367380380630493, |
|
"logps/chosen": -476.78790283203125, |
|
"logps/rejected": -663.9365844726562, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6010878086090088, |
|
"rewards/margins": 2.401573419570923, |
|
"rewards/rejected": -4.002661228179932, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.178794677547137e-07, |
|
"logits/chosen": 0.07326556742191315, |
|
"logits/rejected": -0.006058653350919485, |
|
"logps/chosen": -590.01123046875, |
|
"logps/rejected": -870.9129028320312, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.6394991874694824, |
|
"rewards/margins": 3.238422393798828, |
|
"rewards/rejected": -5.8779215812683105, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.820121557655108e-07, |
|
"logits/chosen": 0.13632330298423767, |
|
"logits/rejected": 0.12085568904876709, |
|
"logps/chosen": -450.1314392089844, |
|
"logps/rejected": -587.374267578125, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3817965984344482, |
|
"rewards/margins": 1.4728713035583496, |
|
"rewards/rejected": -2.854668140411377, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.457052320211339e-07, |
|
"logits/chosen": 0.09744735062122345, |
|
"logits/rejected": -0.04311475530266762, |
|
"logps/chosen": -561.7251586914062, |
|
"logps/rejected": -1082.66064453125, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4967703819274902, |
|
"rewards/margins": 5.509397029876709, |
|
"rewards/rejected": -8.006166458129883, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.091533134088387e-07, |
|
"logits/chosen": 0.007685136049985886, |
|
"logits/rejected": -0.026540469378232956, |
|
"logps/chosen": -681.2808837890625, |
|
"logps/rejected": -1102.198486328125, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.266371250152588, |
|
"rewards/margins": 4.754992485046387, |
|
"rewards/rejected": -8.021364212036133, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7255233006783624e-07, |
|
"logits/chosen": 0.24146917462348938, |
|
"logits/rejected": 0.05772332474589348, |
|
"logps/chosen": -437.0887756347656, |
|
"logps/rejected": -754.1742553710938, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0795494318008423, |
|
"rewards/margins": 3.357706069946289, |
|
"rewards/rejected": -4.437255859375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.3609847514019763e-07, |
|
"logits/chosen": 0.15583154559135437, |
|
"logits/rejected": -0.01679980382323265, |
|
"logps/chosen": -622.4188232421875, |
|
"logps/rejected": -1143.203857421875, |
|
"loss": 0.4172, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.2593586444854736, |
|
"rewards/margins": 5.262009143829346, |
|
"rewards/rejected": -8.521368980407715, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9998715311197783e-07, |
|
"logits/chosen": 0.12384140491485596, |
|
"logits/rejected": -0.03689634054899216, |
|
"logps/chosen": -612.9854736328125, |
|
"logps/rejected": -1161.8275146484375, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.665544033050537, |
|
"rewards/margins": 5.724797248840332, |
|
"rewards/rejected": -8.390340805053711, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6441193238179146e-07, |
|
"logits/chosen": 0.23247964680194855, |
|
"logits/rejected": 0.08442293107509613, |
|
"logps/chosen": -644.8258056640625, |
|
"logps/rejected": -1333.277099609375, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.5307083129882812, |
|
"rewards/margins": 6.568638801574707, |
|
"rewards/rejected": -10.099346160888672, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.295635076714144e-07, |
|
"logits/chosen": 0.21653930842876434, |
|
"logits/rejected": -0.010667298920452595, |
|
"logps/chosen": -576.2736206054688, |
|
"logps/rejected": -1167.0555419921875, |
|
"loss": 0.4003, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.805418014526367, |
|
"rewards/margins": 5.748055458068848, |
|
"rewards/rejected": -8.553472518920898, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.956286778402226e-07, |
|
"logits/chosen": 0.14956721663475037, |
|
"logits/rejected": -0.00617391150444746, |
|
"logps/chosen": -499.51556396484375, |
|
"logps/rejected": -1073.225830078125, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8388452529907227, |
|
"rewards/margins": 5.99139928817749, |
|
"rewards/rejected": -7.830244541168213, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.6278934458271996e-07, |
|
"logits/chosen": 0.20027479529380798, |
|
"logits/rejected": 0.06552217900753021, |
|
"logps/chosen": -461.4195861816406, |
|
"logps/rejected": -1150.258544921875, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6265113353729248, |
|
"rewards/margins": 6.768563270568848, |
|
"rewards/rejected": -8.395073890686035, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.312215373764551e-07, |
|
"logits/chosen": 0.1772742122411728, |
|
"logits/rejected": 0.058857548981904984, |
|
"logps/chosen": -519.1689453125, |
|
"logps/rejected": -1075.103759765625, |
|
"loss": 0.4056, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.324723720550537, |
|
"rewards/margins": 5.8179826736450195, |
|
"rewards/rejected": -8.142705917358398, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.0109446990692963e-07, |
|
"logits/chosen": 0.09322932362556458, |
|
"logits/rejected": -0.021080341190099716, |
|
"logps/chosen": -524.8082275390625, |
|
"logps/rejected": -1263.429443359375, |
|
"loss": 0.404, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.233060121536255, |
|
"rewards/margins": 7.2954888343811035, |
|
"rewards/rejected": -9.528549194335938, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": 0.12329642474651337, |
|
"logits/rejected": -0.045363299548625946, |
|
"logps/chosen": -477.84747314453125, |
|
"logps/rejected": -1159.287353515625, |
|
"loss": 0.3987, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4969019889831543, |
|
"rewards/margins": 7.274144172668457, |
|
"rewards/rejected": -8.77104663848877, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4579992911531496e-07, |
|
"logits/chosen": 0.13813820481300354, |
|
"logits/rejected": 0.06726070493459702, |
|
"logps/chosen": -596.8673706054688, |
|
"logps/rejected": -1229.910888671875, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.484358787536621, |
|
"rewards/margins": 6.708567142486572, |
|
"rewards/rejected": -9.192926406860352, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.209288524664029e-07, |
|
"logits/chosen": 0.2262219935655594, |
|
"logits/rejected": 0.04883592948317528, |
|
"logps/chosen": -571.9241333007812, |
|
"logps/rejected": -1147.636474609375, |
|
"loss": 0.3965, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.7885093688964844, |
|
"rewards/margins": 5.966012954711914, |
|
"rewards/rejected": -8.754522323608398, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.808972011828054e-08, |
|
"logits/chosen": 0.13919615745544434, |
|
"logits/rejected": 0.08005174249410629, |
|
"logps/chosen": -603.2689208984375, |
|
"logps/rejected": -1278.978271484375, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6157753467559814, |
|
"rewards/margins": 7.164151668548584, |
|
"rewards/rejected": -9.779927253723145, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.740495722810269e-08, |
|
"logits/chosen": 0.1855761706829071, |
|
"logits/rejected": 0.03339262679219246, |
|
"logps/chosen": -554.6050415039062, |
|
"logps/rejected": -1247.11474609375, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.375999927520752, |
|
"rewards/margins": 7.147269248962402, |
|
"rewards/rejected": -9.523270606994629, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.898544083397e-08, |
|
"logits/chosen": 0.10612723976373672, |
|
"logits/rejected": -0.03204170614480972, |
|
"logps/chosen": -598.8375244140625, |
|
"logps/rejected": -1218.921142578125, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.0963997840881348, |
|
"rewards/margins": 6.186778545379639, |
|
"rewards/rejected": -9.283178329467773, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.292990551804171e-08, |
|
"logits/chosen": 0.3134514391422272, |
|
"logits/rejected": 0.1133495420217514, |
|
"logps/chosen": -560.297607421875, |
|
"logps/rejected": -1385.083251953125, |
|
"loss": 0.3991, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.669637680053711, |
|
"rewards/margins": 8.3246488571167, |
|
"rewards/rejected": -10.994285583496094, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9324414157151367e-08, |
|
"logits/chosen": 0.14708609879016876, |
|
"logits/rejected": 0.05113764852285385, |
|
"logps/chosen": -646.3408203125, |
|
"logps/rejected": -1521.79345703125, |
|
"loss": 0.3999, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.122638702392578, |
|
"rewards/margins": 8.8574800491333, |
|
"rewards/rejected": -11.980117797851562, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.824189659787284e-08, |
|
"logits/chosen": 0.19891302287578583, |
|
"logits/rejected": 0.057393454015254974, |
|
"logps/chosen": -530.86865234375, |
|
"logps/rejected": -1372.778076171875, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.4631145000457764, |
|
"rewards/margins": 8.486894607543945, |
|
"rewards/rejected": -10.950007438659668, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.741758728888217e-09, |
|
"logits/chosen": 0.20876403152942657, |
|
"logits/rejected": 0.052755843847990036, |
|
"logps/chosen": -683.3274536132812, |
|
"logps/rejected": -1404.552978515625, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.3337600231170654, |
|
"rewards/margins": 7.872265815734863, |
|
"rewards/rejected": -11.206026077270508, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.869564046156459e-09, |
|
"logits/chosen": 0.2985457181930542, |
|
"logits/rejected": 0.15650448203086853, |
|
"logps/chosen": -468.8932189941406, |
|
"logps/rejected": -1197.56201171875, |
|
"loss": 0.3987, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7095565795898438, |
|
"rewards/margins": 7.608504295349121, |
|
"rewards/rejected": -9.318059921264648, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.567894177967325e-10, |
|
"logits/chosen": 0.17393910884857178, |
|
"logits/rejected": 0.02789122983813286, |
|
"logps/chosen": -607.3438720703125, |
|
"logps/rejected": -1505.235595703125, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6819469928741455, |
|
"rewards/margins": 9.39558219909668, |
|
"rewards/rejected": -12.07752799987793, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.42718374404267445, |
|
"train_runtime": 6325.1171, |
|
"train_samples_per_second": 9.665, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|