|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7050857543945312, |
|
"logits/rejected": -2.7461352348327637, |
|
"logps/chosen": -137.25845336914062, |
|
"logps/rejected": -163.38693237304688, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.7312138080596924, |
|
"logits/rejected": -2.6957082748413086, |
|
"logps/chosen": -255.09744262695312, |
|
"logps/rejected": -238.90676879882812, |
|
"loss": 0.2787, |
|
"rewards/accuracies": 0.5069444179534912, |
|
"rewards/chosen": 0.0004420094774104655, |
|
"rewards/margins": 0.0008137564291246235, |
|
"rewards/rejected": -0.0003717469226103276, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.751615047454834, |
|
"logits/rejected": -2.7480320930480957, |
|
"logps/chosen": -265.4415283203125, |
|
"logps/rejected": -255.5920867919922, |
|
"loss": 0.2805, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0003345116856507957, |
|
"rewards/margins": 0.0004976954078301787, |
|
"rewards/rejected": -0.0008322072098962963, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.7395973205566406, |
|
"logits/rejected": -2.696704864501953, |
|
"logps/chosen": -253.4207763671875, |
|
"logps/rejected": -252.7050018310547, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0011635018745437264, |
|
"rewards/margins": 0.009263232350349426, |
|
"rewards/rejected": -0.010426735505461693, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.788689613342285, |
|
"logits/rejected": -2.7742645740509033, |
|
"logps/chosen": -274.7150573730469, |
|
"logps/rejected": -279.29449462890625, |
|
"loss": 0.2782, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.0004261514113750309, |
|
"rewards/margins": 0.028552129864692688, |
|
"rewards/rejected": -0.02897828258574009, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.757750988006592, |
|
"logits/rejected": -2.7366127967834473, |
|
"logps/chosen": -287.0828857421875, |
|
"logps/rejected": -297.19842529296875, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.05043640732765198, |
|
"rewards/margins": 0.03981485590338707, |
|
"rewards/rejected": -0.09025127440690994, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.7765159606933594, |
|
"logits/rejected": -2.7514355182647705, |
|
"logps/chosen": -279.025634765625, |
|
"logps/rejected": -262.62744140625, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.09661928564310074, |
|
"rewards/margins": 0.1170525774359703, |
|
"rewards/rejected": -0.21367184817790985, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.769787549972534, |
|
"logits/rejected": -2.7495346069335938, |
|
"logps/chosen": -276.04913330078125, |
|
"logps/rejected": -308.559326171875, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.20965734124183655, |
|
"rewards/margins": 0.126783087849617, |
|
"rewards/rejected": -0.33644038438796997, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.7401986122131348, |
|
"logits/rejected": -2.711556911468506, |
|
"logps/chosen": -276.31353759765625, |
|
"logps/rejected": -298.213623046875, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22973378002643585, |
|
"rewards/margins": 0.28976646065711975, |
|
"rewards/rejected": -0.519500195980072, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.772463321685791, |
|
"logits/rejected": -2.732743740081787, |
|
"logps/chosen": -317.74468994140625, |
|
"logps/rejected": -323.55963134765625, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4003233015537262, |
|
"rewards/margins": 0.3519899547100067, |
|
"rewards/rejected": -0.7523131966590881, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.7798848152160645, |
|
"logits/rejected": -2.7546212673187256, |
|
"logps/chosen": -320.1632385253906, |
|
"logps/rejected": -350.14154052734375, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5383033752441406, |
|
"rewards/margins": 0.33278244733810425, |
|
"rewards/rejected": -0.8710858225822449, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.7293763160705566, |
|
"eval_logits/rejected": -2.711284637451172, |
|
"eval_logps/chosen": -329.0978088378906, |
|
"eval_logps/rejected": -373.81689453125, |
|
"eval_loss": 0.1234334409236908, |
|
"eval_rewards/accuracies": 0.6953125, |
|
"eval_rewards/chosen": -0.7205817699432373, |
|
"eval_rewards/margins": 0.44405466318130493, |
|
"eval_rewards/rejected": -1.1646363735198975, |
|
"eval_runtime": 53.59, |
|
"eval_samples_per_second": 37.32, |
|
"eval_steps_per_second": 0.597, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.697169780731201, |
|
"logits/rejected": -2.657909393310547, |
|
"logps/chosen": -359.20281982421875, |
|
"logps/rejected": -371.93121337890625, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6524524688720703, |
|
"rewards/margins": 0.42760229110717773, |
|
"rewards/rejected": -1.080054759979248, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.7332141399383545, |
|
"logits/rejected": -2.687659502029419, |
|
"logps/chosen": -341.48504638671875, |
|
"logps/rejected": -387.9088439941406, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7721944451332092, |
|
"rewards/margins": 0.531410813331604, |
|
"rewards/rejected": -1.303605079650879, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.7062783241271973, |
|
"logits/rejected": -2.677564859390259, |
|
"logps/chosen": -346.4920349121094, |
|
"logps/rejected": -363.054443359375, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8793987035751343, |
|
"rewards/margins": 0.42397230863571167, |
|
"rewards/rejected": -1.3033709526062012, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.6977787017822266, |
|
"logits/rejected": -2.6695046424865723, |
|
"logps/chosen": -345.95257568359375, |
|
"logps/rejected": -377.9122619628906, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5838460922241211, |
|
"rewards/margins": 0.4167053699493408, |
|
"rewards/rejected": -1.000551462173462, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.5606093406677246, |
|
"logits/rejected": -2.506340265274048, |
|
"logps/chosen": -330.40277099609375, |
|
"logps/rejected": -359.18701171875, |
|
"loss": 0.1337, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5414291620254517, |
|
"rewards/margins": 0.5864211320877075, |
|
"rewards/rejected": -1.1278501749038696, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.536637544631958, |
|
"logits/rejected": -2.5124268531799316, |
|
"logps/chosen": -373.6593322753906, |
|
"logps/rejected": -393.8511962890625, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.040235996246338, |
|
"rewards/margins": 0.36871328949928284, |
|
"rewards/rejected": -1.4089492559432983, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.5194365978240967, |
|
"logits/rejected": -2.49861741065979, |
|
"logps/chosen": -422.2001953125, |
|
"logps/rejected": -421.9383850097656, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.164865255355835, |
|
"rewards/margins": 0.41925472021102905, |
|
"rewards/rejected": -1.5841200351715088, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.514953136444092, |
|
"logits/rejected": -2.522972583770752, |
|
"logps/chosen": -360.35504150390625, |
|
"logps/rejected": -405.37847900390625, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9703041315078735, |
|
"rewards/margins": 0.5068725347518921, |
|
"rewards/rejected": -1.4771766662597656, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.483044385910034, |
|
"logits/rejected": -2.4624695777893066, |
|
"logps/chosen": -340.3351745605469, |
|
"logps/rejected": -388.73944091796875, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9740051031112671, |
|
"rewards/margins": 0.5462032556533813, |
|
"rewards/rejected": -1.520208477973938, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.4317831993103027, |
|
"logits/rejected": -2.4145891666412354, |
|
"logps/chosen": -375.5243225097656, |
|
"logps/rejected": -440.06951904296875, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2799049615859985, |
|
"rewards/margins": 0.6007462739944458, |
|
"rewards/rejected": -1.8806512355804443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.4997453689575195, |
|
"eval_logits/rejected": -2.484400749206543, |
|
"eval_logps/chosen": -361.1695556640625, |
|
"eval_logps/rejected": -433.0509948730469, |
|
"eval_loss": 0.10587478429079056, |
|
"eval_rewards/accuracies": 0.7421875, |
|
"eval_rewards/chosen": -1.0412991046905518, |
|
"eval_rewards/margins": 0.715677797794342, |
|
"eval_rewards/rejected": -1.756976842880249, |
|
"eval_runtime": 53.5221, |
|
"eval_samples_per_second": 37.368, |
|
"eval_steps_per_second": 0.598, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.456087827682495, |
|
"logits/rejected": -2.4703195095062256, |
|
"logps/chosen": -393.5277099609375, |
|
"logps/rejected": -452.48944091796875, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8982565999031067, |
|
"rewards/margins": 0.733210027217865, |
|
"rewards/rejected": -1.6314666271209717, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.432919979095459, |
|
"logits/rejected": -2.4045770168304443, |
|
"logps/chosen": -383.92626953125, |
|
"logps/rejected": -415.4346618652344, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2355773448944092, |
|
"rewards/margins": 0.5543798804283142, |
|
"rewards/rejected": -1.789957046508789, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.447158098220825, |
|
"logits/rejected": -2.4324896335601807, |
|
"logps/chosen": -353.08367919921875, |
|
"logps/rejected": -397.47900390625, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9431230425834656, |
|
"rewards/margins": 0.58272784948349, |
|
"rewards/rejected": -1.5258508920669556, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.369117021560669, |
|
"logits/rejected": -2.3718531131744385, |
|
"logps/chosen": -412.95733642578125, |
|
"logps/rejected": -438.8494567871094, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.016252040863037, |
|
"rewards/margins": 0.6379404067993164, |
|
"rewards/rejected": -1.654192328453064, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.3740622997283936, |
|
"logits/rejected": -2.363107204437256, |
|
"logps/chosen": -373.68603515625, |
|
"logps/rejected": -428.0882873535156, |
|
"loss": 0.1033, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0415781736373901, |
|
"rewards/margins": 0.6691091656684875, |
|
"rewards/rejected": -1.7106873989105225, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.3177034854888916, |
|
"logits/rejected": -2.2815842628479004, |
|
"logps/chosen": -427.379638671875, |
|
"logps/rejected": -440.7774963378906, |
|
"loss": 0.096, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3660972118377686, |
|
"rewards/margins": 0.6263972520828247, |
|
"rewards/rejected": -1.9924943447113037, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.3047027587890625, |
|
"logits/rejected": -2.273170232772827, |
|
"logps/chosen": -394.751220703125, |
|
"logps/rejected": -439.4383850097656, |
|
"loss": 0.1076, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2442924976348877, |
|
"rewards/margins": 0.6077025532722473, |
|
"rewards/rejected": -1.8519952297210693, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.3437588214874268, |
|
"logits/rejected": -2.3205742835998535, |
|
"logps/chosen": -443.5282287597656, |
|
"logps/rejected": -496.1018981933594, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.247488021850586, |
|
"rewards/margins": 0.7255340218544006, |
|
"rewards/rejected": -1.973022222518921, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.310223340988159, |
|
"logits/rejected": -2.295687198638916, |
|
"logps/chosen": -403.1307373046875, |
|
"logps/rejected": -424.34088134765625, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3174023628234863, |
|
"rewards/margins": 0.6217811107635498, |
|
"rewards/rejected": -1.9391834735870361, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.3279759883880615, |
|
"logits/rejected": -2.2855122089385986, |
|
"logps/chosen": -419.40191650390625, |
|
"logps/rejected": -439.6763610839844, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2264329195022583, |
|
"rewards/margins": 0.6521132588386536, |
|
"rewards/rejected": -1.8785459995269775, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.3481948375701904, |
|
"eval_logits/rejected": -2.3263120651245117, |
|
"eval_logps/chosen": -374.2532653808594, |
|
"eval_logps/rejected": -455.86981201171875, |
|
"eval_loss": 0.10495973378419876, |
|
"eval_rewards/accuracies": 0.7734375, |
|
"eval_rewards/chosen": -1.1721361875534058, |
|
"eval_rewards/margins": 0.8130289316177368, |
|
"eval_rewards/rejected": -1.9851651191711426, |
|
"eval_runtime": 53.5201, |
|
"eval_samples_per_second": 37.369, |
|
"eval_steps_per_second": 0.598, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.2640485763549805, |
|
"logits/rejected": -2.2166085243225098, |
|
"logps/chosen": -430.3575744628906, |
|
"logps/rejected": -452.4070739746094, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3840069770812988, |
|
"rewards/margins": 0.6664990186691284, |
|
"rewards/rejected": -2.050506114959717, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.330543041229248, |
|
"logits/rejected": -2.3223278522491455, |
|
"logps/chosen": -373.0736083984375, |
|
"logps/rejected": -430.39093017578125, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1866815090179443, |
|
"rewards/margins": 0.6963299512863159, |
|
"rewards/rejected": -1.8830114603042603, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.3311431407928467, |
|
"logits/rejected": -2.3115246295928955, |
|
"logps/chosen": -381.23162841796875, |
|
"logps/rejected": -418.2054748535156, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2116649150848389, |
|
"rewards/margins": 0.44343310594558716, |
|
"rewards/rejected": -1.6550979614257812, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.3534882068634033, |
|
"logits/rejected": -2.3318495750427246, |
|
"logps/chosen": -390.70855712890625, |
|
"logps/rejected": -460.699462890625, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3004801273345947, |
|
"rewards/margins": 0.6981537938117981, |
|
"rewards/rejected": -1.9986339807510376, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.3172473907470703, |
|
"logits/rejected": -2.267843723297119, |
|
"logps/chosen": -428.8907775878906, |
|
"logps/rejected": -482.414794921875, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2873867750167847, |
|
"rewards/margins": 0.7597323656082153, |
|
"rewards/rejected": -2.047119140625, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.361574411392212, |
|
"logits/rejected": -2.313871383666992, |
|
"logps/chosen": -432.11572265625, |
|
"logps/rejected": -472.91900634765625, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.304806113243103, |
|
"rewards/margins": 0.7939103841781616, |
|
"rewards/rejected": -2.0987167358398438, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.31288480758667, |
|
"logits/rejected": -2.290679454803467, |
|
"logps/chosen": -437.47747802734375, |
|
"logps/rejected": -498.04449462890625, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3451701402664185, |
|
"rewards/margins": 0.849733829498291, |
|
"rewards/rejected": -2.19490385055542, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.2611796855926514, |
|
"logits/rejected": -2.2090706825256348, |
|
"logps/chosen": -438.38714599609375, |
|
"logps/rejected": -479.932373046875, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.458297610282898, |
|
"rewards/margins": 0.8283805847167969, |
|
"rewards/rejected": -2.2866783142089844, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.280311107635498, |
|
"logits/rejected": -2.2701356410980225, |
|
"logps/chosen": -410.5409240722656, |
|
"logps/rejected": -455.7091369628906, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.457076072692871, |
|
"rewards/margins": 0.6067465543746948, |
|
"rewards/rejected": -2.0638227462768555, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.3210911750793457, |
|
"logits/rejected": -2.3131260871887207, |
|
"logps/chosen": -401.9046936035156, |
|
"logps/rejected": -457.5972595214844, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4521684646606445, |
|
"rewards/margins": 0.6101087331771851, |
|
"rewards/rejected": -2.062277317047119, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.326262950897217, |
|
"eval_logits/rejected": -2.3033149242401123, |
|
"eval_logps/chosen": -388.9528503417969, |
|
"eval_logps/rejected": -474.4743347167969, |
|
"eval_loss": 0.09840647131204605, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": -1.3191319704055786, |
|
"eval_rewards/margins": 0.8520787954330444, |
|
"eval_rewards/rejected": -2.171210765838623, |
|
"eval_runtime": 53.5129, |
|
"eval_samples_per_second": 37.374, |
|
"eval_steps_per_second": 0.598, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.2848544120788574, |
|
"logits/rejected": -2.247723340988159, |
|
"logps/chosen": -388.43853759765625, |
|
"logps/rejected": -440.38482666015625, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3306916952133179, |
|
"rewards/margins": 0.6538442969322205, |
|
"rewards/rejected": -1.9845361709594727, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.3198795318603516, |
|
"logits/rejected": -2.270514488220215, |
|
"logps/chosen": -397.82763671875, |
|
"logps/rejected": -440.84564208984375, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2681812047958374, |
|
"rewards/margins": 0.7547353506088257, |
|
"rewards/rejected": -2.022916555404663, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.2713308334350586, |
|
"logits/rejected": -2.2533681392669678, |
|
"logps/chosen": -420.921142578125, |
|
"logps/rejected": -457.1463928222656, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3613792657852173, |
|
"rewards/margins": 0.7089418172836304, |
|
"rewards/rejected": -2.0703210830688477, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.284634590148926, |
|
"logits/rejected": -2.2636537551879883, |
|
"logps/chosen": -399.95465087890625, |
|
"logps/rejected": -473.30303955078125, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4010311365127563, |
|
"rewards/margins": 0.7459251284599304, |
|
"rewards/rejected": -2.146956443786621, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.3177847862243652, |
|
"logits/rejected": -2.2959697246551514, |
|
"logps/chosen": -393.1022644042969, |
|
"logps/rejected": -452.5653381347656, |
|
"loss": 0.096, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2756528854370117, |
|
"rewards/margins": 0.722722053527832, |
|
"rewards/rejected": -1.9983749389648438, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.3011326789855957, |
|
"logits/rejected": -2.251350164413452, |
|
"logps/chosen": -408.0278015136719, |
|
"logps/rejected": -439.8304138183594, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4015414714813232, |
|
"rewards/margins": 0.723892867565155, |
|
"rewards/rejected": -2.125434398651123, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.2898736000061035, |
|
"logits/rejected": -2.260240077972412, |
|
"logps/chosen": -394.651611328125, |
|
"logps/rejected": -457.83123779296875, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2343953847885132, |
|
"rewards/margins": 0.8395439982414246, |
|
"rewards/rejected": -2.073939323425293, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1291348352467166, |
|
"train_runtime": 3954.3407, |
|
"train_samples_per_second": 15.46, |
|
"train_steps_per_second": 0.121 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|