|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988623435722411, |
|
"eval_steps": 10000000, |
|
"global_step": 439, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 33.30332403665468, |
|
"learning_rate": 2.2727272727272727e-09, |
|
"logits/chosen": -1.6768856048583984, |
|
"logits/rejected": -1.7259055376052856, |
|
"logps/chosen": -1.2793102264404297, |
|
"logps/rejected": -1.2162058353424072, |
|
"loss": 1.3133, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 37.64303926905523, |
|
"learning_rate": 2.2727272727272725e-08, |
|
"logits/chosen": -1.7033135890960693, |
|
"logits/rejected": -1.668673038482666, |
|
"logps/chosen": -1.2131016254425049, |
|
"logps/rejected": -1.22050142288208, |
|
"loss": 1.313, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.00040783319855108857, |
|
"rewards/margins": -8.263149356935173e-05, |
|
"rewards/rejected": 0.0004904646775685251, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 38.69260337999141, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": -1.7795250415802002, |
|
"logits/rejected": -1.7348783016204834, |
|
"logps/chosen": -1.1448484659194946, |
|
"logps/rejected": -1.1852957010269165, |
|
"loss": 1.3122, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.003251913469284773, |
|
"rewards/margins": 0.0004202231648378074, |
|
"rewards/rejected": -0.003672136692330241, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 44.09359407998382, |
|
"learning_rate": 6.818181818181817e-08, |
|
"logits/chosen": -1.7442439794540405, |
|
"logits/rejected": -1.6752439737319946, |
|
"logps/chosen": -1.1954559087753296, |
|
"logps/rejected": -1.248280644416809, |
|
"loss": 1.3059, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.025108838453888893, |
|
"rewards/margins": 0.01119022723287344, |
|
"rewards/rejected": -0.03629906848073006, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 28.883029165176804, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -1.7305904626846313, |
|
"logits/rejected": -1.6642875671386719, |
|
"logps/chosen": -1.2533624172210693, |
|
"logps/rejected": -1.3383153676986694, |
|
"loss": 1.2922, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.09282750636339188, |
|
"rewards/margins": 0.06525905430316925, |
|
"rewards/rejected": -0.15808656811714172, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 36.39900209589975, |
|
"learning_rate": 9.994307990108962e-08, |
|
"logits/chosen": -1.690720796585083, |
|
"logits/rejected": -1.625451683998108, |
|
"logps/chosen": -1.3044583797454834, |
|
"logps/rejected": -1.3643444776535034, |
|
"loss": 1.2643, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.19274269044399261, |
|
"rewards/margins": 0.0795869454741478, |
|
"rewards/rejected": -0.2723296284675598, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 43.081578827458706, |
|
"learning_rate": 9.959570405988094e-08, |
|
"logits/chosen": -1.71735417842865, |
|
"logits/rejected": -1.6361076831817627, |
|
"logps/chosen": -1.3119524717330933, |
|
"logps/rejected": -1.4046932458877563, |
|
"loss": 1.2541, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4057086110115051, |
|
"rewards/margins": 0.0816243588924408, |
|
"rewards/rejected": -0.4873329699039459, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 33.06897498171632, |
|
"learning_rate": 9.893476820924666e-08, |
|
"logits/chosen": -1.7922325134277344, |
|
"logits/rejected": -1.7017757892608643, |
|
"logps/chosen": -1.5047810077667236, |
|
"logps/rejected": -1.630091667175293, |
|
"loss": 1.2355, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5838777422904968, |
|
"rewards/margins": 0.1572917252779007, |
|
"rewards/rejected": -0.7411695718765259, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 37.24284057004877, |
|
"learning_rate": 9.796445099843647e-08, |
|
"logits/chosen": -1.774518370628357, |
|
"logits/rejected": -1.6856935024261475, |
|
"logps/chosen": -1.5832115411758423, |
|
"logps/rejected": -1.7514270544052124, |
|
"loss": 1.232, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7524863481521606, |
|
"rewards/margins": 0.21537098288536072, |
|
"rewards/rejected": -0.9678572416305542, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 45.064021238231845, |
|
"learning_rate": 9.669088708527066e-08, |
|
"logits/chosen": -1.7184202671051025, |
|
"logits/rejected": -1.6467373371124268, |
|
"logps/chosen": -1.7363929748535156, |
|
"logps/rejected": -1.8083902597427368, |
|
"loss": 1.2104, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0013912916183472, |
|
"rewards/margins": 0.12990526854991913, |
|
"rewards/rejected": -1.1312966346740723, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 44.286763175528534, |
|
"learning_rate": 9.512212835085849e-08, |
|
"logits/chosen": -1.757889986038208, |
|
"logits/rejected": -1.6645339727401733, |
|
"logps/chosen": -1.779813528060913, |
|
"logps/rejected": -1.9353383779525757, |
|
"loss": 1.1819, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2227165699005127, |
|
"rewards/margins": 0.2256297618150711, |
|
"rewards/rejected": -1.448346495628357, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 43.36692624974112, |
|
"learning_rate": 9.326809299301306e-08, |
|
"logits/chosen": -1.761940360069275, |
|
"logits/rejected": -1.6550146341323853, |
|
"logps/chosen": -1.8854389190673828, |
|
"logps/rejected": -2.1229450702667236, |
|
"loss": 1.1674, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.369593620300293, |
|
"rewards/margins": 0.371805876493454, |
|
"rewards/rejected": -1.7413995265960693, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 41.78554813342914, |
|
"learning_rate": 9.114050282021158e-08, |
|
"logits/chosen": -1.7491047382354736, |
|
"logits/rejected": -1.6867637634277344, |
|
"logps/chosen": -1.8475677967071533, |
|
"logps/rejected": -2.0627474784851074, |
|
"loss": 1.1591, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.400010108947754, |
|
"rewards/margins": 0.3355749249458313, |
|
"rewards/rejected": -1.7355849742889404, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 40.42754129950971, |
|
"learning_rate": 8.875280914254802e-08, |
|
"logits/chosen": -1.737173080444336, |
|
"logits/rejected": -1.644561529159546, |
|
"logps/chosen": -2.0521700382232666, |
|
"logps/rejected": -2.296677827835083, |
|
"loss": 1.1348, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.7186797857284546, |
|
"rewards/margins": 0.43216562271118164, |
|
"rewards/rejected": -2.1508452892303467, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 39.13812568144021, |
|
"learning_rate": 8.612010772821971e-08, |
|
"logits/chosen": -1.7612278461456299, |
|
"logits/rejected": -1.715679407119751, |
|
"logps/chosen": -2.0781049728393555, |
|
"logps/rejected": -2.2759194374084473, |
|
"loss": 1.127, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7284520864486694, |
|
"rewards/margins": 0.4454485774040222, |
|
"rewards/rejected": -2.173900604248047, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 37.596667789585375, |
|
"learning_rate": 8.325904336322055e-08, |
|
"logits/chosen": -1.735419511795044, |
|
"logits/rejected": -1.6814868450164795, |
|
"logps/chosen": -2.305412769317627, |
|
"logps/rejected": -2.55448317527771, |
|
"loss": 1.1399, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.2760961055755615, |
|
"rewards/margins": 0.4037933945655823, |
|
"rewards/rejected": -2.679889440536499, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 37.85769539137667, |
|
"learning_rate": 8.01877046176447e-08, |
|
"logits/chosen": -1.6751991510391235, |
|
"logits/rejected": -1.6064836978912354, |
|
"logps/chosen": -2.5598020553588867, |
|
"logps/rejected": -2.8157076835632324, |
|
"loss": 1.09, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.763003349304199, |
|
"rewards/margins": 0.40903931856155396, |
|
"rewards/rejected": -3.1720428466796875, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 33.932219318133306, |
|
"learning_rate": 7.692550948392249e-08, |
|
"logits/chosen": -1.7231628894805908, |
|
"logits/rejected": -1.6755987405776978, |
|
"logps/chosen": -2.624762535095215, |
|
"logps/rejected": -2.9136133193969727, |
|
"loss": 1.1053, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.8290135860443115, |
|
"rewards/margins": 0.5568121671676636, |
|
"rewards/rejected": -3.3858256340026855, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 48.11500069751816, |
|
"learning_rate": 7.349308261002021e-08, |
|
"logits/chosen": -1.6858348846435547, |
|
"logits/rejected": -1.6378986835479736, |
|
"logps/chosen": -2.640817165374756, |
|
"logps/rejected": -2.949113368988037, |
|
"loss": 1.0837, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.887462615966797, |
|
"rewards/margins": 0.5172919034957886, |
|
"rewards/rejected": -3.404754638671875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 42.33388198011932, |
|
"learning_rate": 6.991212490377531e-08, |
|
"logits/chosen": -1.7423484325408936, |
|
"logits/rejected": -1.7037559747695923, |
|
"logps/chosen": -2.6472008228302, |
|
"logps/rejected": -3.0077877044677734, |
|
"loss": 1.0335, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.834195613861084, |
|
"rewards/margins": 0.6780903339385986, |
|
"rewards/rejected": -3.5122859477996826, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 49.65676542149092, |
|
"learning_rate": 6.620527633276978e-08, |
|
"logits/chosen": -1.6741564273834229, |
|
"logits/rejected": -1.6151821613311768, |
|
"logps/chosen": -2.735678195953369, |
|
"logps/rejected": -3.225632905960083, |
|
"loss": 1.0663, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0824506282806396, |
|
"rewards/margins": 0.80633145570755, |
|
"rewards/rejected": -3.888781785964966, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 46.53275655997813, |
|
"learning_rate": 6.239597278716581e-08, |
|
"logits/chosen": -1.7146323919296265, |
|
"logits/rejected": -1.6657183170318604, |
|
"logps/chosen": -3.098931312561035, |
|
"logps/rejected": -3.467923641204834, |
|
"loss": 1.0287, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.707202911376953, |
|
"rewards/margins": 0.7793115377426147, |
|
"rewards/rejected": -4.486514091491699, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 47.77625681519385, |
|
"learning_rate": 5.8508297910462456e-08, |
|
"logits/chosen": -1.6560382843017578, |
|
"logits/rejected": -1.5879056453704834, |
|
"logps/chosen": -3.1243553161621094, |
|
"logps/rejected": -3.6013519763946533, |
|
"loss": 1.032, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.921679735183716, |
|
"rewards/margins": 0.8384466171264648, |
|
"rewards/rejected": -4.76012659072876, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 45.3152158322423, |
|
"learning_rate": 5.456683083494731e-08, |
|
"logits/chosen": -1.6423381567001343, |
|
"logits/rejected": -1.6075971126556396, |
|
"logps/chosen": -3.002626419067383, |
|
"logps/rejected": -3.339411497116089, |
|
"loss": 1.068, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.6176345348358154, |
|
"rewards/margins": 0.6260865330696106, |
|
"rewards/rejected": -4.2437214851379395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 50.18712381426658, |
|
"learning_rate": 5.059649078450834e-08, |
|
"logits/chosen": -1.6221996545791626, |
|
"logits/rejected": -1.587894082069397, |
|
"logps/chosen": -2.9972426891326904, |
|
"logps/rejected": -3.4068732261657715, |
|
"loss": 1.0045, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.657778263092041, |
|
"rewards/margins": 0.6951833963394165, |
|
"rewards/rejected": -4.352961540222168, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 42.94625970616266, |
|
"learning_rate": 4.6622379527277186e-08, |
|
"logits/chosen": -1.6361802816390991, |
|
"logits/rejected": -1.5883018970489502, |
|
"logps/chosen": -3.0472984313964844, |
|
"logps/rejected": -3.4067275524139404, |
|
"loss": 1.0159, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.816819429397583, |
|
"rewards/margins": 0.65269935131073, |
|
"rewards/rejected": -4.469518661499023, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 49.81189434860217, |
|
"learning_rate": 4.26696226741691e-08, |
|
"logits/chosen": -1.6441590785980225, |
|
"logits/rejected": -1.5848346948623657, |
|
"logps/chosen": -3.2412009239196777, |
|
"logps/rejected": -3.667572021484375, |
|
"loss": 1.0333, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -4.0167555809021, |
|
"rewards/margins": 0.8128072619438171, |
|
"rewards/rejected": -4.829562664031982, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 46.698998113891435, |
|
"learning_rate": 3.876321082668098e-08, |
|
"logits/chosen": -1.6987736225128174, |
|
"logits/rejected": -1.6376842260360718, |
|
"logps/chosen": -3.1670312881469727, |
|
"logps/rejected": -3.625418186187744, |
|
"loss": 1.0046, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.857081174850464, |
|
"rewards/margins": 0.8981560468673706, |
|
"rewards/rejected": -4.755237579345703, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 54.35348471111713, |
|
"learning_rate": 3.492784157826244e-08, |
|
"logits/chosen": -1.63980233669281, |
|
"logits/rejected": -1.552004337310791, |
|
"logps/chosen": -3.2830092906951904, |
|
"logps/rejected": -3.8152382373809814, |
|
"loss": 1.0119, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.120265483856201, |
|
"rewards/margins": 0.9936790466308594, |
|
"rewards/rejected": -5.113945007324219, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 44.812750561614926, |
|
"learning_rate": 3.118776336817812e-08, |
|
"logits/chosen": -1.6625276803970337, |
|
"logits/rejected": -1.6122783422470093, |
|
"logps/chosen": -3.191256284713745, |
|
"logps/rejected": -3.756882429122925, |
|
"loss": 0.9859, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.97601580619812, |
|
"rewards/margins": 1.1168193817138672, |
|
"rewards/rejected": -5.092835426330566, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 45.89134253017904, |
|
"learning_rate": 2.7566622175067443e-08, |
|
"logits/chosen": -1.6413261890411377, |
|
"logits/rejected": -1.5825086832046509, |
|
"logps/chosen": -3.339484691619873, |
|
"logps/rejected": -3.9588654041290283, |
|
"loss": 0.994, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.261802673339844, |
|
"rewards/margins": 1.0827885866165161, |
|
"rewards/rejected": -5.3445916175842285, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 47.840562340740895, |
|
"learning_rate": 2.408731201945432e-08, |
|
"logits/chosen": -1.64263117313385, |
|
"logits/rejected": -1.6013950109481812, |
|
"logps/chosen": -3.251277446746826, |
|
"logps/rejected": -3.651395082473755, |
|
"loss": 1.0008, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.11476469039917, |
|
"rewards/margins": 0.7599252462387085, |
|
"rewards/rejected": -4.874690532684326, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 51.79356167073485, |
|
"learning_rate": 2.0771830220378112e-08, |
|
"logits/chosen": -1.5991486310958862, |
|
"logits/rejected": -1.5396713018417358, |
|
"logps/chosen": -3.2509543895721436, |
|
"logps/rejected": -3.6364498138427734, |
|
"loss": 1.0066, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -4.056139945983887, |
|
"rewards/margins": 0.7866916060447693, |
|
"rewards/rejected": -4.842831611633301, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 43.99284684689101, |
|
"learning_rate": 1.7641138321260257e-08, |
|
"logits/chosen": -1.6334537267684937, |
|
"logits/rejected": -1.5692901611328125, |
|
"logps/chosen": -3.158041477203369, |
|
"logps/rejected": -3.8241424560546875, |
|
"loss": 0.9807, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.9483726024627686, |
|
"rewards/margins": 1.2771327495574951, |
|
"rewards/rejected": -5.225505352020264, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 50.47520523412627, |
|
"learning_rate": 1.4715029564277793e-08, |
|
"logits/chosen": -1.6923463344573975, |
|
"logits/rejected": -1.6500104665756226, |
|
"logps/chosen": -3.0949554443359375, |
|
"logps/rejected": -3.6954338550567627, |
|
"loss": 1.0051, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.7805895805358887, |
|
"rewards/margins": 1.1268298625946045, |
|
"rewards/rejected": -4.907419681549072, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 48.63775480340643, |
|
"learning_rate": 1.2012003751113343e-08, |
|
"logits/chosen": -1.6796951293945312, |
|
"logits/rejected": -1.6264684200286865, |
|
"logps/chosen": -3.3736748695373535, |
|
"logps/rejected": -3.9650447368621826, |
|
"loss": 0.9726, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.423010349273682, |
|
"rewards/margins": 1.0823583602905273, |
|
"rewards/rejected": -5.505368709564209, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 55.26630420954737, |
|
"learning_rate": 9.549150281252633e-09, |
|
"logits/chosen": -1.6259968280792236, |
|
"logits/rejected": -1.5858485698699951, |
|
"logps/chosen": -3.211542844772339, |
|
"logps/rejected": -3.735614776611328, |
|
"loss": 0.9729, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -4.078815460205078, |
|
"rewards/margins": 0.9446828961372375, |
|
"rewards/rejected": -5.02349853515625, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 51.52261591377872, |
|
"learning_rate": 7.3420401072985306e-09, |
|
"logits/chosen": -1.6755279302597046, |
|
"logits/rejected": -1.6221554279327393, |
|
"logps/chosen": -3.299112319946289, |
|
"logps/rejected": -3.911120653152466, |
|
"loss": 0.9649, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.168996810913086, |
|
"rewards/margins": 1.1090896129608154, |
|
"rewards/rejected": -5.2780866622924805, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 51.72886520205544, |
|
"learning_rate": 5.404627290395369e-09, |
|
"logits/chosen": -1.6374752521514893, |
|
"logits/rejected": -1.5786619186401367, |
|
"logps/chosen": -3.220484972000122, |
|
"logps/rejected": -3.803584337234497, |
|
"loss": 0.968, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.061758518218994, |
|
"rewards/margins": 1.0862071514129639, |
|
"rewards/rejected": -5.147965431213379, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 55.56376010319163, |
|
"learning_rate": 3.74916077816162e-09, |
|
"logits/chosen": -1.6384235620498657, |
|
"logits/rejected": -1.5836341381072998, |
|
"logps/chosen": -3.2395005226135254, |
|
"logps/rejected": -3.746983051300049, |
|
"loss": 1.0011, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -4.179410934448242, |
|
"rewards/margins": 0.982707142829895, |
|
"rewards/rejected": -5.162117958068848, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 54.81143409505458, |
|
"learning_rate": 2.386106962899165e-09, |
|
"logits/chosen": -1.5698174238204956, |
|
"logits/rejected": -1.5115009546279907, |
|
"logps/chosen": -3.4176878929138184, |
|
"logps/rejected": -3.958037853240967, |
|
"loss": 0.9695, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -4.382534027099609, |
|
"rewards/margins": 0.9680202603340149, |
|
"rewards/rejected": -5.350554466247559, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 56.75402221437199, |
|
"learning_rate": 1.3240835096913706e-09, |
|
"logits/chosen": -1.594696283340454, |
|
"logits/rejected": -1.502890944480896, |
|
"logps/chosen": -3.213305711746216, |
|
"logps/rejected": -3.921264171600342, |
|
"loss": 1.0286, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -4.083509922027588, |
|
"rewards/margins": 1.2770874500274658, |
|
"rewards/rejected": -5.360597133636475, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 47.085112169528884, |
|
"learning_rate": 5.698048727497462e-10, |
|
"logits/chosen": -1.6298091411590576, |
|
"logits/rejected": -1.5658090114593506, |
|
"logps/chosen": -3.3380351066589355, |
|
"logps/rejected": -3.9660251140594482, |
|
"loss": 0.983, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.293475151062012, |
|
"rewards/margins": 1.1220663785934448, |
|
"rewards/rejected": -5.415541648864746, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 46.70771599324875, |
|
"learning_rate": 1.2803984447259387e-10, |
|
"logits/chosen": -1.6368719339370728, |
|
"logits/rejected": -1.5942411422729492, |
|
"logps/chosen": -3.3361122608184814, |
|
"logps/rejected": -3.98066782951355, |
|
"loss": 0.9434, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -4.3075456619262695, |
|
"rewards/margins": 1.1938055753707886, |
|
"rewards/rejected": -5.501351356506348, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 439, |
|
"total_flos": 0.0, |
|
"train_loss": 1.0809600353240967, |
|
"train_runtime": 6838.8864, |
|
"train_samples_per_second": 8.223, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 439, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|