|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988623435722411, |
|
"eval_steps": 10000000, |
|
"global_step": 439, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 23.98402260612519, |
|
"learning_rate": 2.2727272727272727e-09, |
|
"logits/chosen": -1.6768856048583984, |
|
"logits/rejected": -1.7259055376052856, |
|
"logps/chosen": -394.9654541015625, |
|
"logps/rejected": -320.0859069824219, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 31.95891283237601, |
|
"learning_rate": 2.2727272727272725e-08, |
|
"logits/chosen": -1.7029528617858887, |
|
"logits/rejected": -1.6683764457702637, |
|
"logps/chosen": -429.5246887207031, |
|
"logps/rejected": -403.747314453125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 2.8328322514425963e-05, |
|
"rewards/margins": -0.0006085141212679446, |
|
"rewards/rejected": 0.0006368425092659891, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 49.89152060792663, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": -1.7806730270385742, |
|
"logits/rejected": -1.7358741760253906, |
|
"logps/chosen": -442.21636962890625, |
|
"logps/rejected": -401.44000244140625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0007078830385580659, |
|
"rewards/margins": 0.0019328873604536057, |
|
"rewards/rejected": -0.0026407705154269934, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 35.37044354910807, |
|
"learning_rate": 6.818181818181817e-08, |
|
"logits/chosen": -1.751228928565979, |
|
"logits/rejected": -1.6827186346054077, |
|
"logps/chosen": -440.0089416503906, |
|
"logps/rejected": -401.7633361816406, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.002162130083888769, |
|
"rewards/margins": 0.012089937925338745, |
|
"rewards/rejected": -0.014252068474888802, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 24.433299552566275, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -1.7572921514511108, |
|
"logits/rejected": -1.6904194355010986, |
|
"logps/chosen": -431.66802978515625, |
|
"logps/rejected": -390.01898193359375, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.00594430323690176, |
|
"rewards/margins": 0.07497542351484299, |
|
"rewards/rejected": -0.06903111934661865, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 31.92529179185726, |
|
"learning_rate": 9.994307990108962e-08, |
|
"logits/chosen": -1.7621084451675415, |
|
"logits/rejected": -1.6976591348648071, |
|
"logps/chosen": -444.54913330078125, |
|
"logps/rejected": -393.5503234863281, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04167298600077629, |
|
"rewards/margins": 0.09635747224092484, |
|
"rewards/rejected": -0.05468447878956795, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 28.704756119671675, |
|
"learning_rate": 9.959570405988094e-08, |
|
"logits/chosen": -1.8457626104354858, |
|
"logits/rejected": -1.7673368453979492, |
|
"logps/chosen": -385.0513916015625, |
|
"logps/rejected": -358.7567443847656, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.10071470588445663, |
|
"rewards/margins": 0.06823419034481049, |
|
"rewards/rejected": 0.032480526715517044, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 24.531377945654775, |
|
"learning_rate": 9.893476820924666e-08, |
|
"logits/chosen": -1.9651501178741455, |
|
"logits/rejected": -1.8813607692718506, |
|
"logps/chosen": -403.64105224609375, |
|
"logps/rejected": -371.46234130859375, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.2845568060874939, |
|
"rewards/margins": 0.13415710628032684, |
|
"rewards/rejected": 0.15039967000484467, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 27.08910191775396, |
|
"learning_rate": 9.796445099843647e-08, |
|
"logits/chosen": -2.0368916988372803, |
|
"logits/rejected": -1.9531713724136353, |
|
"logps/chosen": -407.11553955078125, |
|
"logps/rejected": -381.14794921875, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.3643694519996643, |
|
"rewards/margins": 0.1756153404712677, |
|
"rewards/rejected": 0.18875406682491302, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 38.20992212526602, |
|
"learning_rate": 9.669088708527066e-08, |
|
"logits/chosen": -2.025817394256592, |
|
"logits/rejected": -1.9624574184417725, |
|
"logps/chosen": -415.40020751953125, |
|
"logps/rejected": -390.00140380859375, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.36643102765083313, |
|
"rewards/margins": 0.1800619214773178, |
|
"rewards/rejected": 0.1863691359758377, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 27.439269349288335, |
|
"learning_rate": 9.512212835085849e-08, |
|
"logits/chosen": -2.1013572216033936, |
|
"logits/rejected": -2.0152974128723145, |
|
"logps/chosen": -388.7185974121094, |
|
"logps/rejected": -383.03546142578125, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.37033963203430176, |
|
"rewards/margins": 0.19511529803276062, |
|
"rewards/rejected": 0.17522430419921875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 26.198395783666243, |
|
"learning_rate": 9.326809299301306e-08, |
|
"logits/chosen": -2.1095404624938965, |
|
"logits/rejected": -2.0083327293395996, |
|
"logps/chosen": -427.6131896972656, |
|
"logps/rejected": -393.1694030761719, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.37072473764419556, |
|
"rewards/margins": 0.26910915970802307, |
|
"rewards/rejected": 0.10161559283733368, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 26.3105476660504, |
|
"learning_rate": 9.114050282021158e-08, |
|
"logits/chosen": -2.1161999702453613, |
|
"logits/rejected": -2.05537748336792, |
|
"logps/chosen": -435.20086669921875, |
|
"logps/rejected": -412.62078857421875, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.268494188785553, |
|
"rewards/margins": 0.23757004737854004, |
|
"rewards/rejected": 0.030924171209335327, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 27.752798140691322, |
|
"learning_rate": 8.875280914254802e-08, |
|
"logits/chosen": -2.1366939544677734, |
|
"logits/rejected": -2.0489516258239746, |
|
"logps/chosen": -392.9619140625, |
|
"logps/rejected": -363.98553466796875, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.2517772316932678, |
|
"rewards/margins": 0.2715442478656769, |
|
"rewards/rejected": -0.01976701058447361, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 24.888665971199188, |
|
"learning_rate": 8.612010772821971e-08, |
|
"logits/chosen": -2.180723190307617, |
|
"logits/rejected": -2.1342811584472656, |
|
"logps/chosen": -445.15472412109375, |
|
"logps/rejected": -407.91998291015625, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.32857105135917664, |
|
"rewards/margins": 0.3196411728858948, |
|
"rewards/rejected": 0.008929857984185219, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 27.895144442719968, |
|
"learning_rate": 8.325904336322055e-08, |
|
"logits/chosen": -2.172715663909912, |
|
"logits/rejected": -2.112837314605713, |
|
"logps/chosen": -402.29217529296875, |
|
"logps/rejected": -380.25335693359375, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1980087012052536, |
|
"rewards/margins": 0.2475043088197708, |
|
"rewards/rejected": -0.049495600163936615, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 24.760628369907216, |
|
"learning_rate": 8.01877046176447e-08, |
|
"logits/chosen": -2.131298303604126, |
|
"logits/rejected": -2.059814453125, |
|
"logps/chosen": -396.01788330078125, |
|
"logps/rejected": -379.63665771484375, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.12456746399402618, |
|
"rewards/margins": 0.23458366096019745, |
|
"rewards/rejected": -0.11001620441675186, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 22.13569267518615, |
|
"learning_rate": 7.692550948392249e-08, |
|
"logits/chosen": -2.1969549655914307, |
|
"logits/rejected": -2.1351304054260254, |
|
"logps/chosen": -421.291015625, |
|
"logps/rejected": -387.77484130859375, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.19595439732074738, |
|
"rewards/margins": 0.26243916153907776, |
|
"rewards/rejected": -0.06648479402065277, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 29.37383088378205, |
|
"learning_rate": 7.349308261002021e-08, |
|
"logits/chosen": -2.1651830673217773, |
|
"logits/rejected": -2.1070454120635986, |
|
"logps/chosen": -426.6806640625, |
|
"logps/rejected": -408.07781982421875, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2470887452363968, |
|
"rewards/margins": 0.2750667631626129, |
|
"rewards/rejected": -0.02797803282737732, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 29.642029984354075, |
|
"learning_rate": 6.991212490377531e-08, |
|
"logits/chosen": -2.239077091217041, |
|
"logits/rejected": -2.188472270965576, |
|
"logps/chosen": -460.720947265625, |
|
"logps/rejected": -433.29608154296875, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.17696049809455872, |
|
"rewards/margins": 0.3639344274997711, |
|
"rewards/rejected": -0.1869739592075348, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 40.17804362794063, |
|
"learning_rate": 6.620527633276978e-08, |
|
"logits/chosen": -2.1767477989196777, |
|
"logits/rejected": -2.1026599407196045, |
|
"logps/chosen": -425.94677734375, |
|
"logps/rejected": -428.23846435546875, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.10997031629085541, |
|
"rewards/margins": 0.38212892413139343, |
|
"rewards/rejected": -0.2721586227416992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 23.44190889972074, |
|
"learning_rate": 6.239597278716581e-08, |
|
"logits/chosen": -2.2649803161621094, |
|
"logits/rejected": -2.202864646911621, |
|
"logps/chosen": -410.3318786621094, |
|
"logps/rejected": -393.8056640625, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.14795458316802979, |
|
"rewards/margins": 0.41721048951148987, |
|
"rewards/rejected": -0.26925593614578247, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 25.725130617472978, |
|
"learning_rate": 5.8508297910462456e-08, |
|
"logits/chosen": -2.2174572944641113, |
|
"logits/rejected": -2.1320137977600098, |
|
"logps/chosen": -415.51153564453125, |
|
"logps/rejected": -422.1241149902344, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.06063612177968025, |
|
"rewards/margins": 0.38312196731567383, |
|
"rewards/rejected": -0.32248586416244507, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 33.748772620696556, |
|
"learning_rate": 5.456683083494731e-08, |
|
"logits/chosen": -2.204589366912842, |
|
"logits/rejected": -2.164806842803955, |
|
"logps/chosen": -464.0941467285156, |
|
"logps/rejected": -462.9925231933594, |
|
"loss": 0.5856, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1456037312746048, |
|
"rewards/margins": 0.29207009077072144, |
|
"rewards/rejected": -0.14646635949611664, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 35.95467752105537, |
|
"learning_rate": 5.059649078450834e-08, |
|
"logits/chosen": -2.2099993228912354, |
|
"logits/rejected": -2.1632981300354004, |
|
"logps/chosen": -434.889404296875, |
|
"logps/rejected": -441.2001037597656, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.03340945765376091, |
|
"rewards/margins": 0.3251820206642151, |
|
"rewards/rejected": -0.2917725443840027, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 21.692896426665428, |
|
"learning_rate": 4.6622379527277186e-08, |
|
"logits/chosen": -2.2246696949005127, |
|
"logits/rejected": -2.1740729808807373, |
|
"logps/chosen": -406.245361328125, |
|
"logps/rejected": -402.9896545410156, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07031740248203278, |
|
"rewards/margins": 0.33437561988830566, |
|
"rewards/rejected": -0.40469303727149963, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 39.74076278883575, |
|
"learning_rate": 4.26696226741691e-08, |
|
"logits/chosen": -2.2478280067443848, |
|
"logits/rejected": -2.1786162853240967, |
|
"logps/chosen": -446.00897216796875, |
|
"logps/rejected": -439.8905334472656, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13826611638069153, |
|
"rewards/margins": 0.3671457767486572, |
|
"rewards/rejected": -0.5054119229316711, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 24.611550372970484, |
|
"learning_rate": 3.876321082668098e-08, |
|
"logits/chosen": -2.306821346282959, |
|
"logits/rejected": -2.2316880226135254, |
|
"logps/chosen": -465.56439208984375, |
|
"logps/rejected": -459.61700439453125, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.019088072702288628, |
|
"rewards/margins": 0.4447278082370758, |
|
"rewards/rejected": -0.463815838098526, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 39.92086883833645, |
|
"learning_rate": 3.492784157826244e-08, |
|
"logits/chosen": -2.236232280731201, |
|
"logits/rejected": -2.1331005096435547, |
|
"logps/chosen": -452.8758850097656, |
|
"logps/rejected": -426.164306640625, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.054852552711963654, |
|
"rewards/margins": 0.43687576055526733, |
|
"rewards/rejected": -0.49172839522361755, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 30.441413413514407, |
|
"learning_rate": 3.118776336817812e-08, |
|
"logits/chosen": -2.286245346069336, |
|
"logits/rejected": -2.217912435531616, |
|
"logps/chosen": -441.1709899902344, |
|
"logps/rejected": -427.1979064941406, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.020920906215906143, |
|
"rewards/margins": 0.44389209151268005, |
|
"rewards/rejected": -0.4648129940032959, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 27.497209882056495, |
|
"learning_rate": 2.7566622175067443e-08, |
|
"logits/chosen": -2.2853000164031982, |
|
"logits/rejected": -2.2172305583953857, |
|
"logps/chosen": -439.1316833496094, |
|
"logps/rejected": -443.3128967285156, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03673394396901131, |
|
"rewards/margins": 0.42858797311782837, |
|
"rewards/rejected": -0.46532192826271057, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 35.65048747036057, |
|
"learning_rate": 2.408731201945432e-08, |
|
"logits/chosen": -2.270371437072754, |
|
"logits/rejected": -2.2171905040740967, |
|
"logps/chosen": -437.3782653808594, |
|
"logps/rejected": -453.547607421875, |
|
"loss": 0.5546, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.06338675320148468, |
|
"rewards/margins": 0.3352622985839844, |
|
"rewards/rejected": -0.39864906668663025, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 34.348920131629896, |
|
"learning_rate": 2.0771830220378112e-08, |
|
"logits/chosen": -2.223020076751709, |
|
"logits/rejected": -2.158247709274292, |
|
"logps/chosen": -462.366455078125, |
|
"logps/rejected": -461.9310607910156, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.025463122874498367, |
|
"rewards/margins": 0.39216503500938416, |
|
"rewards/rejected": -0.41762813925743103, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 27.509152301102358, |
|
"learning_rate": 1.7641138321260257e-08, |
|
"logits/chosen": -2.2643046379089355, |
|
"logits/rejected": -2.188499689102173, |
|
"logps/chosen": -440.750732421875, |
|
"logps/rejected": -428.7913513183594, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06256476789712906, |
|
"rewards/margins": 0.46622103452682495, |
|
"rewards/rejected": -0.5287858247756958, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 27.197721402060907, |
|
"learning_rate": 1.4715029564277793e-08, |
|
"logits/chosen": -2.340247631072998, |
|
"logits/rejected": -2.2862460613250732, |
|
"logps/chosen": -450.05523681640625, |
|
"logps/rejected": -448.6039123535156, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.009611198678612709, |
|
"rewards/margins": 0.4653921127319336, |
|
"rewards/rejected": -0.45578089356422424, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 23.708931464533432, |
|
"learning_rate": 1.2012003751113343e-08, |
|
"logits/chosen": -2.325669288635254, |
|
"logits/rejected": -2.259887933731079, |
|
"logps/chosen": -452.64862060546875, |
|
"logps/rejected": -454.2327575683594, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17242571711540222, |
|
"rewards/margins": 0.4136194586753845, |
|
"rewards/rejected": -0.5860452055931091, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 32.57218388588953, |
|
"learning_rate": 9.549150281252633e-09, |
|
"logits/chosen": -2.284872531890869, |
|
"logits/rejected": -2.2292895317077637, |
|
"logps/chosen": -473.66009521484375, |
|
"logps/rejected": -472.35919189453125, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11653508991003036, |
|
"rewards/margins": 0.4531070590019226, |
|
"rewards/rejected": -0.5696421265602112, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 36.6888447736333, |
|
"learning_rate": 7.3420401072985306e-09, |
|
"logits/chosen": -2.322558641433716, |
|
"logits/rejected": -2.267219066619873, |
|
"logps/chosen": -451.59295654296875, |
|
"logps/rejected": -459.50665283203125, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.14963454008102417, |
|
"rewards/margins": 0.4179585874080658, |
|
"rewards/rejected": -0.5675932168960571, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 25.485415219281943, |
|
"learning_rate": 5.404627290395369e-09, |
|
"logits/chosen": -2.2841172218322754, |
|
"logits/rejected": -2.213308334350586, |
|
"logps/chosen": -445.1299743652344, |
|
"logps/rejected": -450.1717224121094, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.027432629838585854, |
|
"rewards/margins": 0.4644508957862854, |
|
"rewards/rejected": -0.4918835163116455, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 37.42685396157706, |
|
"learning_rate": 3.74916077816162e-09, |
|
"logits/chosen": -2.292269706726074, |
|
"logits/rejected": -2.226890802383423, |
|
"logps/chosen": -436.8121643066406, |
|
"logps/rejected": -433.2718200683594, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1562749743461609, |
|
"rewards/margins": 0.388277530670166, |
|
"rewards/rejected": -0.5445524454116821, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 24.281602089300215, |
|
"learning_rate": 2.386106962899165e-09, |
|
"logits/chosen": -2.218071460723877, |
|
"logits/rejected": -2.1384034156799316, |
|
"logps/chosen": -447.8563537597656, |
|
"logps/rejected": -437.1399841308594, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.219069242477417, |
|
"rewards/margins": 0.37984699010849, |
|
"rewards/rejected": -0.598916232585907, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 28.633671701703214, |
|
"learning_rate": 1.3240835096913706e-09, |
|
"logits/chosen": -2.2583167552948, |
|
"logits/rejected": -2.1519083976745605, |
|
"logps/chosen": -440.5953063964844, |
|
"logps/rejected": -431.39501953125, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.04258907586336136, |
|
"rewards/margins": 0.5055002570152283, |
|
"rewards/rejected": -0.5480893850326538, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 25.241002875120977, |
|
"learning_rate": 5.698048727497462e-10, |
|
"logits/chosen": -2.2798821926116943, |
|
"logits/rejected": -2.2044219970703125, |
|
"logps/chosen": -424.9578552246094, |
|
"logps/rejected": -435.0152893066406, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11878044903278351, |
|
"rewards/margins": 0.5139662623405457, |
|
"rewards/rejected": -0.632746696472168, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 23.361501927411613, |
|
"learning_rate": 1.2803984447259387e-10, |
|
"logits/chosen": -2.295804977416992, |
|
"logits/rejected": -2.235670328140259, |
|
"logps/chosen": -475.6675720214844, |
|
"logps/rejected": -457.9853515625, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.12296229600906372, |
|
"rewards/margins": 0.4703094959259033, |
|
"rewards/rejected": -0.593271791934967, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 439, |
|
"total_flos": 0.0, |
|
"train_loss": 0.048529281703106095, |
|
"train_runtime": 584.5423, |
|
"train_samples_per_second": 96.205, |
|
"train_steps_per_second": 0.751 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 439, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|