|
{ |
|
"best_metric": 1.0073015689849854, |
|
"best_model_checkpoint": "saves/Vicuna-7B-v1.5/lora/orpo/checkpoint-1500", |
|
"epoch": 2.997999555456768, |
|
"eval_steps": 500, |
|
"global_step": 1686, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017781729273171815, |
|
"grad_norm": 0.3158996105194092, |
|
"learning_rate": 4.9995745934141085e-06, |
|
"logits/chosen": -0.7898403406143188, |
|
"logits/rejected": -0.7731221914291382, |
|
"logps/chosen": -1.1474043130874634, |
|
"logps/rejected": -1.2031431198120117, |
|
"loss": 1.227, |
|
"odds_ratio_loss": 0.7959282994270325, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.11474044620990753, |
|
"rewards/margins": 0.005573858506977558, |
|
"rewards/rejected": -0.12031430006027222, |
|
"sft_loss": 1.1474043130874634, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03556345854634363, |
|
"grad_norm": 0.8646821975708008, |
|
"learning_rate": 4.9982812903243405e-06, |
|
"logits/chosen": -0.7618139982223511, |
|
"logits/rejected": -0.7260042428970337, |
|
"logps/chosen": -0.9931285977363586, |
|
"logps/rejected": -1.050875186920166, |
|
"loss": 1.0707, |
|
"odds_ratio_loss": 0.7757659554481506, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09931285679340363, |
|
"rewards/margins": 0.005774644669145346, |
|
"rewards/rejected": -0.10508750379085541, |
|
"sft_loss": 0.9931285977363586, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05334518781951545, |
|
"grad_norm": 0.2927573025226593, |
|
"learning_rate": 4.996120496405222e-06, |
|
"logits/chosen": -0.7767494916915894, |
|
"logits/rejected": -0.7559677362442017, |
|
"logps/chosen": -1.040177345275879, |
|
"logps/rejected": -1.2401186227798462, |
|
"loss": 1.1087, |
|
"odds_ratio_loss": 0.6853717565536499, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10401773452758789, |
|
"rewards/margins": 0.019994117319583893, |
|
"rewards/rejected": -0.12401185184717178, |
|
"sft_loss": 1.040177345275879, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07112691709268726, |
|
"grad_norm": 0.3339848518371582, |
|
"learning_rate": 4.99309296196014e-06, |
|
"logits/chosen": -0.7875353693962097, |
|
"logits/rejected": -0.7857375741004944, |
|
"logps/chosen": -1.0764983892440796, |
|
"logps/rejected": -1.1753004789352417, |
|
"loss": 1.1498, |
|
"odds_ratio_loss": 0.7328984141349792, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10764984041452408, |
|
"rewards/margins": 0.009880214929580688, |
|
"rewards/rejected": -0.11753007024526596, |
|
"sft_loss": 1.0764983892440796, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08890864636585907, |
|
"grad_norm": 0.3153611719608307, |
|
"learning_rate": 4.989199738255166e-06, |
|
"logits/chosen": -0.7786640524864197, |
|
"logits/rejected": -0.7964621782302856, |
|
"logps/chosen": -1.0476799011230469, |
|
"logps/rejected": -1.1452114582061768, |
|
"loss": 1.1221, |
|
"odds_ratio_loss": 0.7446193099021912, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10476799309253693, |
|
"rewards/margins": 0.009753172285854816, |
|
"rewards/rejected": -0.11452116817235947, |
|
"sft_loss": 1.0476799011230469, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1066903756390309, |
|
"grad_norm": 2.7500874996185303, |
|
"learning_rate": 4.984442177154031e-06, |
|
"logits/chosen": -0.7653383612632751, |
|
"logits/rejected": -0.7529075741767883, |
|
"logps/chosen": -1.1525957584381104, |
|
"logps/rejected": -1.2310835123062134, |
|
"loss": 1.2305, |
|
"odds_ratio_loss": 0.7788733243942261, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.1152595728635788, |
|
"rewards/margins": 0.007848784327507019, |
|
"rewards/rejected": -0.12310836464166641, |
|
"sft_loss": 1.1525957584381104, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12447210491220272, |
|
"grad_norm": 0.3525276184082031, |
|
"learning_rate": 4.978821930648704e-06, |
|
"logits/chosen": -0.8071187734603882, |
|
"logits/rejected": -0.7696810364723206, |
|
"logps/chosen": -1.0399789810180664, |
|
"logps/rejected": -1.0721027851104736, |
|
"loss": 1.1208, |
|
"odds_ratio_loss": 0.8085241317749023, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.10399790853261948, |
|
"rewards/margins": 0.003212365787476301, |
|
"rewards/rejected": -0.10721027851104736, |
|
"sft_loss": 1.0399789810180664, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14225383418537452, |
|
"grad_norm": 0.6355476379394531, |
|
"learning_rate": 4.97234095028576e-06, |
|
"logits/chosen": -0.738179624080658, |
|
"logits/rejected": -0.7453175783157349, |
|
"logps/chosen": -1.1585901975631714, |
|
"logps/rejected": -1.2273097038269043, |
|
"loss": 1.2343, |
|
"odds_ratio_loss": 0.7569113969802856, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1158590167760849, |
|
"rewards/margins": 0.0068719410337507725, |
|
"rewards/rejected": -0.12273095548152924, |
|
"sft_loss": 1.1585901975631714, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16003556345854633, |
|
"grad_norm": 0.2942532002925873, |
|
"learning_rate": 4.965001486488743e-06, |
|
"logits/chosen": -0.7591525316238403, |
|
"logits/rejected": -0.7494860887527466, |
|
"logps/chosen": -1.0791616439819336, |
|
"logps/rejected": -1.2336231470108032, |
|
"loss": 1.1471, |
|
"odds_ratio_loss": 0.6791869401931763, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.10791617631912231, |
|
"rewards/margins": 0.015446141362190247, |
|
"rewards/rejected": -0.12336231768131256, |
|
"sft_loss": 1.0791616439819336, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17781729273171815, |
|
"grad_norm": 0.35266247391700745, |
|
"learning_rate": 4.956806087776732e-06, |
|
"logits/chosen": -0.6999791860580444, |
|
"logits/rejected": -0.6948890686035156, |
|
"logps/chosen": -1.0402957201004028, |
|
"logps/rejected": -1.2390520572662354, |
|
"loss": 1.1124, |
|
"odds_ratio_loss": 0.7215061187744141, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10402955859899521, |
|
"rewards/margins": 0.01987563632428646, |
|
"rewards/rejected": -0.12390519678592682, |
|
"sft_loss": 1.0402957201004028, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19559902200489, |
|
"grad_norm": 0.4545610845088959, |
|
"learning_rate": 4.947757599879411e-06, |
|
"logits/chosen": -0.7189663052558899, |
|
"logits/rejected": -0.6851673126220703, |
|
"logps/chosen": -1.147323489189148, |
|
"logps/rejected": -1.289452314376831, |
|
"loss": 1.2227, |
|
"odds_ratio_loss": 0.7533982396125793, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.11473236232995987, |
|
"rewards/margins": 0.014212870970368385, |
|
"rewards/rejected": -0.1289452314376831, |
|
"sft_loss": 1.147323489189148, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2133807512780618, |
|
"grad_norm": 0.6324980854988098, |
|
"learning_rate": 4.937859164748931e-06, |
|
"logits/chosen": -0.7043695449829102, |
|
"logits/rejected": -0.6795639991760254, |
|
"logps/chosen": -1.0146863460540771, |
|
"logps/rejected": -1.0826324224472046, |
|
"loss": 1.0907, |
|
"odds_ratio_loss": 0.760542094707489, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.10146863758563995, |
|
"rewards/margins": 0.006794607732445002, |
|
"rewards/rejected": -0.10826325416564941, |
|
"sft_loss": 1.0146863460540771, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23116248055123362, |
|
"grad_norm": 0.4255826771259308, |
|
"learning_rate": 4.92711421946891e-06, |
|
"logits/chosen": -0.6701909899711609, |
|
"logits/rejected": -0.7547520995140076, |
|
"logps/chosen": -1.0397005081176758, |
|
"logps/rejected": -1.1938796043395996, |
|
"loss": 1.1117, |
|
"odds_ratio_loss": 0.7198113799095154, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10397003591060638, |
|
"rewards/margins": 0.015417915768921375, |
|
"rewards/rejected": -0.11938796192407608, |
|
"sft_loss": 1.0397005081176758, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24894420982440543, |
|
"grad_norm": 0.7161264419555664, |
|
"learning_rate": 4.915526495060961e-06, |
|
"logits/chosen": -0.6202753782272339, |
|
"logits/rejected": -0.64984530210495, |
|
"logps/chosen": -1.0066936016082764, |
|
"logps/rejected": -1.1723135709762573, |
|
"loss": 1.0745, |
|
"odds_ratio_loss": 0.6777721643447876, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.10066936165094376, |
|
"rewards/margins": 0.016561999917030334, |
|
"rewards/rejected": -0.1172313541173935, |
|
"sft_loss": 1.0066936016082764, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26672593909757725, |
|
"grad_norm": 0.540038526058197, |
|
"learning_rate": 4.903100015189153e-06, |
|
"logits/chosen": -0.5942473411560059, |
|
"logits/rejected": -0.5408576726913452, |
|
"logps/chosen": -0.9665758013725281, |
|
"logps/rejected": -1.1337311267852783, |
|
"loss": 1.0386, |
|
"odds_ratio_loss": 0.719926118850708, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09665757417678833, |
|
"rewards/margins": 0.01671554148197174, |
|
"rewards/rejected": -0.11337311565876007, |
|
"sft_loss": 0.9665758013725281, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.28450766837074903, |
|
"grad_norm": 2.370271682739258, |
|
"learning_rate": 4.889839094762848e-06, |
|
"logits/chosen": -0.5599099397659302, |
|
"logits/rejected": -0.5666571855545044, |
|
"logps/chosen": -1.0475890636444092, |
|
"logps/rejected": -1.1946136951446533, |
|
"loss": 1.1206, |
|
"odds_ratio_loss": 0.7300440073013306, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10475890338420868, |
|
"rewards/margins": 0.014702451415359974, |
|
"rewards/rejected": -0.11946137249469757, |
|
"sft_loss": 1.0475890636444092, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3022893976439209, |
|
"grad_norm": 0.37259843945503235, |
|
"learning_rate": 4.875748338438416e-06, |
|
"logits/chosen": -0.5827142000198364, |
|
"logits/rejected": -0.5626250505447388, |
|
"logps/chosen": -0.9911508560180664, |
|
"logps/rejected": -1.0813571214675903, |
|
"loss": 1.0632, |
|
"odds_ratio_loss": 0.720399022102356, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09911508113145828, |
|
"rewards/margins": 0.009020629338920116, |
|
"rewards/rejected": -0.10813571512699127, |
|
"sft_loss": 0.9911508560180664, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.32007112691709266, |
|
"grad_norm": 0.3821701109409332, |
|
"learning_rate": 4.8608326390203386e-06, |
|
"logits/chosen": -0.6059321165084839, |
|
"logits/rejected": -0.5918234586715698, |
|
"logps/chosen": -0.9553475379943848, |
|
"logps/rejected": -1.1111819744110107, |
|
"loss": 1.0245, |
|
"odds_ratio_loss": 0.6911659240722656, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09553476423025131, |
|
"rewards/margins": 0.01558343879878521, |
|
"rewards/rejected": -0.11111819744110107, |
|
"sft_loss": 0.9553475379943848, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3378528561902645, |
|
"grad_norm": 0.3977317810058594, |
|
"learning_rate": 4.845097175762251e-06, |
|
"logits/chosen": -0.49882182478904724, |
|
"logits/rejected": -0.48370814323425293, |
|
"logps/chosen": -0.989281952381134, |
|
"logps/rejected": -1.0615712404251099, |
|
"loss": 1.0617, |
|
"odds_ratio_loss": 0.7244290113449097, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09892819821834564, |
|
"rewards/margins": 0.007228921167552471, |
|
"rewards/rejected": -0.10615710914134979, |
|
"sft_loss": 0.989281952381134, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3556345854634363, |
|
"grad_norm": 0.46290695667266846, |
|
"learning_rate": 4.8285474125685286e-06, |
|
"logits/chosen": -0.518696129322052, |
|
"logits/rejected": -0.5193291306495667, |
|
"logps/chosen": -1.1205590963363647, |
|
"logps/rejected": -1.1714627742767334, |
|
"loss": 1.198, |
|
"odds_ratio_loss": 0.7740126252174377, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11205589771270752, |
|
"rewards/margins": 0.00509037496522069, |
|
"rewards/rejected": -0.11714627593755722, |
|
"sft_loss": 1.1205590963363647, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37341631473660813, |
|
"grad_norm": 0.32425227761268616, |
|
"learning_rate": 4.811189096097025e-06, |
|
"logits/chosen": -0.5530649423599243, |
|
"logits/rejected": -0.5483794808387756, |
|
"logps/chosen": -0.9994535446166992, |
|
"logps/rejected": -1.1620233058929443, |
|
"loss": 1.0712, |
|
"odds_ratio_loss": 0.7175347208976746, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09994535893201828, |
|
"rewards/margins": 0.01625697687268257, |
|
"rewards/rejected": -0.11620233952999115, |
|
"sft_loss": 0.9994535446166992, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.39119804400978, |
|
"grad_norm": 0.5374495387077332, |
|
"learning_rate": 4.793028253763633e-06, |
|
"logits/chosen": -0.46489372849464417, |
|
"logits/rejected": -0.49711689352989197, |
|
"logps/chosen": -0.9644722938537598, |
|
"logps/rejected": -1.098311185836792, |
|
"loss": 1.0422, |
|
"odds_ratio_loss": 0.7768682837486267, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09644722938537598, |
|
"rewards/margins": 0.013383878394961357, |
|
"rewards/rejected": -0.10983110964298248, |
|
"sft_loss": 0.9644722938537598, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.40897977328295176, |
|
"grad_norm": 0.7932880520820618, |
|
"learning_rate": 4.774071191649352e-06, |
|
"logits/chosen": -0.5470231771469116, |
|
"logits/rejected": -0.5435986518859863, |
|
"logps/chosen": -0.9579310417175293, |
|
"logps/rejected": -1.1810802221298218, |
|
"loss": 1.0212, |
|
"odds_ratio_loss": 0.6330138444900513, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09579310566186905, |
|
"rewards/margins": 0.02231491729617119, |
|
"rewards/rejected": -0.11810803413391113, |
|
"sft_loss": 0.9579310417175293, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4267615025561236, |
|
"grad_norm": 0.618280291557312, |
|
"learning_rate": 4.7543244923105975e-06, |
|
"logits/chosen": -0.5025745630264282, |
|
"logits/rejected": -0.4722610414028168, |
|
"logps/chosen": -1.0212466716766357, |
|
"logps/rejected": -1.0026448965072632, |
|
"loss": 1.1058, |
|
"odds_ratio_loss": 0.8450964093208313, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.1021246686577797, |
|
"rewards/margins": -0.0018601752817630768, |
|
"rewards/rejected": -0.10026448965072632, |
|
"sft_loss": 1.0212466716766357, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4445432318292954, |
|
"grad_norm": 0.39385247230529785, |
|
"learning_rate": 4.733795012493506e-06, |
|
"logits/chosen": -0.5138652324676514, |
|
"logits/rejected": -0.4715350270271301, |
|
"logps/chosen": -1.0123497247695923, |
|
"logps/rejected": -1.13383150100708, |
|
"loss": 1.0857, |
|
"odds_ratio_loss": 0.7335414886474609, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10123495757579803, |
|
"rewards/margins": 0.012148191221058369, |
|
"rewards/rejected": -0.11338315904140472, |
|
"sft_loss": 1.0123497247695923, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.46232496110246724, |
|
"grad_norm": 0.3666248619556427, |
|
"learning_rate": 4.712489880753035e-06, |
|
"logits/chosen": -0.3967147171497345, |
|
"logits/rejected": -0.3805852234363556, |
|
"logps/chosen": -0.946629524230957, |
|
"logps/rejected": -1.0246347188949585, |
|
"loss": 1.0164, |
|
"odds_ratio_loss": 0.6973500847816467, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09466294944286346, |
|
"rewards/margins": 0.007800529710948467, |
|
"rewards/rejected": -0.1024634838104248, |
|
"sft_loss": 0.946629524230957, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.480106690375639, |
|
"grad_norm": 0.6196191906929016, |
|
"learning_rate": 4.690416494977673e-06, |
|
"logits/chosen": -0.3590370714664459, |
|
"logits/rejected": -0.3209628164768219, |
|
"logps/chosen": -0.9477987289428711, |
|
"logps/rejected": -1.1744658946990967, |
|
"loss": 1.0133, |
|
"odds_ratio_loss": 0.654593825340271, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09477987140417099, |
|
"rewards/margins": 0.02266671508550644, |
|
"rewards/rejected": -0.11744660139083862, |
|
"sft_loss": 0.9477987289428711, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.49788841964881086, |
|
"grad_norm": 0.38255006074905396, |
|
"learning_rate": 4.667582519820639e-06, |
|
"logits/chosen": -0.4478569030761719, |
|
"logits/rejected": -0.40335726737976074, |
|
"logps/chosen": -1.0600357055664062, |
|
"logps/rejected": -1.0844862461090088, |
|
"loss": 1.1374, |
|
"odds_ratio_loss": 0.7734627723693848, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.10600356757640839, |
|
"rewards/margins": 0.002445052145048976, |
|
"rewards/rejected": -0.10844862461090088, |
|
"sft_loss": 1.0600357055664062, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5156701489219827, |
|
"grad_norm": 0.6143254637718201, |
|
"learning_rate": 4.643995884038443e-06, |
|
"logits/chosen": -0.42634057998657227, |
|
"logits/rejected": -0.4024909436702728, |
|
"logps/chosen": -1.0625637769699097, |
|
"logps/rejected": -1.2203805446624756, |
|
"loss": 1.1314, |
|
"odds_ratio_loss": 0.6885315179824829, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1062563806772232, |
|
"rewards/margins": 0.01578168198466301, |
|
"rewards/rejected": -0.12203805148601532, |
|
"sft_loss": 1.0625637769699097, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5334518781951545, |
|
"grad_norm": 0.3366183042526245, |
|
"learning_rate": 4.6196647777377475e-06, |
|
"logits/chosen": -0.37543022632598877, |
|
"logits/rejected": -0.3797139525413513, |
|
"logps/chosen": -0.9299192428588867, |
|
"logps/rejected": -0.9767643213272095, |
|
"loss": 1.0053, |
|
"odds_ratio_loss": 0.7540563344955444, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09299192577600479, |
|
"rewards/margins": 0.004684499930590391, |
|
"rewards/rejected": -0.09767641872167587, |
|
"sft_loss": 0.9299192428588867, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5512336074683263, |
|
"grad_norm": 0.5256261825561523, |
|
"learning_rate": 4.59459764953147e-06, |
|
"logits/chosen": -0.3965223431587219, |
|
"logits/rejected": -0.4247291684150696, |
|
"logps/chosen": -1.0226197242736816, |
|
"logps/rejected": -1.121930718421936, |
|
"loss": 1.0919, |
|
"odds_ratio_loss": 0.6925050616264343, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1022619754076004, |
|
"rewards/margins": 0.00993109680712223, |
|
"rewards/rejected": -0.11219307035207748, |
|
"sft_loss": 1.0226197242736816, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5690153367414981, |
|
"grad_norm": 0.5753230452537537, |
|
"learning_rate": 4.568803203605133e-06, |
|
"logits/chosen": -0.38987019658088684, |
|
"logits/rejected": -0.40249496698379517, |
|
"logps/chosen": -1.0238714218139648, |
|
"logps/rejected": -1.191584825515747, |
|
"loss": 1.0951, |
|
"odds_ratio_loss": 0.7120264768600464, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10238714516162872, |
|
"rewards/margins": 0.016771327704191208, |
|
"rewards/rejected": -0.11915846914052963, |
|
"sft_loss": 1.0238714218139648, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.58679706601467, |
|
"grad_norm": 0.40169399976730347, |
|
"learning_rate": 4.542290396694462e-06, |
|
"logits/chosen": -0.4059433043003082, |
|
"logits/rejected": -0.4052697718143463, |
|
"logps/chosen": -0.9671312570571899, |
|
"logps/rejected": -1.0644605159759521, |
|
"loss": 1.0391, |
|
"odds_ratio_loss": 0.7196342349052429, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09671313315629959, |
|
"rewards/margins": 0.009732924401760101, |
|
"rewards/rejected": -0.1064460501074791, |
|
"sft_loss": 0.9671312570571899, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6045787952878418, |
|
"grad_norm": 0.5619000792503357, |
|
"learning_rate": 4.515068434975298e-06, |
|
"logits/chosen": -0.4578043818473816, |
|
"logits/rejected": -0.4284750819206238, |
|
"logps/chosen": -0.9811161756515503, |
|
"logps/rejected": -1.1456761360168457, |
|
"loss": 1.0484, |
|
"odds_ratio_loss": 0.6727977991104126, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09811162203550339, |
|
"rewards/margins": 0.016455989331007004, |
|
"rewards/rejected": -0.11456761509180069, |
|
"sft_loss": 0.9811161756515503, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6223605245610135, |
|
"grad_norm": 0.5821824073791504, |
|
"learning_rate": 4.487146770866887e-06, |
|
"logits/chosen": -0.34484004974365234, |
|
"logits/rejected": -0.3222612738609314, |
|
"logps/chosen": -1.0583232641220093, |
|
"logps/rejected": -1.117333173751831, |
|
"loss": 1.1304, |
|
"odds_ratio_loss": 0.7205663919448853, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.10583231598138809, |
|
"rewards/margins": 0.005901001859456301, |
|
"rewards/rejected": -0.1117333322763443, |
|
"sft_loss": 1.0583232641220093, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6401422538341853, |
|
"grad_norm": 0.28447961807250977, |
|
"learning_rate": 4.458535099749666e-06, |
|
"logits/chosen": -0.43229636549949646, |
|
"logits/rejected": -0.40540462732315063, |
|
"logps/chosen": -1.1308929920196533, |
|
"logps/rejected": -1.0958976745605469, |
|
"loss": 1.2174, |
|
"odds_ratio_loss": 0.8652679324150085, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.11308930814266205, |
|
"rewards/margins": -0.0034995335154235363, |
|
"rewards/rejected": -0.10958977788686752, |
|
"sft_loss": 1.1308929920196533, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6579239831073572, |
|
"grad_norm": 0.27178603410720825, |
|
"learning_rate": 4.429243356598694e-06, |
|
"logits/chosen": -0.40932542085647583, |
|
"logits/rejected": -0.3859841227531433, |
|
"logps/chosen": -0.9554696083068848, |
|
"logps/rejected": -1.1517064571380615, |
|
"loss": 1.0243, |
|
"odds_ratio_loss": 0.6880883574485779, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09554696083068848, |
|
"rewards/margins": 0.019623693078756332, |
|
"rewards/rejected": -0.11517064273357391, |
|
"sft_loss": 0.9554696083068848, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.675705712380529, |
|
"grad_norm": 0.34544578194618225, |
|
"learning_rate": 4.399281712533875e-06, |
|
"logits/chosen": -0.32934245467185974, |
|
"logits/rejected": -0.3599315285682678, |
|
"logps/chosen": -0.9367265701293945, |
|
"logps/rejected": -1.0202996730804443, |
|
"loss": 1.0101, |
|
"odds_ratio_loss": 0.7333763837814331, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09367264807224274, |
|
"rewards/margins": 0.008357317186892033, |
|
"rewards/rejected": -0.1020299643278122, |
|
"sft_loss": 0.9367265701293945, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6934874416537008, |
|
"grad_norm": 0.48474597930908203, |
|
"learning_rate": 4.368660571288192e-06, |
|
"logits/chosen": -0.3377426266670227, |
|
"logits/rejected": -0.32565537095069885, |
|
"logps/chosen": -0.9353078007698059, |
|
"logps/rejected": -1.0242602825164795, |
|
"loss": 1.0071, |
|
"odds_ratio_loss": 0.7176766395568848, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09353077411651611, |
|
"rewards/margins": 0.008895261213183403, |
|
"rewards/rejected": -0.10242603719234467, |
|
"sft_loss": 0.9353078007698059, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7112691709268726, |
|
"grad_norm": 0.3825822174549103, |
|
"learning_rate": 4.337390565595163e-06, |
|
"logits/chosen": -0.4158423840999603, |
|
"logits/rejected": -0.36646509170532227, |
|
"logps/chosen": -1.0673354864120483, |
|
"logps/rejected": -1.0877690315246582, |
|
"loss": 1.1448, |
|
"odds_ratio_loss": 0.7746785879135132, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1067335456609726, |
|
"rewards/margins": 0.0020433522295206785, |
|
"rewards/rejected": -0.10877690464258194, |
|
"sft_loss": 1.0673354864120483, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7290509002000445, |
|
"grad_norm": 0.36279189586639404, |
|
"learning_rate": 4.305482553496786e-06, |
|
"logits/chosen": -0.33700472116470337, |
|
"logits/rejected": -0.3831488788127899, |
|
"logps/chosen": -0.9607623815536499, |
|
"logps/rejected": -1.0405422449111938, |
|
"loss": 1.0363, |
|
"odds_ratio_loss": 0.7554237842559814, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09607623517513275, |
|
"rewards/margins": 0.007977982982993126, |
|
"rewards/rejected": -0.10405422747135162, |
|
"sft_loss": 0.9607623815536499, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7468326294732163, |
|
"grad_norm": 0.457087904214859, |
|
"learning_rate": 4.272947614573244e-06, |
|
"logits/chosen": -0.3999176621437073, |
|
"logits/rejected": -0.3756122291088104, |
|
"logps/chosen": -1.0111384391784668, |
|
"logps/rejected": -1.0757354497909546, |
|
"loss": 1.0826, |
|
"odds_ratio_loss": 0.7144282460212708, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.10111384093761444, |
|
"rewards/margins": 0.006459714379161596, |
|
"rewards/rejected": -0.10757355391979218, |
|
"sft_loss": 1.0111384391784668, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7646143587463881, |
|
"grad_norm": 0.2605019509792328, |
|
"learning_rate": 4.23979704609569e-06, |
|
"logits/chosen": -0.36384835839271545, |
|
"logits/rejected": -0.34030967950820923, |
|
"logps/chosen": -0.9615520238876343, |
|
"logps/rejected": -1.0373448133468628, |
|
"loss": 1.0309, |
|
"odds_ratio_loss": 0.6935026049613953, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09615520387887955, |
|
"rewards/margins": 0.007579285651445389, |
|
"rewards/rejected": -0.10373447835445404, |
|
"sft_loss": 0.9615520238876343, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.78239608801956, |
|
"grad_norm": 0.41911929845809937, |
|
"learning_rate": 4.206042359103435e-06, |
|
"logits/chosen": -0.38596296310424805, |
|
"logits/rejected": -0.37879234552383423, |
|
"logps/chosen": -0.9808257222175598, |
|
"logps/rejected": -1.121048927307129, |
|
"loss": 1.0531, |
|
"odds_ratio_loss": 0.7229377627372742, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09808257222175598, |
|
"rewards/margins": 0.014022317714989185, |
|
"rewards/rejected": -0.11210489273071289, |
|
"sft_loss": 0.9808257222175598, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8001778172927317, |
|
"grad_norm": 0.7460839748382568, |
|
"learning_rate": 4.17169527440691e-06, |
|
"logits/chosen": -0.39514169096946716, |
|
"logits/rejected": -0.3737938106060028, |
|
"logps/chosen": -0.9438737630844116, |
|
"logps/rejected": -1.0060594081878662, |
|
"loss": 1.0182, |
|
"odds_ratio_loss": 0.7436385154724121, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09438737481832504, |
|
"rewards/margins": 0.006218560039997101, |
|
"rewards/rejected": -0.10060594230890274, |
|
"sft_loss": 0.9438737630844116, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8179595465659035, |
|
"grad_norm": 0.5300458669662476, |
|
"learning_rate": 4.136767718517797e-06, |
|
"logits/chosen": -0.3699805736541748, |
|
"logits/rejected": -0.3850511312484741, |
|
"logps/chosen": -0.959467887878418, |
|
"logps/rejected": -1.100988507270813, |
|
"loss": 1.0256, |
|
"odds_ratio_loss": 0.6614881753921509, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0959467813372612, |
|
"rewards/margins": 0.014152060262858868, |
|
"rewards/rejected": -0.11009885370731354, |
|
"sft_loss": 0.959467887878418, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8357412758390753, |
|
"grad_norm": 0.9485012292861938, |
|
"learning_rate": 4.1012718195077196e-06, |
|
"logits/chosen": -0.37103739380836487, |
|
"logits/rejected": -0.3039020895957947, |
|
"logps/chosen": -0.9647709131240845, |
|
"logps/rejected": -1.0279747247695923, |
|
"loss": 1.0376, |
|
"odds_ratio_loss": 0.7286756038665771, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.09647707641124725, |
|
"rewards/margins": 0.006320389453321695, |
|
"rewards/rejected": -0.10279747098684311, |
|
"sft_loss": 0.9647709131240845, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8535230051122472, |
|
"grad_norm": 0.5754956603050232, |
|
"learning_rate": 4.065219902796953e-06, |
|
"logits/chosen": -0.40020495653152466, |
|
"logits/rejected": -0.39535146951675415, |
|
"logps/chosen": -0.9706109166145325, |
|
"logps/rejected": -1.093976378440857, |
|
"loss": 1.0453, |
|
"odds_ratio_loss": 0.7464355230331421, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09706110507249832, |
|
"rewards/margins": 0.01233654748648405, |
|
"rewards/rejected": -0.10939764976501465, |
|
"sft_loss": 0.9706109166145325, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.871304734385419, |
|
"grad_norm": 0.3195387125015259, |
|
"learning_rate": 4.028624486874608e-06, |
|
"logits/chosen": -0.4315417408943176, |
|
"logits/rejected": -0.36453911662101746, |
|
"logps/chosen": -0.9465911984443665, |
|
"logps/rejected": -1.1121985912322998, |
|
"loss": 1.0194, |
|
"odds_ratio_loss": 0.7276239991188049, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09465911984443665, |
|
"rewards/margins": 0.016560742631554604, |
|
"rewards/rejected": -0.1112198606133461, |
|
"sft_loss": 0.9465911984443665, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"grad_norm": 0.6305994391441345, |
|
"learning_rate": 3.99149827895177e-06, |
|
"logits/chosen": -0.38445502519607544, |
|
"logits/rejected": -0.38218945264816284, |
|
"logps/chosen": -1.0171244144439697, |
|
"logps/rejected": -1.0506142377853394, |
|
"loss": 1.0913, |
|
"odds_ratio_loss": 0.7415187358856201, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.10171245038509369, |
|
"rewards/margins": 0.0033489768393337727, |
|
"rewards/rejected": -0.10506142675876617, |
|
"sft_loss": 1.0171244144439697, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8890864636585908, |
|
"eval_logits/chosen": -0.34904247522354126, |
|
"eval_logits/rejected": -0.31755369901657104, |
|
"eval_logps/chosen": -0.9676439166069031, |
|
"eval_logps/rejected": -1.1074860095977783, |
|
"eval_loss": 1.0354068279266357, |
|
"eval_odds_ratio_loss": 0.6776295900344849, |
|
"eval_rewards/accuracies": 0.5180000066757202, |
|
"eval_rewards/chosen": -0.09676438570022583, |
|
"eval_rewards/margins": 0.013984210789203644, |
|
"eval_rewards/rejected": -0.11074860394001007, |
|
"eval_runtime": 185.9798, |
|
"eval_samples_per_second": 5.377, |
|
"eval_sft_loss": 0.9676439166069031, |
|
"eval_steps_per_second": 2.688, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9068681929317626, |
|
"grad_norm": 0.33740749955177307, |
|
"learning_rate": 3.953854170549114e-06, |
|
"logits/chosen": -0.3074025809764862, |
|
"logits/rejected": -0.30263853073120117, |
|
"logps/chosen": -0.9824435114860535, |
|
"logps/rejected": -1.0204169750213623, |
|
"loss": 1.0555, |
|
"odds_ratio_loss": 0.7308207750320435, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09824434667825699, |
|
"rewards/margins": 0.0037973597645759583, |
|
"rewards/rejected": -0.10204169899225235, |
|
"sft_loss": 0.9824435114860535, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9246499222049345, |
|
"grad_norm": 0.4032406210899353, |
|
"learning_rate": 3.91570523302051e-06, |
|
"logits/chosen": -0.3414192199707031, |
|
"logits/rejected": -0.36243736743927, |
|
"logps/chosen": -0.8989545702934265, |
|
"logps/rejected": -1.0376076698303223, |
|
"loss": 0.9695, |
|
"odds_ratio_loss": 0.7055255174636841, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08989545702934265, |
|
"rewards/margins": 0.013865319080650806, |
|
"rewards/rejected": -0.10376076400279999, |
|
"sft_loss": 0.8989545702934265, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9424316514781063, |
|
"grad_norm": 0.3632182776927948, |
|
"learning_rate": 3.8770647130141996e-06, |
|
"logits/chosen": -0.3258126378059387, |
|
"logits/rejected": -0.33273980021476746, |
|
"logps/chosen": -0.9584708213806152, |
|
"logps/rejected": -1.0552600622177124, |
|
"loss": 1.0316, |
|
"odds_ratio_loss": 0.731722891330719, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09584707766771317, |
|
"rewards/margins": 0.009678924456238747, |
|
"rewards/rejected": -0.10552600771188736, |
|
"sft_loss": 0.9584708213806152, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.960213380751278, |
|
"grad_norm": 0.3121795058250427, |
|
"learning_rate": 3.837946027873086e-06, |
|
"logits/chosen": -0.32046863436698914, |
|
"logits/rejected": -0.3653668463230133, |
|
"logps/chosen": -0.966636061668396, |
|
"logps/rejected": -1.1031057834625244, |
|
"loss": 1.0367, |
|
"odds_ratio_loss": 0.7007311582565308, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.09666360169649124, |
|
"rewards/margins": 0.01364696491509676, |
|
"rewards/rejected": -0.11031056940555573, |
|
"sft_loss": 0.966636061668396, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9779951100244498, |
|
"grad_norm": 0.6487416625022888, |
|
"learning_rate": 3.7983627609757713e-06, |
|
"logits/chosen": -0.34747475385665894, |
|
"logits/rejected": -0.3490690290927887, |
|
"logps/chosen": -0.9615602493286133, |
|
"logps/rejected": -1.0271753072738647, |
|
"loss": 1.0318, |
|
"odds_ratio_loss": 0.702663779258728, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09615601599216461, |
|
"rewards/margins": 0.0065615237690508366, |
|
"rewards/rejected": -0.10271754115819931, |
|
"sft_loss": 0.9615602493286133, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9957768392976217, |
|
"grad_norm": 0.3890874683856964, |
|
"learning_rate": 3.758328657019924e-06, |
|
"logits/chosen": -0.37014687061309814, |
|
"logits/rejected": -0.4008961319923401, |
|
"logps/chosen": -0.9199098348617554, |
|
"logps/rejected": -1.0562833547592163, |
|
"loss": 0.9886, |
|
"odds_ratio_loss": 0.6868860721588135, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09199099242687225, |
|
"rewards/margins": 0.013637351803481579, |
|
"rewards/rejected": -0.1056283488869667, |
|
"sft_loss": 0.9199098348617554, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0135585685707935, |
|
"grad_norm": 1.5021965503692627, |
|
"learning_rate": 3.717857617249642e-06, |
|
"logits/chosen": -0.409252405166626, |
|
"logits/rejected": -0.3774147033691406, |
|
"logps/chosen": -1.0592560768127441, |
|
"logps/rejected": -1.1887257099151611, |
|
"loss": 1.135, |
|
"odds_ratio_loss": 0.7577823400497437, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.10592560470104218, |
|
"rewards/margins": 0.012946966104209423, |
|
"rewards/rejected": -0.11887258291244507, |
|
"sft_loss": 1.0592560768127441, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0313402978439654, |
|
"grad_norm": 0.36601969599723816, |
|
"learning_rate": 3.6769636946284543e-06, |
|
"logits/chosen": -0.33855992555618286, |
|
"logits/rejected": -0.38329094648361206, |
|
"logps/chosen": -0.9246651530265808, |
|
"logps/rejected": -1.0259661674499512, |
|
"loss": 0.9949, |
|
"odds_ratio_loss": 0.7019587755203247, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09246651828289032, |
|
"rewards/margins": 0.01013010274618864, |
|
"rewards/rejected": -0.10259661823511124, |
|
"sft_loss": 0.9246651530265808, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.049122027117137, |
|
"grad_norm": 0.3644584119319916, |
|
"learning_rate": 3.6356610889596355e-06, |
|
"logits/chosen": -0.3362785577774048, |
|
"logits/rejected": -0.3195570707321167, |
|
"logps/chosen": -0.9757383465766907, |
|
"logps/rejected": -1.0168259143829346, |
|
"loss": 1.0499, |
|
"odds_ratio_loss": 0.7411800622940063, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09757383167743683, |
|
"rewards/margins": 0.004108763299882412, |
|
"rewards/rejected": -0.10168258845806122, |
|
"sft_loss": 0.9757383465766907, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.066903756390309, |
|
"grad_norm": 0.38790592551231384, |
|
"learning_rate": 3.593964141955541e-06, |
|
"logits/chosen": -0.31955039501190186, |
|
"logits/rejected": -0.3287174701690674, |
|
"logps/chosen": -0.9446002244949341, |
|
"logps/rejected": -0.9857986569404602, |
|
"loss": 1.0183, |
|
"odds_ratio_loss": 0.7368658185005188, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09446002542972565, |
|
"rewards/margins": 0.004119834862649441, |
|
"rewards/rejected": -0.09857985377311707, |
|
"sft_loss": 0.9446002244949341, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0846854856634809, |
|
"grad_norm": 0.3323744237422943, |
|
"learning_rate": 3.5518873322576573e-06, |
|
"logits/chosen": -0.43425217270851135, |
|
"logits/rejected": -0.3568256199359894, |
|
"logps/chosen": -0.9986424446105957, |
|
"logps/rejected": -1.0531480312347412, |
|
"loss": 1.073, |
|
"odds_ratio_loss": 0.7439261674880981, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09986423701047897, |
|
"rewards/margins": 0.005450558383017778, |
|
"rewards/rejected": -0.10531480610370636, |
|
"sft_loss": 0.9986424446105957, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.1024672149366526, |
|
"grad_norm": 0.45893725752830505, |
|
"learning_rate": 3.5094452704091143e-06, |
|
"logits/chosen": -0.3812747299671173, |
|
"logits/rejected": -0.36471351981163025, |
|
"logps/chosen": -0.9423580169677734, |
|
"logps/rejected": -1.0641114711761475, |
|
"loss": 1.0114, |
|
"odds_ratio_loss": 0.6907029747962952, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09423580020666122, |
|
"rewards/margins": 0.01217535138130188, |
|
"rewards/rejected": -0.10641114413738251, |
|
"sft_loss": 0.9423580169677734, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.1202489442098245, |
|
"grad_norm": 0.5117968916893005, |
|
"learning_rate": 3.46665269378139e-06, |
|
"logits/chosen": -0.3292369842529297, |
|
"logits/rejected": -0.3725055158138275, |
|
"logps/chosen": -0.9826286435127258, |
|
"logps/rejected": -1.0871622562408447, |
|
"loss": 1.0548, |
|
"odds_ratio_loss": 0.7213753461837769, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09826286137104034, |
|
"rewards/margins": 0.010453373193740845, |
|
"rewards/rejected": -0.10871622711420059, |
|
"sft_loss": 0.9826286435127258, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.1380306734829961, |
|
"grad_norm": 0.5622742176055908, |
|
"learning_rate": 3.4235244614569794e-06, |
|
"logits/chosen": -0.3315224051475525, |
|
"logits/rejected": -0.3257826566696167, |
|
"logps/chosen": -1.1072447299957275, |
|
"logps/rejected": -1.0443857908248901, |
|
"loss": 1.1924, |
|
"odds_ratio_loss": 0.8511736989021301, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.11072447150945663, |
|
"rewards/margins": -0.006285896059125662, |
|
"rewards/rejected": -0.10443858057260513, |
|
"sft_loss": 1.1072447299957275, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.155812402756168, |
|
"grad_norm": 0.27428311109542847, |
|
"learning_rate": 3.3800755490698008e-06, |
|
"logits/chosen": -0.30900219082832336, |
|
"logits/rejected": -0.33938735723495483, |
|
"logps/chosen": -0.9312244653701782, |
|
"logps/rejected": -1.0983222723007202, |
|
"loss": 0.9964, |
|
"odds_ratio_loss": 0.651997447013855, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0931224375963211, |
|
"rewards/margins": 0.01670977845788002, |
|
"rewards/rejected": -0.10983221232891083, |
|
"sft_loss": 0.9312244653701782, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.17359413202934, |
|
"grad_norm": 1.0422977209091187, |
|
"learning_rate": 3.3363210436051287e-06, |
|
"logits/chosen": -0.3527902662754059, |
|
"logits/rejected": -0.3563137948513031, |
|
"logps/chosen": -0.978245735168457, |
|
"logps/rejected": -1.0940849781036377, |
|
"loss": 1.0514, |
|
"odds_ratio_loss": 0.73140949010849, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0978245884180069, |
|
"rewards/margins": 0.011583918705582619, |
|
"rewards/rejected": -0.10940849781036377, |
|
"sft_loss": 0.978245735168457, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1913758613025116, |
|
"grad_norm": 0.4168451428413391, |
|
"learning_rate": 3.292276138160867e-06, |
|
"logits/chosen": -0.28714054822921753, |
|
"logits/rejected": -0.30155253410339355, |
|
"logps/chosen": -0.934456467628479, |
|
"logps/rejected": -1.0636101961135864, |
|
"loss": 1.0032, |
|
"odds_ratio_loss": 0.6879295110702515, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.09344564378261566, |
|
"rewards/margins": 0.012915370985865593, |
|
"rewards/rejected": -0.1063610091805458, |
|
"sft_loss": 0.934456467628479, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.2091575905756835, |
|
"grad_norm": 0.34239086508750916, |
|
"learning_rate": 3.2479561266719694e-06, |
|
"logits/chosen": -0.381683886051178, |
|
"logits/rejected": -0.37388402223587036, |
|
"logps/chosen": -0.9762662649154663, |
|
"logps/rejected": -1.0414526462554932, |
|
"loss": 1.0493, |
|
"odds_ratio_loss": 0.7306024432182312, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09762662649154663, |
|
"rewards/margins": 0.006518647074699402, |
|
"rewards/rejected": -0.10414527356624603, |
|
"sft_loss": 0.9762662649154663, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2269393198488552, |
|
"grad_norm": 0.4666767716407776, |
|
"learning_rate": 3.2033763985998533e-06, |
|
"logits/chosen": -0.3561275601387024, |
|
"logits/rejected": -0.3666972517967224, |
|
"logps/chosen": -0.9278993606567383, |
|
"logps/rejected": -1.172456979751587, |
|
"loss": 0.9924, |
|
"odds_ratio_loss": 0.6447319984436035, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09278994053602219, |
|
"rewards/margins": 0.024455763399600983, |
|
"rewards/rejected": -0.11724568903446198, |
|
"sft_loss": 0.9278993606567383, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.244721049122027, |
|
"grad_norm": 0.4466889202594757, |
|
"learning_rate": 3.1585524335886335e-06, |
|
"logits/chosen": -0.3700794279575348, |
|
"logits/rejected": -0.37532711029052734, |
|
"logps/chosen": -0.893964409828186, |
|
"logps/rejected": -1.0242712497711182, |
|
"loss": 0.9628, |
|
"odds_ratio_loss": 0.6878638863563538, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08939644694328308, |
|
"rewards/margins": 0.013030675239861012, |
|
"rewards/rejected": -0.10242712497711182, |
|
"sft_loss": 0.893964409828186, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.262502778395199, |
|
"grad_norm": 0.6432116031646729, |
|
"learning_rate": 3.1134997960900536e-06, |
|
"logits/chosen": -0.3843459486961365, |
|
"logits/rejected": -0.4183478355407715, |
|
"logps/chosen": -0.8787266612052917, |
|
"logps/rejected": -1.1227346658706665, |
|
"loss": 0.9417, |
|
"odds_ratio_loss": 0.6295467615127563, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08787266910076141, |
|
"rewards/margins": 0.024400796741247177, |
|
"rewards/rejected": -0.11227346956729889, |
|
"sft_loss": 0.8787266612052917, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2802845076683709, |
|
"grad_norm": 0.47079232335090637, |
|
"learning_rate": 3.0682341299589583e-06, |
|
"logits/chosen": -0.3750189244747162, |
|
"logits/rejected": -0.33040302991867065, |
|
"logps/chosen": -0.9284566640853882, |
|
"logps/rejected": -0.9662970304489136, |
|
"loss": 1.0031, |
|
"odds_ratio_loss": 0.7467560172080994, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.09284567832946777, |
|
"rewards/margins": 0.0037840281147509813, |
|
"rewards/rejected": -0.09662970155477524, |
|
"sft_loss": 0.9284566640853882, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2980662369415426, |
|
"grad_norm": 0.4881021976470947, |
|
"learning_rate": 3.022771153021201e-06, |
|
"logits/chosen": -0.3772386610507965, |
|
"logits/rejected": -0.3512099087238312, |
|
"logps/chosen": -0.9160524606704712, |
|
"logps/rejected": -1.0388538837432861, |
|
"loss": 0.986, |
|
"odds_ratio_loss": 0.6990936994552612, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09160524606704712, |
|
"rewards/margins": 0.012280138209462166, |
|
"rewards/rejected": -0.10388537496328354, |
|
"sft_loss": 0.9160524606704712, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.3158479662147144, |
|
"grad_norm": 0.3279300034046173, |
|
"learning_rate": 2.9771266516158625e-06, |
|
"logits/chosen": -0.33211830258369446, |
|
"logits/rejected": -0.3039989471435547, |
|
"logps/chosen": -0.9333264231681824, |
|
"logps/rejected": -1.0419334173202515, |
|
"loss": 1.0054, |
|
"odds_ratio_loss": 0.72088623046875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09333264082670212, |
|
"rewards/margins": 0.010860702954232693, |
|
"rewards/rejected": -0.10419335216283798, |
|
"sft_loss": 0.9333264231681824, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.3336296954878861, |
|
"grad_norm": 0.311788409948349, |
|
"learning_rate": 2.9313164751136802e-06, |
|
"logits/chosen": -0.3910767436027527, |
|
"logits/rejected": -0.36302170157432556, |
|
"logps/chosen": -0.9149459004402161, |
|
"logps/rejected": -1.0412867069244385, |
|
"loss": 0.9824, |
|
"odds_ratio_loss": 0.6748364567756653, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0914945974946022, |
|
"rewards/margins": 0.01263406127691269, |
|
"rewards/rejected": -0.1041286438703537, |
|
"sft_loss": 0.9149459004402161, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.351411424761058, |
|
"grad_norm": 0.5009350180625916, |
|
"learning_rate": 2.8853565304135956e-06, |
|
"logits/chosen": -0.28646108508110046, |
|
"logits/rejected": -0.3241187632083893, |
|
"logps/chosen": -0.988601803779602, |
|
"logps/rejected": -1.0276473760604858, |
|
"loss": 1.0645, |
|
"odds_ratio_loss": 0.759224534034729, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09886018931865692, |
|
"rewards/margins": 0.0039045563898980618, |
|
"rewards/rejected": -0.10276474803686142, |
|
"sft_loss": 0.988601803779602, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.36919315403423, |
|
"grad_norm": 0.5821639895439148, |
|
"learning_rate": 2.839262776419313e-06, |
|
"logits/chosen": -0.345294713973999, |
|
"logits/rejected": -0.34865519404411316, |
|
"logps/chosen": -0.9152688980102539, |
|
"logps/rejected": -1.12654709815979, |
|
"loss": 0.9828, |
|
"odds_ratio_loss": 0.6755408644676208, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09152691066265106, |
|
"rewards/margins": 0.021127816289663315, |
|
"rewards/rejected": -0.11265470832586288, |
|
"sft_loss": 0.9152688980102539, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3869748833074016, |
|
"grad_norm": 0.39795824885368347, |
|
"learning_rate": 2.793051218497817e-06, |
|
"logits/chosen": -0.27542608976364136, |
|
"logits/rejected": -0.27257028222084045, |
|
"logps/chosen": -0.931863009929657, |
|
"logps/rejected": -0.9498918652534485, |
|
"loss": 1.0074, |
|
"odds_ratio_loss": 0.7550782561302185, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.09318631142377853, |
|
"rewards/margins": 0.0018028710037469864, |
|
"rewards/rejected": -0.09498917311429977, |
|
"sft_loss": 0.931863009929657, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.4047566125805735, |
|
"grad_norm": 0.37384262681007385, |
|
"learning_rate": 2.7467379029217437e-06, |
|
"logits/chosen": -0.34524422883987427, |
|
"logits/rejected": -0.36011195182800293, |
|
"logps/chosen": -0.9515836834907532, |
|
"logps/rejected": -1.0694557428359985, |
|
"loss": 1.0211, |
|
"odds_ratio_loss": 0.6952496767044067, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09515835344791412, |
|
"rewards/margins": 0.011787201277911663, |
|
"rewards/rejected": -0.10694557428359985, |
|
"sft_loss": 0.9515836834907532, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.4225383418537452, |
|
"grad_norm": 0.30680692195892334, |
|
"learning_rate": 2.7003389112975546e-06, |
|
"logits/chosen": -0.26400548219680786, |
|
"logits/rejected": -0.20824924111366272, |
|
"logps/chosen": -0.9995955228805542, |
|
"logps/rejected": -1.0734318494796753, |
|
"loss": 1.0721, |
|
"odds_ratio_loss": 0.7255308628082275, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09995955973863602, |
|
"rewards/margins": 0.007383632007986307, |
|
"rewards/rejected": -0.10734319686889648, |
|
"sft_loss": 0.9995955228805542, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.440320071126917, |
|
"grad_norm": 0.7603825926780701, |
|
"learning_rate": 2.653870354981437e-06, |
|
"logits/chosen": -0.36708512902259827, |
|
"logits/rejected": -0.4067977964878082, |
|
"logps/chosen": -0.869776725769043, |
|
"logps/rejected": -0.9957377314567566, |
|
"loss": 0.9397, |
|
"odds_ratio_loss": 0.6991982460021973, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08697767555713654, |
|
"rewards/margins": 0.012596105225384235, |
|
"rewards/rejected": -0.0995737761259079, |
|
"sft_loss": 0.869776725769043, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.458101800400089, |
|
"grad_norm": 0.8572419881820679, |
|
"learning_rate": 2.6073483694848777e-06, |
|
"logits/chosen": -0.3313853442668915, |
|
"logits/rejected": -0.2504517734050751, |
|
"logps/chosen": -0.9180091619491577, |
|
"logps/rejected": -1.0551806688308716, |
|
"loss": 0.9865, |
|
"odds_ratio_loss": 0.6853691339492798, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09180092811584473, |
|
"rewards/margins": 0.013717141933739185, |
|
"rewards/rejected": -0.10551806539297104, |
|
"sft_loss": 0.9180091619491577, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4758835296732609, |
|
"grad_norm": 0.2907600700855255, |
|
"learning_rate": 2.560789108871847e-06, |
|
"logits/chosen": -0.35712695121765137, |
|
"logits/rejected": -0.34705477952957153, |
|
"logps/chosen": -0.9147292971611023, |
|
"logps/rejected": -1.1361644268035889, |
|
"loss": 0.9806, |
|
"odds_ratio_loss": 0.6587303280830383, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09147293865680695, |
|
"rewards/margins": 0.02214350923895836, |
|
"rewards/rejected": -0.113616444170475, |
|
"sft_loss": 0.9147292971611023, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4936652589464325, |
|
"grad_norm": 0.9957931637763977, |
|
"learning_rate": 2.514208740149544e-06, |
|
"logits/chosen": -0.38370782136917114, |
|
"logits/rejected": -0.372738778591156, |
|
"logps/chosen": -1.0301647186279297, |
|
"logps/rejected": -1.131388783454895, |
|
"loss": 1.1016, |
|
"odds_ratio_loss": 0.7141064405441284, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10301647335290909, |
|
"rewards/margins": 0.010122401639819145, |
|
"rewards/rejected": -0.11313886940479279, |
|
"sft_loss": 1.0301647186279297, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.5114469882196042, |
|
"grad_norm": 0.3347834050655365, |
|
"learning_rate": 2.46762343765464e-06, |
|
"logits/chosen": -0.33272939920425415, |
|
"logits/rejected": -0.3354397416114807, |
|
"logps/chosen": -0.9821497797966003, |
|
"logps/rejected": -1.1356861591339111, |
|
"loss": 1.0494, |
|
"odds_ratio_loss": 0.672347903251648, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09821496903896332, |
|
"rewards/margins": 0.015353633090853691, |
|
"rewards/rejected": -0.11356861889362335, |
|
"sft_loss": 0.9821497797966003, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.5292287174927761, |
|
"grad_norm": 0.40781450271606445, |
|
"learning_rate": 2.4210493774369903e-06, |
|
"logits/chosen": -0.3659764528274536, |
|
"logits/rejected": -0.34343641996383667, |
|
"logps/chosen": -0.9932387471199036, |
|
"logps/rejected": -1.0735210180282593, |
|
"loss": 1.0663, |
|
"odds_ratio_loss": 0.7305824160575867, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09932386875152588, |
|
"rewards/margins": 0.008028226904571056, |
|
"rewards/rejected": -0.10735210031270981, |
|
"sft_loss": 0.9932387471199036, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.547010446765948, |
|
"grad_norm": 0.33270904421806335, |
|
"learning_rate": 2.374502731642732e-06, |
|
"logits/chosen": -0.33156028389930725, |
|
"logits/rejected": -0.3256151080131531, |
|
"logps/chosen": -0.9762036204338074, |
|
"logps/rejected": -1.0732605457305908, |
|
"loss": 1.0483, |
|
"odds_ratio_loss": 0.7209652662277222, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09762036800384521, |
|
"rewards/margins": 0.009705697186291218, |
|
"rewards/rejected": -0.10732606798410416, |
|
"sft_loss": 0.9762036204338074, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.56479217603912, |
|
"grad_norm": 0.46649253368377686, |
|
"learning_rate": 2.3279996628987556e-06, |
|
"logits/chosen": -0.3505496084690094, |
|
"logits/rejected": -0.3284318149089813, |
|
"logps/chosen": -0.9539216756820679, |
|
"logps/rejected": -1.0178234577178955, |
|
"loss": 1.0269, |
|
"odds_ratio_loss": 0.7295688390731812, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09539216756820679, |
|
"rewards/margins": 0.006390177644789219, |
|
"rewards/rejected": -0.10178234428167343, |
|
"sft_loss": 0.9539216756820679, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5825739053122916, |
|
"grad_norm": 0.343382865190506, |
|
"learning_rate": 2.281556318700474e-06, |
|
"logits/chosen": -0.2859468460083008, |
|
"logits/rejected": -0.25978535413742065, |
|
"logps/chosen": -0.904071033000946, |
|
"logps/rejected": -0.9673022031784058, |
|
"loss": 0.9788, |
|
"odds_ratio_loss": 0.7473067045211792, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0904071107506752, |
|
"rewards/margins": 0.00632312148809433, |
|
"rewards/rejected": -0.09673022478818893, |
|
"sft_loss": 0.904071033000946, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.6003556345854635, |
|
"grad_norm": 0.6206201314926147, |
|
"learning_rate": 2.2351888258048408e-06, |
|
"logits/chosen": -0.3074144423007965, |
|
"logits/rejected": -0.2645527720451355, |
|
"logps/chosen": -0.8916131854057312, |
|
"logps/rejected": -0.9986615180969238, |
|
"loss": 0.9603, |
|
"odds_ratio_loss": 0.6866299510002136, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08916132152080536, |
|
"rewards/margins": 0.010704840533435345, |
|
"rewards/rejected": -0.09986615926027298, |
|
"sft_loss": 0.8916131854057312, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6181373638586352, |
|
"grad_norm": 0.3601900339126587, |
|
"learning_rate": 2.188913284630584e-06, |
|
"logits/chosen": -0.33852243423461914, |
|
"logits/rejected": -0.3135743737220764, |
|
"logps/chosen": -0.9911006689071655, |
|
"logps/rejected": -1.016789197921753, |
|
"loss": 1.0679, |
|
"odds_ratio_loss": 0.7680201530456543, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09911007434129715, |
|
"rewards/margins": 0.0025688547175377607, |
|
"rewards/rejected": -0.10167893022298813, |
|
"sft_loss": 0.9911006689071655, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.635919093131807, |
|
"grad_norm": 0.6057630777359009, |
|
"learning_rate": 2.1427457636675652e-06, |
|
"logits/chosen": -0.3320189118385315, |
|
"logits/rejected": -0.28204983472824097, |
|
"logps/chosen": -1.0480351448059082, |
|
"logps/rejected": -1.1421617269515991, |
|
"loss": 1.1202, |
|
"odds_ratio_loss": 0.7219060659408569, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.10480351746082306, |
|
"rewards/margins": 0.00941266119480133, |
|
"rewards/rejected": -0.11421617120504379, |
|
"sft_loss": 1.0480351448059082, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.653700822404979, |
|
"grad_norm": 0.27687886357307434, |
|
"learning_rate": 2.096702293897247e-06, |
|
"logits/chosen": -0.3558569550514221, |
|
"logits/rejected": -0.4100232720375061, |
|
"logps/chosen": -0.9075578451156616, |
|
"logps/rejected": -1.1192221641540527, |
|
"loss": 0.9773, |
|
"odds_ratio_loss": 0.6971360445022583, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09075579047203064, |
|
"rewards/margins": 0.02116643264889717, |
|
"rewards/rejected": -0.11192221939563751, |
|
"sft_loss": 0.9075578451156616, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6714825516781509, |
|
"grad_norm": 0.5104541182518005, |
|
"learning_rate": 2.0507988632261672e-06, |
|
"logits/chosen": -0.37269848585128784, |
|
"logits/rejected": -0.3488038182258606, |
|
"logps/chosen": -0.8780601620674133, |
|
"logps/rejected": -1.035788893699646, |
|
"loss": 0.9453, |
|
"odds_ratio_loss": 0.6724425554275513, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08780601620674133, |
|
"rewards/margins": 0.015772882848978043, |
|
"rewards/rejected": -0.10357888787984848, |
|
"sft_loss": 0.8780601620674133, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6892642809513225, |
|
"grad_norm": 1.108080506324768, |
|
"learning_rate": 2.005051410934382e-06, |
|
"logits/chosen": -0.3843027949333191, |
|
"logits/rejected": -0.36695486307144165, |
|
"logps/chosen": -1.0294411182403564, |
|
"logps/rejected": -1.073974847793579, |
|
"loss": 1.1057, |
|
"odds_ratio_loss": 0.7625271081924438, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.10294412076473236, |
|
"rewards/margins": 0.004453369881957769, |
|
"rewards/rejected": -0.10739749670028687, |
|
"sft_loss": 1.0294411182403564, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.7070460102244942, |
|
"grad_norm": 0.6668155789375305, |
|
"learning_rate": 1.9594758221407843e-06, |
|
"logits/chosen": -0.30207034945487976, |
|
"logits/rejected": -0.31365981698036194, |
|
"logps/chosen": -0.8924224972724915, |
|
"logps/rejected": -1.0662165880203247, |
|
"loss": 0.9564, |
|
"odds_ratio_loss": 0.6395965218544006, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.08924224227666855, |
|
"rewards/margins": 0.017379416152834892, |
|
"rewards/rejected": -0.10662166774272919, |
|
"sft_loss": 0.8924224972724915, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.724827739497666, |
|
"grad_norm": 0.5297231674194336, |
|
"learning_rate": 1.9140879222872408e-06, |
|
"logits/chosen": -0.3790926933288574, |
|
"logits/rejected": -0.34034663438796997, |
|
"logps/chosen": -0.9109382629394531, |
|
"logps/rejected": -0.9725145101547241, |
|
"loss": 0.9864, |
|
"odds_ratio_loss": 0.7550500631332397, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09109383821487427, |
|
"rewards/margins": 0.006157620809972286, |
|
"rewards/rejected": -0.09725145250558853, |
|
"sft_loss": 0.9109382629394531, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.742609468770838, |
|
"grad_norm": 0.2978646457195282, |
|
"learning_rate": 1.8689034716434346e-06, |
|
"logits/chosen": -0.3594937026500702, |
|
"logits/rejected": -0.3786514699459076, |
|
"logps/chosen": -0.9791936874389648, |
|
"logps/rejected": -1.0208795070648193, |
|
"loss": 1.054, |
|
"odds_ratio_loss": 0.7475694417953491, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09791935980319977, |
|
"rewards/margins": 0.0041685826145112514, |
|
"rewards/rejected": -0.10208795219659805, |
|
"sft_loss": 0.9791936874389648, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.76039119804401, |
|
"grad_norm": 0.3484848439693451, |
|
"learning_rate": 1.8239381598343576e-06, |
|
"logits/chosen": -0.29449883103370667, |
|
"logits/rejected": -0.3054262697696686, |
|
"logps/chosen": -0.9115015864372253, |
|
"logps/rejected": -1.0031999349594116, |
|
"loss": 0.9826, |
|
"odds_ratio_loss": 0.7106297016143799, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09115016460418701, |
|
"rewards/margins": 0.00916983187198639, |
|
"rewards/rejected": -0.1003199964761734, |
|
"sft_loss": 0.9115015864372253, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"grad_norm": 2.2374985218048096, |
|
"learning_rate": 1.779207600392312e-06, |
|
"logits/chosen": -0.2810733914375305, |
|
"logits/rejected": -0.27120235562324524, |
|
"logps/chosen": -0.9607506990432739, |
|
"logps/rejected": -1.0408788919448853, |
|
"loss": 1.0328, |
|
"odds_ratio_loss": 0.7200591564178467, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09607508033514023, |
|
"rewards/margins": 0.008012807928025723, |
|
"rewards/rejected": -0.10408788919448853, |
|
"sft_loss": 0.9607506990432739, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7781729273171816, |
|
"eval_logits/chosen": -0.33078742027282715, |
|
"eval_logits/rejected": -0.29791274666786194, |
|
"eval_logps/chosen": -0.9451074004173279, |
|
"eval_logps/rejected": -1.0856181383132935, |
|
"eval_loss": 1.0125839710235596, |
|
"eval_odds_ratio_loss": 0.6747645735740662, |
|
"eval_rewards/accuracies": 0.515999972820282, |
|
"eval_rewards/chosen": -0.0945107489824295, |
|
"eval_rewards/margins": 0.014051074162125587, |
|
"eval_rewards/rejected": -0.10856182873249054, |
|
"eval_runtime": 185.8537, |
|
"eval_samples_per_second": 5.381, |
|
"eval_sft_loss": 0.9451074004173279, |
|
"eval_steps_per_second": 2.69, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7959546565903532, |
|
"grad_norm": 0.7795166373252869, |
|
"learning_rate": 1.7347273253353552e-06, |
|
"logits/chosen": -0.33356940746307373, |
|
"logits/rejected": -0.3380289077758789, |
|
"logps/chosen": -0.918900191783905, |
|
"logps/rejected": -0.9768841862678528, |
|
"loss": 0.9932, |
|
"odds_ratio_loss": 0.7426038980484009, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09189002215862274, |
|
"rewards/margins": 0.005798395723104477, |
|
"rewards/rejected": -0.09768841415643692, |
|
"sft_loss": 0.918900191783905, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.8137363858635251, |
|
"grad_norm": 0.8157365322113037, |
|
"learning_rate": 1.690512779774029e-06, |
|
"logits/chosen": -0.3094736635684967, |
|
"logits/rejected": -0.28969138860702515, |
|
"logps/chosen": -0.9715908765792847, |
|
"logps/rejected": -1.1499989032745361, |
|
"loss": 1.037, |
|
"odds_ratio_loss": 0.6542772054672241, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09715909510850906, |
|
"rewards/margins": 0.017840798944234848, |
|
"rewards/rejected": -0.11499989032745361, |
|
"sft_loss": 0.9715908765792847, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.831518115136697, |
|
"grad_norm": 0.5331993103027344, |
|
"learning_rate": 1.6465793165482838e-06, |
|
"logits/chosen": -0.274508535861969, |
|
"logits/rejected": -0.26048415899276733, |
|
"logps/chosen": -0.9679173231124878, |
|
"logps/rejected": -1.0533314943313599, |
|
"loss": 1.0376, |
|
"odds_ratio_loss": 0.6963869333267212, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09679173678159714, |
|
"rewards/margins": 0.008541420102119446, |
|
"rewards/rejected": -0.10533314943313599, |
|
"sft_loss": 0.9679173231124878, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.849299844409869, |
|
"grad_norm": 0.4930827021598816, |
|
"learning_rate": 1.6029421908964305e-06, |
|
"logits/chosen": -0.3850288391113281, |
|
"logits/rejected": -0.3791029155254364, |
|
"logps/chosen": -0.8834483027458191, |
|
"logps/rejected": -1.2469079494476318, |
|
"loss": 0.9502, |
|
"odds_ratio_loss": 0.6672720313072205, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.08834483474493027, |
|
"rewards/margins": 0.03634597733616829, |
|
"rewards/rejected": -0.12469079345464706, |
|
"sft_loss": 0.8834483027458191, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8670815736830408, |
|
"grad_norm": 0.7664922475814819, |
|
"learning_rate": 1.559616555157985e-06, |
|
"logits/chosen": -0.30128011107444763, |
|
"logits/rejected": -0.33186617493629456, |
|
"logps/chosen": -0.9356236457824707, |
|
"logps/rejected": -1.047398328781128, |
|
"loss": 1.0066, |
|
"odds_ratio_loss": 0.7096288800239563, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09356234967708588, |
|
"rewards/margins": 0.01117746438831091, |
|
"rewards/rejected": -0.10473982989788055, |
|
"sft_loss": 0.9356236457824707, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.8848633029562125, |
|
"grad_norm": 0.465348482131958, |
|
"learning_rate": 1.516617453512252e-06, |
|
"logits/chosen": -0.36206910014152527, |
|
"logits/rejected": -0.34239286184310913, |
|
"logps/chosen": -0.9592390060424805, |
|
"logps/rejected": -1.0232237577438354, |
|
"loss": 1.0338, |
|
"odds_ratio_loss": 0.7456762194633484, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09592391550540924, |
|
"rewards/margins": 0.006398468278348446, |
|
"rewards/rejected": -0.10232237726449966, |
|
"sft_loss": 0.9592390060424805, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.9026450322293842, |
|
"grad_norm": 0.830959677696228, |
|
"learning_rate": 1.473959816754449e-06, |
|
"logits/chosen": -0.39980772137641907, |
|
"logits/rejected": -0.3537663221359253, |
|
"logps/chosen": -0.920127272605896, |
|
"logps/rejected": -0.9525257349014282, |
|
"loss": 0.9942, |
|
"odds_ratio_loss": 0.7409034967422485, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.09201272577047348, |
|
"rewards/margins": 0.0032398372422903776, |
|
"rewards/rejected": -0.09525256603956223, |
|
"sft_loss": 0.920127272605896, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.920426761502556, |
|
"grad_norm": 0.442227303981781, |
|
"learning_rate": 1.4316584571112213e-06, |
|
"logits/chosen": -0.23950842022895813, |
|
"logits/rejected": -0.25979962944984436, |
|
"logps/chosen": -0.9493446350097656, |
|
"logps/rejected": -1.02411687374115, |
|
"loss": 1.022, |
|
"odds_ratio_loss": 0.7267680764198303, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09493447840213776, |
|
"rewards/margins": 0.007477219216525555, |
|
"rewards/rejected": -0.10241168737411499, |
|
"sft_loss": 0.9493446350097656, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.938208490775728, |
|
"grad_norm": 0.4206017851829529, |
|
"learning_rate": 1.389728063097306e-06, |
|
"logits/chosen": -0.23708462715148926, |
|
"logits/rejected": -0.24299781024456024, |
|
"logps/chosen": -0.9439695477485657, |
|
"logps/rejected": -1.1116364002227783, |
|
"loss": 1.0118, |
|
"odds_ratio_loss": 0.6782708764076233, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09439694881439209, |
|
"rewards/margins": 0.016766689717769623, |
|
"rewards/rejected": -0.1111636534333229, |
|
"sft_loss": 0.9439695477485657, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.9559902200488999, |
|
"grad_norm": 0.3826051354408264, |
|
"learning_rate": 1.348183194415179e-06, |
|
"logits/chosen": -0.332774817943573, |
|
"logits/rejected": -0.35824882984161377, |
|
"logps/chosen": -0.9340184926986694, |
|
"logps/rejected": -1.110877275466919, |
|
"loss": 1.0005, |
|
"odds_ratio_loss": 0.6648778915405273, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09340184926986694, |
|
"rewards/margins": 0.01768588088452816, |
|
"rewards/rejected": -0.11108773946762085, |
|
"sft_loss": 0.9340184926986694, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9737719493220716, |
|
"grad_norm": 0.3005673587322235, |
|
"learning_rate": 1.3070382768994015e-06, |
|
"logits/chosen": -0.30200204253196716, |
|
"logits/rejected": -0.3130107522010803, |
|
"logps/chosen": -0.9192419052124023, |
|
"logps/rejected": -0.9889400601387024, |
|
"loss": 0.9898, |
|
"odds_ratio_loss": 0.7055012583732605, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09192419052124023, |
|
"rewards/margins": 0.006969820708036423, |
|
"rewards/rejected": -0.09889401495456696, |
|
"sft_loss": 0.9192419052124023, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9915536785952432, |
|
"grad_norm": 0.4379596710205078, |
|
"learning_rate": 1.2663075975074746e-06, |
|
"logits/chosen": -0.3314594626426697, |
|
"logits/rejected": -0.33315131068229675, |
|
"logps/chosen": -0.9054539799690247, |
|
"logps/rejected": -1.0939247608184814, |
|
"loss": 0.9734, |
|
"odds_ratio_loss": 0.6797955632209778, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0905454009771347, |
|
"rewards/margins": 0.018847089260816574, |
|
"rewards/rejected": -0.10939247906208038, |
|
"sft_loss": 0.9054539799690247, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.009335407868415, |
|
"grad_norm": 0.6127385497093201, |
|
"learning_rate": 1.2260052993589034e-06, |
|
"logits/chosen": -0.382732093334198, |
|
"logits/rejected": -0.36521822214126587, |
|
"logps/chosen": -1.0369594097137451, |
|
"logps/rejected": -1.0331060886383057, |
|
"loss": 1.1183, |
|
"odds_ratio_loss": 0.8130975961685181, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.10369595140218735, |
|
"rewards/margins": -0.0003853384405374527, |
|
"rewards/rejected": -0.10331060737371445, |
|
"sft_loss": 1.0369594097137451, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.027117137141587, |
|
"grad_norm": 0.3373187780380249, |
|
"learning_rate": 1.1861453768242099e-06, |
|
"logits/chosen": -0.3635232448577881, |
|
"logits/rejected": -0.3613505959510803, |
|
"logps/chosen": -0.9056431651115417, |
|
"logps/rejected": -1.0306495428085327, |
|
"loss": 0.9749, |
|
"odds_ratio_loss": 0.6926708221435547, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0905643105506897, |
|
"rewards/margins": 0.012500641867518425, |
|
"rewards/rejected": -0.10306496918201447, |
|
"sft_loss": 0.9056431651115417, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.044898866414759, |
|
"grad_norm": 0.9102166891098022, |
|
"learning_rate": 1.1467416706655982e-06, |
|
"logits/chosen": -0.2888937294483185, |
|
"logits/rejected": -0.26064902544021606, |
|
"logps/chosen": -0.9796838760375977, |
|
"logps/rejected": -1.1222679615020752, |
|
"loss": 1.0522, |
|
"odds_ratio_loss": 0.7250452637672424, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09796838462352753, |
|
"rewards/margins": 0.014258405193686485, |
|
"rewards/rejected": -0.11222679913043976, |
|
"sft_loss": 0.9796838760375977, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.062680595687931, |
|
"grad_norm": 0.3294011652469635, |
|
"learning_rate": 1.1078078632309559e-06, |
|
"logits/chosen": -0.34561508893966675, |
|
"logits/rejected": -0.3147248923778534, |
|
"logps/chosen": -0.9134725332260132, |
|
"logps/rejected": -1.0285111665725708, |
|
"loss": 0.9808, |
|
"odds_ratio_loss": 0.6730369329452515, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09134725481271744, |
|
"rewards/margins": 0.011503859423100948, |
|
"rewards/rejected": -0.10285113006830215, |
|
"sft_loss": 0.9134725332260132, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.0804623249611023, |
|
"grad_norm": 0.34308087825775146, |
|
"learning_rate": 1.0693574737028627e-06, |
|
"logits/chosen": -0.3372167944908142, |
|
"logits/rejected": -0.33946290612220764, |
|
"logps/chosen": -0.9201191067695618, |
|
"logps/rejected": -1.0031434297561646, |
|
"loss": 0.9946, |
|
"odds_ratio_loss": 0.744364321231842, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09201192110776901, |
|
"rewards/margins": 0.008302421309053898, |
|
"rewards/rejected": -0.10031434148550034, |
|
"sft_loss": 0.9201191067695618, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.098244054234274, |
|
"grad_norm": 0.5865955948829651, |
|
"learning_rate": 1.0314038534042586e-06, |
|
"logits/chosen": -0.2901017963886261, |
|
"logits/rejected": -0.32853323221206665, |
|
"logps/chosen": -0.9257968068122864, |
|
"logps/rejected": -1.0451035499572754, |
|
"loss": 0.9964, |
|
"odds_ratio_loss": 0.7055808901786804, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0925796777009964, |
|
"rewards/margins": 0.01193068828433752, |
|
"rewards/rejected": -0.10451038181781769, |
|
"sft_loss": 0.9257968068122864, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.116025783507446, |
|
"grad_norm": 0.41964584589004517, |
|
"learning_rate": 9.939601811623946e-07, |
|
"logits/chosen": -0.31542712450027466, |
|
"logits/rejected": -0.30006498098373413, |
|
"logps/chosen": -0.9362471699714661, |
|
"logps/rejected": -1.0245290994644165, |
|
"loss": 1.0084, |
|
"odds_ratio_loss": 0.7219125032424927, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09362472593784332, |
|
"rewards/margins": 0.008828198537230492, |
|
"rewards/rejected": -0.10245291888713837, |
|
"sft_loss": 0.9362471699714661, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.133807512780618, |
|
"grad_norm": 0.48077794909477234, |
|
"learning_rate": 9.570394587326825e-07, |
|
"logits/chosen": -0.29744619131088257, |
|
"logits/rejected": -0.34743356704711914, |
|
"logps/chosen": -0.9422229528427124, |
|
"logps/rejected": -1.1074718236923218, |
|
"loss": 1.0093, |
|
"odds_ratio_loss": 0.6704057455062866, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.094222292304039, |
|
"rewards/margins": 0.016524888575077057, |
|
"rewards/rejected": -0.11074719578027725, |
|
"sft_loss": 0.9422229528427124, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.15158924205379, |
|
"grad_norm": 0.3064732253551483, |
|
"learning_rate": 9.206545062840302e-07, |
|
"logits/chosen": -0.2666998505592346, |
|
"logits/rejected": -0.3201262652873993, |
|
"logps/chosen": -0.8927067518234253, |
|
"logps/rejected": -1.0634257793426514, |
|
"loss": 0.9575, |
|
"odds_ratio_loss": 0.6478100419044495, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08927067369222641, |
|
"rewards/margins": 0.017071900889277458, |
|
"rewards/rejected": -0.10634257644414902, |
|
"sft_loss": 0.8927067518234253, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.1693709713269618, |
|
"grad_norm": 0.3534330725669861, |
|
"learning_rate": 8.848179579472285e-07, |
|
"logits/chosen": -0.3102249801158905, |
|
"logits/rejected": -0.2955402433872223, |
|
"logps/chosen": -0.9082851409912109, |
|
"logps/rejected": -0.9553133845329285, |
|
"loss": 0.9795, |
|
"odds_ratio_loss": 0.7121320962905884, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09082850813865662, |
|
"rewards/margins": 0.00470283068716526, |
|
"rewards/rejected": -0.09553134441375732, |
|
"sft_loss": 0.9082851409912109, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.1871527006001332, |
|
"grad_norm": 0.6444931626319885, |
|
"learning_rate": 8.495422574279403e-07, |
|
"logits/chosen": -0.3936762809753418, |
|
"logits/rejected": -0.42016810178756714, |
|
"logps/chosen": -0.8496967554092407, |
|
"logps/rejected": -1.0362155437469482, |
|
"loss": 0.9135, |
|
"odds_ratio_loss": 0.6377807855606079, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.08496967703104019, |
|
"rewards/margins": 0.018651869148015976, |
|
"rewards/rejected": -0.10362155735492706, |
|
"sft_loss": 0.8496967554092407, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.204934429873305, |
|
"grad_norm": 0.4805600941181183, |
|
"learning_rate": 8.148396536858063e-07, |
|
"logits/chosen": -0.3237206041812897, |
|
"logits/rejected": -0.3143185079097748, |
|
"logps/chosen": -0.9960983991622925, |
|
"logps/rejected": -1.1420572996139526, |
|
"loss": 1.0672, |
|
"odds_ratio_loss": 0.7113397121429443, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0996098443865776, |
|
"rewards/margins": 0.014595886692404747, |
|
"rewards/rejected": -0.1142057403922081, |
|
"sft_loss": 0.9960983991622925, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.222716159146477, |
|
"grad_norm": 0.676315188407898, |
|
"learning_rate": 7.807221966811815e-07, |
|
"logits/chosen": -0.29545170068740845, |
|
"logits/rejected": -0.31817343831062317, |
|
"logps/chosen": -0.9420124292373657, |
|
"logps/rejected": -1.0276824235916138, |
|
"loss": 1.0181, |
|
"odds_ratio_loss": 0.7609573006629944, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0942012369632721, |
|
"rewards/margins": 0.008567007258534431, |
|
"rewards/rejected": -0.10276825726032257, |
|
"sft_loss": 0.9420124292373657, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.240497888419649, |
|
"grad_norm": 0.3943430781364441, |
|
"learning_rate": 7.47201733190962e-07, |
|
"logits/chosen": -0.3520922362804413, |
|
"logits/rejected": -0.3318483829498291, |
|
"logps/chosen": -0.8970060348510742, |
|
"logps/rejected": -0.9855879545211792, |
|
"loss": 0.9669, |
|
"odds_ratio_loss": 0.6993352174758911, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0897006094455719, |
|
"rewards/margins": 0.008858194574713707, |
|
"rewards/rejected": -0.09855880588293076, |
|
"sft_loss": 0.8970060348510742, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.258279617692821, |
|
"grad_norm": 0.5184921026229858, |
|
"learning_rate": 7.142899026949721e-07, |
|
"logits/chosen": -0.33211636543273926, |
|
"logits/rejected": -0.3313821256160736, |
|
"logps/chosen": -0.9101552963256836, |
|
"logps/rejected": -0.9938360452651978, |
|
"loss": 0.9798, |
|
"odds_ratio_loss": 0.6968866586685181, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.09101552516222, |
|
"rewards/margins": 0.00836807768791914, |
|
"rewards/rejected": -0.09938360750675201, |
|
"sft_loss": 0.9101552963256836, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.2760613469659923, |
|
"grad_norm": 1.8007909059524536, |
|
"learning_rate": 6.819981333343273e-07, |
|
"logits/chosen": -0.3704894185066223, |
|
"logits/rejected": -0.3426709771156311, |
|
"logps/chosen": -0.9317655563354492, |
|
"logps/rejected": -1.0302845239639282, |
|
"loss": 1.003, |
|
"odds_ratio_loss": 0.7128146886825562, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09317655861377716, |
|
"rewards/margins": 0.009851890616118908, |
|
"rewards/rejected": -0.10302845388650894, |
|
"sft_loss": 0.9317655563354492, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.293843076239164, |
|
"grad_norm": 0.4554091989994049, |
|
"learning_rate": 6.503376379431839e-07, |
|
"logits/chosen": -0.2947995066642761, |
|
"logits/rejected": -0.279682457447052, |
|
"logps/chosen": -0.9925037622451782, |
|
"logps/rejected": -0.9870964884757996, |
|
"loss": 1.068, |
|
"odds_ratio_loss": 0.7550127506256104, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.09925039112567902, |
|
"rewards/margins": -0.0005407325807027519, |
|
"rewards/rejected": -0.09870964288711548, |
|
"sft_loss": 0.9925037622451782, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.311624805512336, |
|
"grad_norm": 1.7697697877883911, |
|
"learning_rate": 6.193194101552502e-07, |
|
"logits/chosen": -0.31604236364364624, |
|
"logits/rejected": -0.35974448919296265, |
|
"logps/chosen": -0.936480700969696, |
|
"logps/rejected": -1.0702247619628906, |
|
"loss": 1.002, |
|
"odds_ratio_loss": 0.655421793460846, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.09364806860685349, |
|
"rewards/margins": 0.013374416157603264, |
|
"rewards/rejected": -0.1070224866271019, |
|
"sft_loss": 0.936480700969696, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.329406534785508, |
|
"grad_norm": 0.6282922625541687, |
|
"learning_rate": 5.889542205864083e-07, |
|
"logits/chosen": -0.3355167806148529, |
|
"logits/rejected": -0.3377595543861389, |
|
"logps/chosen": -0.9515066146850586, |
|
"logps/rejected": -1.0681602954864502, |
|
"loss": 1.0205, |
|
"odds_ratio_loss": 0.6903635859489441, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0951506644487381, |
|
"rewards/margins": 0.01166537031531334, |
|
"rewards/rejected": -0.10681603848934174, |
|
"sft_loss": 0.9515066146850586, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.34718826405868, |
|
"grad_norm": 0.3864741027355194, |
|
"learning_rate": 5.592526130947862e-07, |
|
"logits/chosen": -0.31521058082580566, |
|
"logits/rejected": -0.3186022937297821, |
|
"logps/chosen": -0.9329264760017395, |
|
"logps/rejected": -1.0726194381713867, |
|
"loss": 1.0056, |
|
"odds_ratio_loss": 0.7264095544815063, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.09329266101121902, |
|
"rewards/margins": 0.013969297520816326, |
|
"rewards/rejected": -0.10726194083690643, |
|
"sft_loss": 0.9329264760017395, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.3649699933318518, |
|
"grad_norm": 0.8674092292785645, |
|
"learning_rate": 5.302249011195507e-07, |
|
"logits/chosen": -0.3717043995857239, |
|
"logits/rejected": -0.3457496166229248, |
|
"logps/chosen": -0.9407739639282227, |
|
"logps/rejected": -0.9671589136123657, |
|
"loss": 1.015, |
|
"odds_ratio_loss": 0.7421091198921204, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09407740086317062, |
|
"rewards/margins": 0.0026384838856756687, |
|
"rewards/rejected": -0.09671588987112045, |
|
"sft_loss": 0.9407739639282227, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.382751722605023, |
|
"grad_norm": 0.8201255798339844, |
|
"learning_rate": 5.018811640997307e-07, |
|
"logits/chosen": -0.3262820839881897, |
|
"logits/rejected": -0.28208276629447937, |
|
"logps/chosen": -0.9741110801696777, |
|
"logps/rejected": -1.1972548961639404, |
|
"loss": 1.0409, |
|
"odds_ratio_loss": 0.6679055690765381, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0974111258983612, |
|
"rewards/margins": 0.022314375266432762, |
|
"rewards/rejected": -0.11972548812627792, |
|
"sft_loss": 0.9741110801696777, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.400533451878195, |
|
"grad_norm": 0.3292596638202667, |
|
"learning_rate": 4.7423124397427105e-07, |
|
"logits/chosen": -0.37047189474105835, |
|
"logits/rejected": -0.31794866919517517, |
|
"logps/chosen": -0.9531441926956177, |
|
"logps/rejected": -1.015749216079712, |
|
"loss": 1.0256, |
|
"odds_ratio_loss": 0.7250458002090454, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.09531442075967789, |
|
"rewards/margins": 0.006260508205741644, |
|
"rewards/rejected": -0.10157492011785507, |
|
"sft_loss": 0.9531441926956177, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.418315181151367, |
|
"grad_norm": 0.4776778817176819, |
|
"learning_rate": 4.472847417645787e-07, |
|
"logits/chosen": -0.2806258201599121, |
|
"logits/rejected": -0.3024401366710663, |
|
"logps/chosen": -0.9200853109359741, |
|
"logps/rejected": -1.114600419998169, |
|
"loss": 0.9877, |
|
"odds_ratio_loss": 0.6760807633399963, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.09200852364301682, |
|
"rewards/margins": 0.01945151947438717, |
|
"rewards/rejected": -0.11146005243062973, |
|
"sft_loss": 0.9200853109359741, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.436096910424539, |
|
"grad_norm": 0.3043542802333832, |
|
"learning_rate": 4.210510142406993e-07, |
|
"logits/chosen": -0.32727354764938354, |
|
"logits/rejected": -0.3754233717918396, |
|
"logps/chosen": -0.9101996421813965, |
|
"logps/rejected": -1.0942609310150146, |
|
"loss": 0.977, |
|
"odds_ratio_loss": 0.6675896644592285, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09101996570825577, |
|
"rewards/margins": 0.018406113609671593, |
|
"rewards/rejected": -0.10942608118057251, |
|
"sft_loss": 0.9101996421813965, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.4538786396977104, |
|
"grad_norm": 0.4151700437068939, |
|
"learning_rate": 3.9553917067232966e-07, |
|
"logits/chosen": -0.33969706296920776, |
|
"logits/rejected": -0.36881956458091736, |
|
"logps/chosen": -0.9399350881576538, |
|
"logps/rejected": -1.071777105331421, |
|
"loss": 1.0133, |
|
"odds_ratio_loss": 0.7333552241325378, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09399349987506866, |
|
"rewards/margins": 0.013184216804802418, |
|
"rewards/rejected": -0.10717771202325821, |
|
"sft_loss": 0.9399350881576538, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.4716603689708823, |
|
"grad_norm": 0.4568045437335968, |
|
"learning_rate": 3.707580696657509e-07, |
|
"logits/chosen": -0.2799975275993347, |
|
"logits/rejected": -0.30841827392578125, |
|
"logps/chosen": -0.9116710424423218, |
|
"logps/rejected": -0.9513956308364868, |
|
"loss": 0.9844, |
|
"odds_ratio_loss": 0.7269908785820007, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.09116710722446442, |
|
"rewards/margins": 0.003972449339926243, |
|
"rewards/rejected": -0.09513955563306808, |
|
"sft_loss": 0.9116710424423218, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.489442098244054, |
|
"grad_norm": 0.425468772649765, |
|
"learning_rate": 3.4671631608781815e-07, |
|
"logits/chosen": -0.3139536380767822, |
|
"logits/rejected": -0.32965949177742004, |
|
"logps/chosen": -0.9703924059867859, |
|
"logps/rejected": -1.079158067703247, |
|
"loss": 1.0439, |
|
"odds_ratio_loss": 0.7353022694587708, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09703925251960754, |
|
"rewards/margins": 0.010876556858420372, |
|
"rewards/rejected": -0.10791579633951187, |
|
"sft_loss": 0.9703924059867859, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.507223827517226, |
|
"grad_norm": 0.6458228826522827, |
|
"learning_rate": 3.234222580780405e-07, |
|
"logits/chosen": -0.3632466197013855, |
|
"logits/rejected": -0.3340745270252228, |
|
"logps/chosen": -0.942143440246582, |
|
"logps/rejected": -0.9809234738349915, |
|
"loss": 1.0153, |
|
"odds_ratio_loss": 0.7311049103736877, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09421434998512268, |
|
"rewards/margins": 0.0038779997266829014, |
|
"rewards/rejected": -0.09809235483407974, |
|
"sft_loss": 0.942143440246582, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.525005556790398, |
|
"grad_norm": 0.7571399211883545, |
|
"learning_rate": 3.0088398414982375e-07, |
|
"logits/chosen": -0.40216293931007385, |
|
"logits/rejected": -0.3554636836051941, |
|
"logps/chosen": -0.9506216049194336, |
|
"logps/rejected": -1.1040918827056885, |
|
"loss": 1.0238, |
|
"odds_ratio_loss": 0.7313109636306763, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09506215900182724, |
|
"rewards/margins": 0.015347021631896496, |
|
"rewards/rejected": -0.11040918529033661, |
|
"sft_loss": 0.9506216049194336, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.54278728606357, |
|
"grad_norm": 0.41928017139434814, |
|
"learning_rate": 2.7910932038184487e-07, |
|
"logits/chosen": -0.38035768270492554, |
|
"logits/rejected": -0.43410953879356384, |
|
"logps/chosen": -0.9504894018173218, |
|
"logps/rejected": -1.033362627029419, |
|
"loss": 1.0219, |
|
"odds_ratio_loss": 0.7138369083404541, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0950489416718483, |
|
"rewards/margins": 0.008287337608635426, |
|
"rewards/rejected": -0.10333627462387085, |
|
"sft_loss": 0.9504894018173218, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.5605690153367417, |
|
"grad_norm": 0.6664097905158997, |
|
"learning_rate": 2.5810582770057325e-07, |
|
"logits/chosen": -0.3502410054206848, |
|
"logits/rejected": -0.31972765922546387, |
|
"logps/chosen": -0.912204384803772, |
|
"logps/rejected": -1.0270380973815918, |
|
"loss": 0.9827, |
|
"odds_ratio_loss": 0.7054314613342285, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.09122045338153839, |
|
"rewards/margins": 0.011483349837362766, |
|
"rewards/rejected": -0.10270379483699799, |
|
"sft_loss": 0.912204384803772, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.578350744609913, |
|
"grad_norm": 0.5214207768440247, |
|
"learning_rate": 2.3788079925484402e-07, |
|
"logits/chosen": -0.2704157829284668, |
|
"logits/rejected": -0.30042511224746704, |
|
"logps/chosen": -0.980503261089325, |
|
"logps/rejected": -1.0476016998291016, |
|
"loss": 1.054, |
|
"odds_ratio_loss": 0.7349393963813782, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0980503261089325, |
|
"rewards/margins": 0.006709852255880833, |
|
"rewards/rejected": -0.10476018488407135, |
|
"sft_loss": 0.980503261089325, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.596132473883085, |
|
"grad_norm": 0.3559114336967468, |
|
"learning_rate": 2.1844125788342661e-07, |
|
"logits/chosen": -0.3745304048061371, |
|
"logits/rejected": -0.3963877558708191, |
|
"logps/chosen": -0.8978282809257507, |
|
"logps/rejected": -1.1463072299957275, |
|
"loss": 0.966, |
|
"odds_ratio_loss": 0.6815627813339233, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.08978282660245895, |
|
"rewards/margins": 0.02484789676964283, |
|
"rewards/rejected": -0.11463073641061783, |
|
"sft_loss": 0.8978282809257507, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.613914203156257, |
|
"grad_norm": 0.4206191599369049, |
|
"learning_rate": 1.9979395367644428e-07, |
|
"logits/chosen": -0.3081280589103699, |
|
"logits/rejected": -0.2860923111438751, |
|
"logps/chosen": -0.8848710060119629, |
|
"logps/rejected": -1.030397653579712, |
|
"loss": 0.9502, |
|
"odds_ratio_loss": 0.6536397337913513, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08848710358142853, |
|
"rewards/margins": 0.014552672393620014, |
|
"rewards/rejected": -0.10303977876901627, |
|
"sft_loss": 0.8848710060119629, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.631695932429429, |
|
"grad_norm": 0.6648186445236206, |
|
"learning_rate": 1.81945361631512e-07, |
|
"logits/chosen": -0.3387419283390045, |
|
"logits/rejected": -0.2922862768173218, |
|
"logps/chosen": -0.927925705909729, |
|
"logps/rejected": -0.9954597353935242, |
|
"loss": 1.0003, |
|
"odds_ratio_loss": 0.7234224081039429, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0927925705909729, |
|
"rewards/margins": 0.006753397174179554, |
|
"rewards/rejected": -0.09954597055912018, |
|
"sft_loss": 0.927925705909729, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.6494776617026004, |
|
"grad_norm": 0.5596628189086914, |
|
"learning_rate": 1.6490167940538343e-07, |
|
"logits/chosen": -0.3137277066707611, |
|
"logits/rejected": -0.3255840241909027, |
|
"logps/chosen": -0.9538249969482422, |
|
"logps/rejected": -1.0488290786743164, |
|
"loss": 1.0255, |
|
"odds_ratio_loss": 0.7165058851242065, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09538250416517258, |
|
"rewards/margins": 0.009500409476459026, |
|
"rewards/rejected": -0.10488291084766388, |
|
"sft_loss": 0.9538249969482422, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"grad_norm": 0.4116540849208832, |
|
"learning_rate": 1.4866882516191339e-07, |
|
"logits/chosen": -0.31974849104881287, |
|
"logits/rejected": -0.27599194645881653, |
|
"logps/chosen": -0.9288945198059082, |
|
"logps/rejected": -1.0830228328704834, |
|
"loss": 0.9998, |
|
"odds_ratio_loss": 0.7095054984092712, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0928894504904747, |
|
"rewards/margins": 0.015412822365760803, |
|
"rewards/rejected": -0.1083022803068161, |
|
"sft_loss": 0.9288945198059082, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.6672593909757722, |
|
"eval_logits/chosen": -0.3320940136909485, |
|
"eval_logits/rejected": -0.29884636402130127, |
|
"eval_logps/chosen": -0.9399133324623108, |
|
"eval_logps/rejected": -1.080655574798584, |
|
"eval_loss": 1.0073015689849854, |
|
"eval_odds_ratio_loss": 0.6738813519477844, |
|
"eval_rewards/accuracies": 0.515999972820282, |
|
"eval_rewards/chosen": -0.09399133920669556, |
|
"eval_rewards/margins": 0.01407422125339508, |
|
"eval_rewards/rejected": -0.10806556046009064, |
|
"eval_runtime": 185.9317, |
|
"eval_samples_per_second": 5.378, |
|
"eval_sft_loss": 0.9399133324623108, |
|
"eval_steps_per_second": 2.689, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.685041120248944, |
|
"grad_norm": 0.6644484996795654, |
|
"learning_rate": 1.3325243551706057e-07, |
|
"logits/chosen": -0.3859871029853821, |
|
"logits/rejected": -0.36218634247779846, |
|
"logps/chosen": -0.9241644144058228, |
|
"logps/rejected": -1.1543761491775513, |
|
"loss": 0.9915, |
|
"odds_ratio_loss": 0.6730437874794006, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0924164205789566, |
|
"rewards/margins": 0.023021187633275986, |
|
"rewards/rejected": -0.11543761193752289, |
|
"sft_loss": 0.9241644144058228, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.702822849522116, |
|
"grad_norm": 0.6883984208106995, |
|
"learning_rate": 1.1865786358165737e-07, |
|
"logits/chosen": -0.3818913400173187, |
|
"logits/rejected": -0.27337896823883057, |
|
"logps/chosen": -0.9033206701278687, |
|
"logps/rejected": -1.0108495950698853, |
|
"loss": 0.9727, |
|
"odds_ratio_loss": 0.6942235827445984, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09033207595348358, |
|
"rewards/margins": 0.010752884671092033, |
|
"rewards/rejected": -0.10108494758605957, |
|
"sft_loss": 0.9033206701278687, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.720604578795288, |
|
"grad_norm": 1.4156850576400757, |
|
"learning_rate": 1.0489017710262311e-07, |
|
"logits/chosen": -0.39080482721328735, |
|
"logits/rejected": -0.3747466206550598, |
|
"logps/chosen": -1.0374637842178345, |
|
"logps/rejected": -1.1824612617492676, |
|
"loss": 1.1147, |
|
"odds_ratio_loss": 0.7718855142593384, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.10374637693166733, |
|
"rewards/margins": 0.014499744400382042, |
|
"rewards/rejected": -0.11824611574411392, |
|
"sft_loss": 1.0374637842178345, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.73838630806846, |
|
"grad_norm": 0.4921424984931946, |
|
"learning_rate": 9.195415670326446e-08, |
|
"logits/chosen": -0.326080858707428, |
|
"logits/rejected": -0.321908175945282, |
|
"logps/chosen": -0.9485294222831726, |
|
"logps/rejected": -1.082155704498291, |
|
"loss": 1.0195, |
|
"odds_ratio_loss": 0.7096532583236694, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09485294669866562, |
|
"rewards/margins": 0.013362633064389229, |
|
"rewards/rejected": -0.1082155704498291, |
|
"sft_loss": 0.9485294222831726, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.7561680373416317, |
|
"grad_norm": 0.686665415763855, |
|
"learning_rate": 7.985429422327384e-08, |
|
"logits/chosen": -0.35336002707481384, |
|
"logits/rejected": -0.3244116008281708, |
|
"logps/chosen": -0.9436219930648804, |
|
"logps/rejected": -0.975549578666687, |
|
"loss": 1.0188, |
|
"odds_ratio_loss": 0.7518836855888367, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.09436219930648804, |
|
"rewards/margins": 0.0031927600502967834, |
|
"rewards/rejected": -0.09755495190620422, |
|
"sft_loss": 0.9436219930648804, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.773949766614803, |
|
"grad_norm": 0.30419808626174927, |
|
"learning_rate": 6.859479115900818e-08, |
|
"logits/chosen": -0.31769606471061707, |
|
"logits/rejected": -0.31846362352371216, |
|
"logps/chosen": -0.9142364263534546, |
|
"logps/rejected": -1.0324945449829102, |
|
"loss": 0.9834, |
|
"odds_ratio_loss": 0.6916245222091675, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0914236530661583, |
|
"rewards/margins": 0.011825799010694027, |
|
"rewards/rejected": -0.1032494530081749, |
|
"sft_loss": 0.9142364263534546, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.791731495887975, |
|
"grad_norm": 1.5349509716033936, |
|
"learning_rate": 5.817955720457902e-08, |
|
"logits/chosen": -0.33953648805618286, |
|
"logits/rejected": -0.297925740480423, |
|
"logps/chosen": -0.9395607709884644, |
|
"logps/rejected": -1.0038203001022339, |
|
"loss": 1.0133, |
|
"odds_ratio_loss": 0.7371524572372437, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09395607560873032, |
|
"rewards/margins": 0.006425946019589901, |
|
"rewards/rejected": -0.10038203001022339, |
|
"sft_loss": 0.9395607709884644, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.809513225161147, |
|
"grad_norm": 0.36313971877098083, |
|
"learning_rate": 4.861220889427199e-08, |
|
"logits/chosen": -0.35685330629348755, |
|
"logits/rejected": -0.35064131021499634, |
|
"logps/chosen": -0.9390374422073364, |
|
"logps/rejected": -1.019951581954956, |
|
"loss": 1.012, |
|
"odds_ratio_loss": 0.7297292351722717, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09390375763177872, |
|
"rewards/margins": 0.008091414347290993, |
|
"rewards/rejected": -0.10199517011642456, |
|
"sft_loss": 0.9390374422073364, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.827294954434319, |
|
"grad_norm": 0.26599186658859253, |
|
"learning_rate": 3.9896068346758074e-08, |
|
"logits/chosen": -0.39413073658943176, |
|
"logits/rejected": -0.38061630725860596, |
|
"logps/chosen": -0.948017954826355, |
|
"logps/rejected": -1.034618616104126, |
|
"loss": 1.0172, |
|
"odds_ratio_loss": 0.6922141313552856, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09480179846286774, |
|
"rewards/margins": 0.008660053834319115, |
|
"rewards/rejected": -0.1034618467092514, |
|
"sft_loss": 0.948017954826355, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.8450766837074903, |
|
"grad_norm": 0.9985164403915405, |
|
"learning_rate": 3.203416211153832e-08, |
|
"logits/chosen": -0.3526967763900757, |
|
"logits/rejected": -0.25582748651504517, |
|
"logps/chosen": -0.9348894357681274, |
|
"logps/rejected": -1.0583240985870361, |
|
"loss": 1.0071, |
|
"odds_ratio_loss": 0.7220235466957092, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.09348894655704498, |
|
"rewards/margins": 0.01234346441924572, |
|
"rewards/rejected": -0.10583242028951645, |
|
"sft_loss": 0.9348894357681274, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.8628584129806622, |
|
"grad_norm": 0.4895220994949341, |
|
"learning_rate": 2.5029220118019393e-08, |
|
"logits/chosen": -0.3774477243423462, |
|
"logits/rejected": -0.34018778800964355, |
|
"logps/chosen": -0.9445845484733582, |
|
"logps/rejected": -0.9962360262870789, |
|
"loss": 1.0176, |
|
"odds_ratio_loss": 0.7305063009262085, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0944584533572197, |
|
"rewards/margins": 0.0051651508547365665, |
|
"rewards/rejected": -0.09962360560894012, |
|
"sft_loss": 0.9445845484733582, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.880640142253834, |
|
"grad_norm": 0.39454635977745056, |
|
"learning_rate": 1.8883674727586122e-08, |
|
"logits/chosen": -0.3457157611846924, |
|
"logits/rejected": -0.33168259263038635, |
|
"logps/chosen": -0.8693550825119019, |
|
"logps/rejected": -1.09225332736969, |
|
"loss": 0.9328, |
|
"odds_ratio_loss": 0.6342187523841858, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08693551272153854, |
|
"rewards/margins": 0.022289803251624107, |
|
"rewards/rejected": -0.1092253178358078, |
|
"sft_loss": 0.8693550825119019, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.898421871527006, |
|
"grad_norm": 0.29763612151145935, |
|
"learning_rate": 1.3599659889000639e-08, |
|
"logits/chosen": -0.26188623905181885, |
|
"logits/rejected": -0.27545788884162903, |
|
"logps/chosen": -0.9086050987243652, |
|
"logps/rejected": -0.9591732025146484, |
|
"loss": 0.9816, |
|
"odds_ratio_loss": 0.7299038171768188, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0908605083823204, |
|
"rewards/margins": 0.005056814290583134, |
|
"rewards/rejected": -0.09591732919216156, |
|
"sft_loss": 0.9086050987243652, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.916203600800178, |
|
"grad_norm": 3.087757110595703, |
|
"learning_rate": 9.179010397421528e-09, |
|
"logits/chosen": -0.29684725403785706, |
|
"logits/rejected": -0.26544058322906494, |
|
"logps/chosen": -1.0444749593734741, |
|
"logps/rejected": -1.1464588642120361, |
|
"loss": 1.1156, |
|
"odds_ratio_loss": 0.7117230892181396, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.10444750636816025, |
|
"rewards/margins": 0.010198366828262806, |
|
"rewards/rejected": -0.11464587599039078, |
|
"sft_loss": 1.0444749593734741, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.93398533007335, |
|
"grad_norm": 0.7389609813690186, |
|
"learning_rate": 5.623261257296509e-09, |
|
"logits/chosen": -0.33190470933914185, |
|
"logits/rejected": -0.2921023964881897, |
|
"logps/chosen": -0.8605577349662781, |
|
"logps/rejected": -0.9687950015068054, |
|
"loss": 0.9291, |
|
"odds_ratio_loss": 0.6854843497276306, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08605578541755676, |
|
"rewards/margins": 0.010823719203472137, |
|
"rewards/rejected": -0.0968794971704483, |
|
"sft_loss": 0.8605577349662781, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.9517670593465217, |
|
"grad_norm": 0.49204200506210327, |
|
"learning_rate": 2.933647149357122e-09, |
|
"logits/chosen": -0.3684224784374237, |
|
"logits/rejected": -0.3360394537448883, |
|
"logps/chosen": -0.9260095357894897, |
|
"logps/rejected": -1.059597373008728, |
|
"loss": 0.9945, |
|
"odds_ratio_loss": 0.6844674348831177, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09260095655918121, |
|
"rewards/margins": 0.013358776457607746, |
|
"rewards/rejected": -0.10595973581075668, |
|
"sft_loss": 0.9260095357894897, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.969548788619693, |
|
"grad_norm": 0.4070994257926941, |
|
"learning_rate": 1.1111020018930717e-09, |
|
"logits/chosen": -0.2591468393802643, |
|
"logits/rejected": -0.31176748871803284, |
|
"logps/chosen": -0.9283815622329712, |
|
"logps/rejected": -0.9903603792190552, |
|
"loss": 1.0009, |
|
"odds_ratio_loss": 0.7251425981521606, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.09283814579248428, |
|
"rewards/margins": 0.006197893992066383, |
|
"rewards/rejected": -0.09903603792190552, |
|
"sft_loss": 0.9283815622329712, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.987330517892865, |
|
"grad_norm": 0.31971636414527893, |
|
"learning_rate": 1.5625866646051813e-10, |
|
"logits/chosen": -0.3598848283290863, |
|
"logits/rejected": -0.3403863310813904, |
|
"logps/chosen": -0.9049466252326965, |
|
"logps/rejected": -1.057483434677124, |
|
"loss": 0.9695, |
|
"odds_ratio_loss": 0.6452642679214478, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.09049466997385025, |
|
"rewards/margins": 0.015253685414791107, |
|
"rewards/rejected": -0.10574835538864136, |
|
"sft_loss": 0.9049466252326965, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.997999555456768, |
|
"step": 1686, |
|
"total_flos": 1.8817568285770383e+18, |
|
"train_loss": 1.0353579054523618, |
|
"train_runtime": 16950.0138, |
|
"train_samples_per_second": 1.593, |
|
"train_steps_per_second": 0.099 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1686, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.8817568285770383e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|