|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.800336779303485, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": -3.0399391651153564, |
|
"logits/rejected": -2.5624823570251465, |
|
"logps/chosen": -891.05517578125, |
|
"logps/rejected": -1084.880126953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 18.838509896307443, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -2.7760655879974365, |
|
"logits/rejected": -2.790731906890869, |
|
"logps/chosen": -503.9539489746094, |
|
"logps/rejected": -1057.63232421875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5763888955116272, |
|
"rewards/chosen": 0.0020897421054542065, |
|
"rewards/margins": 0.001932685961946845, |
|
"rewards/rejected": 0.0001570563472341746, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 20.774413071968993, |
|
"learning_rate": 4.995131923687487e-07, |
|
"logits/chosen": -2.7422165870666504, |
|
"logits/rejected": -2.696645498275757, |
|
"logps/chosen": -548.654296875, |
|
"logps/rejected": -999.8585815429688, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.03212609142065048, |
|
"rewards/margins": 0.042353663593530655, |
|
"rewards/rejected": -0.010227566584944725, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 24.957524647561584, |
|
"learning_rate": 4.909114739839079e-07, |
|
"logits/chosen": -2.9761385917663574, |
|
"logits/rejected": -2.836198329925537, |
|
"logps/chosen": -549.5018310546875, |
|
"logps/rejected": -1060.82763671875, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.06840862333774567, |
|
"rewards/margins": 0.18814805150032043, |
|
"rewards/rejected": -0.11973947286605835, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 29.2493335087329, |
|
"learning_rate": 4.719192614212969e-07, |
|
"logits/chosen": -3.023461103439331, |
|
"logits/rejected": -2.9939627647399902, |
|
"logps/chosen": -569.5960083007812, |
|
"logps/rejected": -1105.576416015625, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.18537160754203796, |
|
"rewards/margins": 0.7153643369674683, |
|
"rewards/rejected": -0.9007358551025391, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 34.52922011163671, |
|
"learning_rate": 4.4335568741374695e-07, |
|
"logits/chosen": -3.0847573280334473, |
|
"logits/rejected": -3.1392886638641357, |
|
"logps/chosen": -724.968505859375, |
|
"logps/rejected": -1334.053955078125, |
|
"loss": 0.3356, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0850236415863037, |
|
"rewards/margins": 1.9837610721588135, |
|
"rewards/rejected": -3.068784713745117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 74.1970434298791, |
|
"learning_rate": 4.0645269681018434e-07, |
|
"logits/chosen": -3.01001238822937, |
|
"logits/rejected": -3.0855369567871094, |
|
"logps/chosen": -763.7335815429688, |
|
"logps/rejected": -1466.1148681640625, |
|
"loss": 0.2659, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.215951681137085, |
|
"rewards/margins": 3.7804553508758545, |
|
"rewards/rejected": -4.996407508850098, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 24.166083003223434, |
|
"learning_rate": 3.6280191288478435e-07, |
|
"logits/chosen": -2.9843807220458984, |
|
"logits/rejected": -3.0654196739196777, |
|
"logps/chosen": -652.9537353515625, |
|
"logps/rejected": -1663.2164306640625, |
|
"loss": 0.2156, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.9659550786018372, |
|
"rewards/margins": 4.433414459228516, |
|
"rewards/rejected": -5.399369716644287, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 17.793411473003722, |
|
"learning_rate": 3.142859907420615e-07, |
|
"logits/chosen": -3.0192360877990723, |
|
"logits/rejected": -3.050020217895508, |
|
"logps/chosen": -661.2525634765625, |
|
"logps/rejected": -1447.4609375, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.9658153653144836, |
|
"rewards/margins": 3.9476540088653564, |
|
"rewards/rejected": -4.913470268249512, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 55.47746105929639, |
|
"learning_rate": 2.629974185404951e-07, |
|
"logits/chosen": -2.978877305984497, |
|
"logits/rejected": -3.029723644256592, |
|
"logps/chosen": -613.0646362304688, |
|
"logps/rejected": -1531.10546875, |
|
"loss": 0.1844, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.270330786705017, |
|
"rewards/margins": 4.801483154296875, |
|
"rewards/rejected": -6.071813583374023, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 33.71679055058231, |
|
"learning_rate": 2.1114826863194878e-07, |
|
"logits/chosen": -2.983506441116333, |
|
"logits/rejected": -3.02791690826416, |
|
"logps/chosen": -676.8287353515625, |
|
"logps/rejected": -1784.007568359375, |
|
"loss": 0.1684, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.3490874767303467, |
|
"rewards/margins": 5.845183849334717, |
|
"rewards/rejected": -7.194271087646484, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -3.1281025409698486, |
|
"eval_logits/rejected": -2.964585304260254, |
|
"eval_logps/chosen": -755.873291015625, |
|
"eval_logps/rejected": -1595.0167236328125, |
|
"eval_loss": 0.3332486152648926, |
|
"eval_rewards/accuracies": 0.8686440587043762, |
|
"eval_rewards/chosen": -1.8907400369644165, |
|
"eval_rewards/margins": 3.9363222122192383, |
|
"eval_rewards/rejected": -5.827062606811523, |
|
"eval_runtime": 195.0133, |
|
"eval_samples_per_second": 9.579, |
|
"eval_steps_per_second": 0.303, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 36.49070034902309, |
|
"learning_rate": 1.6097479104361326e-07, |
|
"logits/chosen": -2.9343550205230713, |
|
"logits/rejected": -3.0102057456970215, |
|
"logps/chosen": -727.3587646484375, |
|
"logps/rejected": -1638.8382568359375, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3601789474487305, |
|
"rewards/margins": 5.164590835571289, |
|
"rewards/rejected": -6.524770259857178, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 23.81218846817336, |
|
"learning_rate": 1.146409641785882e-07, |
|
"logits/chosen": -2.8368687629699707, |
|
"logits/rejected": -3.0225136280059814, |
|
"logps/chosen": -666.7093505859375, |
|
"logps/rejected": -1738.1021728515625, |
|
"loss": 0.1519, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.2082051038742065, |
|
"rewards/margins": 5.624727249145508, |
|
"rewards/rejected": -6.832932472229004, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 20.21152127941066, |
|
"learning_rate": 7.414516258630244e-08, |
|
"logits/chosen": -2.9129068851470947, |
|
"logits/rejected": -3.0287842750549316, |
|
"logps/chosen": -708.4816284179688, |
|
"logps/rejected": -1789.521728515625, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3833436965942383, |
|
"rewards/margins": 5.361766338348389, |
|
"rewards/rejected": -6.745110511779785, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 19.81204521774453, |
|
"learning_rate": 4.1233967214979764e-08, |
|
"logits/chosen": -2.979017734527588, |
|
"logits/rejected": -3.0025482177734375, |
|
"logps/chosen": -664.9674072265625, |
|
"logps/rejected": -1679.622802734375, |
|
"loss": 0.1512, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.3218904733657837, |
|
"rewards/margins": 5.435315132141113, |
|
"rewards/rejected": -6.757205963134766, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 33.74454128402628, |
|
"learning_rate": 1.732683550362954e-08, |
|
"logits/chosen": -2.971137523651123, |
|
"logits/rejected": -3.0957703590393066, |
|
"logps/chosen": -662.1959228515625, |
|
"logps/rejected": -1967.736083984375, |
|
"loss": 0.1032, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.2782970666885376, |
|
"rewards/margins": 7.209610939025879, |
|
"rewards/rejected": -8.487907409667969, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 35.57488951709773, |
|
"learning_rate": 3.4548802869627804e-09, |
|
"logits/chosen": -2.9537863731384277, |
|
"logits/rejected": -3.0035552978515625, |
|
"logps/chosen": -699.1692504882812, |
|
"logps/rejected": -1824.681884765625, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.6038345098495483, |
|
"rewards/margins": 6.514244079589844, |
|
"rewards/rejected": -8.118078231811523, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05682134947606495, |
|
"train_runtime": 921.1998, |
|
"train_samples_per_second": 11.663, |
|
"train_steps_per_second": 0.182 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|