|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 55, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": 1.3768084049224854, |
|
"debug/policy_chosen_logps": -161.25889587402344, |
|
"debug/policy_rejected_logits": 1.3094204664230347, |
|
"debug/policy_rejected_logps": -151.26834106445312, |
|
"debug/reference_chosen_logps": -161.25889587402344, |
|
"debug/reference_rejected_logps": -151.26834106445312, |
|
"epoch": 0.01818181818181818, |
|
"grad_norm": 2.5769607314165515, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.3768084049224854, |
|
"logits/rejected": 1.3094204664230347, |
|
"logps/chosen": -161.25889587402344, |
|
"logps/rejected": -151.26834106445312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.889292299747467, |
|
"debug/policy_chosen_logps": -130.50076293945312, |
|
"debug/policy_rejected_logits": 1.1603094339370728, |
|
"debug/policy_rejected_logps": -134.81027221679688, |
|
"debug/reference_chosen_logps": -130.64517211914062, |
|
"debug/reference_rejected_logps": -135.13914489746094, |
|
"epoch": 0.03636363636363636, |
|
"grad_norm": 2.472634188308473, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.889292299747467, |
|
"logits/rejected": 1.1603094339370728, |
|
"logps/chosen": -130.50076293945312, |
|
"logps/rejected": -134.81027221679688, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0014441299717873335, |
|
"rewards/margins": -0.0018445965833961964, |
|
"rewards/rejected": 0.0032887267880141735, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6971374154090881, |
|
"debug/policy_chosen_logps": -137.5977783203125, |
|
"debug/policy_rejected_logits": 1.2003397941589355, |
|
"debug/policy_rejected_logps": -122.20698547363281, |
|
"debug/reference_chosen_logps": -138.00997924804688, |
|
"debug/reference_rejected_logps": -122.32334899902344, |
|
"epoch": 0.05454545454545454, |
|
"grad_norm": 2.960555045135284, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.6971374154090881, |
|
"logits/rejected": 1.2003397941589355, |
|
"logps/chosen": -137.5977783203125, |
|
"logps/rejected": -122.20698547363281, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004121971316635609, |
|
"rewards/margins": 0.0029583165887743235, |
|
"rewards/rejected": 0.001163654262199998, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4667843282222748, |
|
"debug/policy_chosen_logps": -124.55257415771484, |
|
"debug/policy_rejected_logits": 1.1448464393615723, |
|
"debug/policy_rejected_logps": -149.21067810058594, |
|
"debug/reference_chosen_logps": -125.31111907958984, |
|
"debug/reference_rejected_logps": -148.5081787109375, |
|
"epoch": 0.07272727272727272, |
|
"grad_norm": 2.6975903263061793, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4667843282222748, |
|
"logits/rejected": 1.1448464393615723, |
|
"logps/chosen": -124.55257415771484, |
|
"logps/rejected": -149.21067810058594, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007585420273244381, |
|
"rewards/margins": 0.014610327780246735, |
|
"rewards/rejected": -0.007024907972663641, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7746908664703369, |
|
"debug/policy_chosen_logps": -121.77815246582031, |
|
"debug/policy_rejected_logits": 1.0104609727859497, |
|
"debug/policy_rejected_logps": -127.0907211303711, |
|
"debug/reference_chosen_logps": -122.27123260498047, |
|
"debug/reference_rejected_logps": -126.88908386230469, |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 2.3195072990326535, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7746908664703369, |
|
"logits/rejected": 1.0104609727859497, |
|
"logps/chosen": -121.77815246582031, |
|
"logps/rejected": -127.0907211303711, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004930791910737753, |
|
"rewards/margins": 0.006947212386876345, |
|
"rewards/rejected": -0.0020164204761385918, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4436347484588623, |
|
"debug/policy_chosen_logps": -126.17998504638672, |
|
"debug/policy_rejected_logits": 0.6884921789169312, |
|
"debug/policy_rejected_logps": -126.66116333007812, |
|
"debug/reference_chosen_logps": -126.64784240722656, |
|
"debug/reference_rejected_logps": -126.05181884765625, |
|
"epoch": 0.10909090909090909, |
|
"grad_norm": 2.7552991032792282, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4436347484588623, |
|
"logits/rejected": 0.6884921789169312, |
|
"logps/chosen": -126.17998504638672, |
|
"logps/rejected": -126.66116333007812, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.004678516648709774, |
|
"rewards/margins": 0.01077202707529068, |
|
"rewards/rejected": -0.006093511823564768, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.3949795365333557, |
|
"debug/policy_chosen_logps": -120.72560119628906, |
|
"debug/policy_rejected_logits": 1.16127610206604, |
|
"debug/policy_rejected_logps": -166.54122924804688, |
|
"debug/reference_chosen_logps": -122.05653381347656, |
|
"debug/reference_rejected_logps": -166.2891387939453, |
|
"epoch": 0.12727272727272726, |
|
"grad_norm": 2.618269906477997, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.3949795365333557, |
|
"logits/rejected": 1.16127610206604, |
|
"logps/chosen": -120.72560119628906, |
|
"logps/rejected": -166.54122924804688, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013309288769960403, |
|
"rewards/margins": 0.015830213204026222, |
|
"rewards/rejected": -0.002520923502743244, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7735803127288818, |
|
"debug/policy_chosen_logps": -135.4464569091797, |
|
"debug/policy_rejected_logits": 0.9391681551933289, |
|
"debug/policy_rejected_logps": -139.32675170898438, |
|
"debug/reference_chosen_logps": -136.41879272460938, |
|
"debug/reference_rejected_logps": -139.55023193359375, |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 2.5281133640812117, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7735803127288818, |
|
"logits/rejected": 0.9391681551933289, |
|
"logps/chosen": -135.4464569091797, |
|
"logps/rejected": -139.32675170898438, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00972352921962738, |
|
"rewards/margins": 0.007488718256354332, |
|
"rewards/rejected": 0.002234811894595623, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5073169469833374, |
|
"debug/policy_chosen_logps": -135.45155334472656, |
|
"debug/policy_rejected_logits": 1.3236225843429565, |
|
"debug/policy_rejected_logps": -130.271728515625, |
|
"debug/reference_chosen_logps": -135.94186401367188, |
|
"debug/reference_rejected_logps": -129.65411376953125, |
|
"epoch": 0.16363636363636364, |
|
"grad_norm": 2.5354121562315766, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5073169469833374, |
|
"logits/rejected": 1.3236225843429565, |
|
"logps/chosen": -135.45155334472656, |
|
"logps/rejected": -130.271728515625, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.004903106484562159, |
|
"rewards/margins": 0.011079207062721252, |
|
"rewards/rejected": -0.006176099181175232, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9053974151611328, |
|
"debug/policy_chosen_logps": -129.9651641845703, |
|
"debug/policy_rejected_logits": 1.6513068675994873, |
|
"debug/policy_rejected_logps": -157.7940673828125, |
|
"debug/reference_chosen_logps": -130.6460418701172, |
|
"debug/reference_rejected_logps": -158.63949584960938, |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 2.810542042119807, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.9053974151611328, |
|
"logits/rejected": 1.6513068675994873, |
|
"logps/chosen": -129.9651641845703, |
|
"logps/rejected": -157.7940673828125, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.006808795966207981, |
|
"rewards/margins": -0.0016453838907182217, |
|
"rewards/rejected": 0.008454179391264915, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.792403519153595, |
|
"debug/policy_chosen_logps": -141.16513061523438, |
|
"debug/policy_rejected_logits": 1.2816684246063232, |
|
"debug/policy_rejected_logps": -152.61949157714844, |
|
"debug/reference_chosen_logps": -141.37376403808594, |
|
"debug/reference_rejected_logps": -153.21987915039062, |
|
"epoch": 0.2, |
|
"grad_norm": 2.488423861858769, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.792403519153595, |
|
"logits/rejected": 1.2816684246063232, |
|
"logps/chosen": -141.16513061523438, |
|
"logps/rejected": -152.61949157714844, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0020863343961536884, |
|
"rewards/margins": -0.00391742680221796, |
|
"rewards/rejected": 0.0060037607327103615, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0979342460632324, |
|
"debug/policy_chosen_logps": -127.43941497802734, |
|
"debug/policy_rejected_logits": 0.8406213521957397, |
|
"debug/policy_rejected_logps": -133.73110961914062, |
|
"debug/reference_chosen_logps": -128.33518981933594, |
|
"debug/reference_rejected_logps": -133.88735961914062, |
|
"epoch": 0.21818181818181817, |
|
"grad_norm": 2.5700027163746975, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0979342460632324, |
|
"logits/rejected": 0.8406213521957397, |
|
"logps/chosen": -127.43941497802734, |
|
"logps/rejected": -133.73110961914062, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008957691490650177, |
|
"rewards/margins": 0.007395162247121334, |
|
"rewards/rejected": 0.0015625286614522338, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8384308218955994, |
|
"debug/policy_chosen_logps": -129.3570556640625, |
|
"debug/policy_rejected_logits": 1.2891650199890137, |
|
"debug/policy_rejected_logps": -140.60781860351562, |
|
"debug/reference_chosen_logps": -130.777099609375, |
|
"debug/reference_rejected_logps": -137.655029296875, |
|
"epoch": 0.23636363636363636, |
|
"grad_norm": 2.5021787713492976, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.8384308218955994, |
|
"logits/rejected": 1.2891650199890137, |
|
"logps/chosen": -129.3570556640625, |
|
"logps/rejected": -140.60781860351562, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014200429432094097, |
|
"rewards/margins": 0.043728265911340714, |
|
"rewards/rejected": -0.029527835547924042, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7953339219093323, |
|
"debug/policy_chosen_logps": -123.55655670166016, |
|
"debug/policy_rejected_logits": 1.151228427886963, |
|
"debug/policy_rejected_logps": -121.413330078125, |
|
"debug/reference_chosen_logps": -122.65122985839844, |
|
"debug/reference_rejected_logps": -119.04695892333984, |
|
"epoch": 0.2545454545454545, |
|
"grad_norm": 2.645883535930893, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7953339219093323, |
|
"logits/rejected": 1.151228427886963, |
|
"logps/chosen": -123.55655670166016, |
|
"logps/rejected": -121.413330078125, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00905330665409565, |
|
"rewards/margins": 0.014610443264245987, |
|
"rewards/rejected": -0.023663748055696487, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7620865106582642, |
|
"debug/policy_chosen_logps": -142.4312286376953, |
|
"debug/policy_rejected_logits": 0.9396656155586243, |
|
"debug/policy_rejected_logps": -148.87515258789062, |
|
"debug/reference_chosen_logps": -139.45565795898438, |
|
"debug/reference_rejected_logps": -145.11453247070312, |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 2.9294875671025875, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7620865106582642, |
|
"logits/rejected": 0.9396656155586243, |
|
"logps/chosen": -142.4312286376953, |
|
"logps/rejected": -148.87515258789062, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02975570783019066, |
|
"rewards/margins": 0.007850446738302708, |
|
"rewards/rejected": -0.03760614991188049, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0136598348617554, |
|
"debug/policy_chosen_logps": -130.98876953125, |
|
"debug/policy_rejected_logits": 0.7458176612854004, |
|
"debug/policy_rejected_logps": -134.5099639892578, |
|
"debug/reference_chosen_logps": -131.6763153076172, |
|
"debug/reference_rejected_logps": -133.0446014404297, |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 2.2028450675710602, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0136598348617554, |
|
"logits/rejected": 0.7458176612854004, |
|
"logps/chosen": -130.98876953125, |
|
"logps/rejected": -134.5099639892578, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006875476334244013, |
|
"rewards/margins": 0.021529102697968483, |
|
"rewards/rejected": -0.014653624035418034, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8773481249809265, |
|
"debug/policy_chosen_logps": -134.43736267089844, |
|
"debug/policy_rejected_logits": 0.8306017518043518, |
|
"debug/policy_rejected_logps": -131.41659545898438, |
|
"debug/reference_chosen_logps": -134.44261169433594, |
|
"debug/reference_rejected_logps": -130.02728271484375, |
|
"epoch": 0.3090909090909091, |
|
"grad_norm": 2.8510973680896896, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.8773481249809265, |
|
"logits/rejected": 0.8306017518043518, |
|
"logps/chosen": -134.43736267089844, |
|
"logps/rejected": -131.41659545898438, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 5.249073728919029e-05, |
|
"rewards/margins": 0.01394571177661419, |
|
"rewards/rejected": -0.013893223367631435, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.32443925738334656, |
|
"debug/policy_chosen_logps": -117.34490966796875, |
|
"debug/policy_rejected_logits": 0.6942044496536255, |
|
"debug/policy_rejected_logps": -133.7316436767578, |
|
"debug/reference_chosen_logps": -118.01707458496094, |
|
"debug/reference_rejected_logps": -132.0999755859375, |
|
"epoch": 0.32727272727272727, |
|
"grad_norm": 2.9624915134206953, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.32443925738334656, |
|
"logits/rejected": 0.6942044496536255, |
|
"logps/chosen": -117.34490966796875, |
|
"logps/rejected": -133.7316436767578, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006721640005707741, |
|
"rewards/margins": 0.023038387298583984, |
|
"rewards/rejected": -0.016316747292876244, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6036683320999146, |
|
"debug/policy_chosen_logps": -139.1854248046875, |
|
"debug/policy_rejected_logits": 1.0925521850585938, |
|
"debug/policy_rejected_logps": -129.66262817382812, |
|
"debug/reference_chosen_logps": -139.90301513671875, |
|
"debug/reference_rejected_logps": -128.35787963867188, |
|
"epoch": 0.34545454545454546, |
|
"grad_norm": 3.0283747066503546, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.6036683320999146, |
|
"logits/rejected": 1.0925521850585938, |
|
"logps/chosen": -139.1854248046875, |
|
"logps/rejected": -129.66262817382812, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.007175750564783812, |
|
"rewards/margins": 0.02022336982190609, |
|
"rewards/rejected": -0.013047618791460991, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.19202691316604614, |
|
"debug/policy_chosen_logps": -125.41830444335938, |
|
"debug/policy_rejected_logits": 0.6729949116706848, |
|
"debug/policy_rejected_logps": -136.26675415039062, |
|
"debug/reference_chosen_logps": -129.2891845703125, |
|
"debug/reference_rejected_logps": -131.7703857421875, |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 2.5690310299484906, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.19202691316604614, |
|
"logits/rejected": 0.6729949116706848, |
|
"logps/chosen": -125.41830444335938, |
|
"logps/rejected": -136.26675415039062, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.03870879113674164, |
|
"rewards/margins": 0.08367244899272919, |
|
"rewards/rejected": -0.04496365413069725, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7662903070449829, |
|
"debug/policy_chosen_logps": -124.8802261352539, |
|
"debug/policy_rejected_logits": 1.350595474243164, |
|
"debug/policy_rejected_logps": -131.6766357421875, |
|
"debug/reference_chosen_logps": -125.80413818359375, |
|
"debug/reference_rejected_logps": -129.85723876953125, |
|
"epoch": 0.38181818181818183, |
|
"grad_norm": 2.9390962336045523, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7662903070449829, |
|
"logits/rejected": 1.350595474243164, |
|
"logps/chosen": -124.8802261352539, |
|
"logps/rejected": -131.6766357421875, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.009239129722118378, |
|
"rewards/margins": 0.027433060109615326, |
|
"rewards/rejected": -0.018193930387496948, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.21824312210083, |
|
"debug/policy_chosen_logps": -118.64605712890625, |
|
"debug/policy_rejected_logits": 1.3743467330932617, |
|
"debug/policy_rejected_logps": -130.24822998046875, |
|
"debug/reference_chosen_logps": -119.62400817871094, |
|
"debug/reference_rejected_logps": -129.10247802734375, |
|
"epoch": 0.4, |
|
"grad_norm": 2.4995079289310267, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.21824312210083, |
|
"logits/rejected": 1.3743467330932617, |
|
"logps/chosen": -118.64605712890625, |
|
"logps/rejected": -130.24822998046875, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.009779386222362518, |
|
"rewards/margins": 0.021236935630440712, |
|
"rewards/rejected": -0.011457548476755619, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.32036519050598145, |
|
"debug/policy_chosen_logps": -137.20065307617188, |
|
"debug/policy_rejected_logits": 0.4707115590572357, |
|
"debug/policy_rejected_logps": -141.1231231689453, |
|
"debug/reference_chosen_logps": -137.7662353515625, |
|
"debug/reference_rejected_logps": -140.11264038085938, |
|
"epoch": 0.41818181818181815, |
|
"grad_norm": 2.726999800833551, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.32036519050598145, |
|
"logits/rejected": 0.4707115590572357, |
|
"logps/chosen": -137.20065307617188, |
|
"logps/rejected": -141.1231231689453, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00565571803599596, |
|
"rewards/margins": 0.01576041243970394, |
|
"rewards/rejected": -0.010104694403707981, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.174080491065979, |
|
"debug/policy_chosen_logps": -130.7235870361328, |
|
"debug/policy_rejected_logits": 1.6448463201522827, |
|
"debug/policy_rejected_logps": -149.97439575195312, |
|
"debug/reference_chosen_logps": -131.06661987304688, |
|
"debug/reference_rejected_logps": -147.88232421875, |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 2.652353634767768, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.174080491065979, |
|
"logits/rejected": 1.6448463201522827, |
|
"logps/chosen": -130.7235870361328, |
|
"logps/rejected": -149.97439575195312, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0034304135479032993, |
|
"rewards/margins": 0.02435128018260002, |
|
"rewards/rejected": -0.02092086710035801, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5194191336631775, |
|
"debug/policy_chosen_logps": -125.35334014892578, |
|
"debug/policy_rejected_logits": 0.8994636535644531, |
|
"debug/policy_rejected_logps": -129.91224670410156, |
|
"debug/reference_chosen_logps": -127.37261962890625, |
|
"debug/reference_rejected_logps": -127.31116485595703, |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 2.970456164285477, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5194191336631775, |
|
"logits/rejected": 0.8994636535644531, |
|
"logps/chosen": -125.35334014892578, |
|
"logps/rejected": -129.91224670410156, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02019280195236206, |
|
"rewards/margins": 0.04620352387428284, |
|
"rewards/rejected": -0.026010721921920776, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6115962862968445, |
|
"debug/policy_chosen_logps": -150.66409301757812, |
|
"debug/policy_rejected_logits": 0.6578989028930664, |
|
"debug/policy_rejected_logps": -153.54840087890625, |
|
"debug/reference_chosen_logps": -151.63339233398438, |
|
"debug/reference_rejected_logps": -151.82666015625, |
|
"epoch": 0.4727272727272727, |
|
"grad_norm": 2.580941309384753, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.6115962862968445, |
|
"logits/rejected": 0.6578989028930664, |
|
"logps/chosen": -150.66409301757812, |
|
"logps/rejected": -153.54840087890625, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.009693136438727379, |
|
"rewards/margins": 0.026910629123449326, |
|
"rewards/rejected": -0.017217492684721947, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.41199997067451477, |
|
"debug/policy_chosen_logps": -148.89768981933594, |
|
"debug/policy_rejected_logits": 0.782092273235321, |
|
"debug/policy_rejected_logps": -147.73719787597656, |
|
"debug/reference_chosen_logps": -149.99037170410156, |
|
"debug/reference_rejected_logps": -146.23422241210938, |
|
"epoch": 0.4909090909090909, |
|
"grad_norm": 2.7525802515180575, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.41199997067451477, |
|
"logits/rejected": 0.782092273235321, |
|
"logps/chosen": -148.89768981933594, |
|
"logps/rejected": -147.73719787597656, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.010926798917353153, |
|
"rewards/margins": 0.02595655433833599, |
|
"rewards/rejected": -0.015029754489660263, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.34080997109413147, |
|
"debug/policy_chosen_logps": -126.34262084960938, |
|
"debug/policy_rejected_logits": 0.36349666118621826, |
|
"debug/policy_rejected_logps": -134.22015380859375, |
|
"debug/reference_chosen_logps": -125.65828704833984, |
|
"debug/reference_rejected_logps": -130.0347442626953, |
|
"epoch": 0.509090909090909, |
|
"grad_norm": 2.680704105439303, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.34080997109413147, |
|
"logits/rejected": 0.36349666118621826, |
|
"logps/chosen": -126.34262084960938, |
|
"logps/rejected": -134.22015380859375, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.006843318697065115, |
|
"rewards/margins": 0.03501085191965103, |
|
"rewards/rejected": -0.04185417294502258, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7369721531867981, |
|
"debug/policy_chosen_logps": -137.26649475097656, |
|
"debug/policy_rejected_logits": 0.7872332334518433, |
|
"debug/policy_rejected_logps": -143.5257568359375, |
|
"debug/reference_chosen_logps": -137.09490966796875, |
|
"debug/reference_rejected_logps": -140.14822387695312, |
|
"epoch": 0.5272727272727272, |
|
"grad_norm": 2.8512448335063185, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7369721531867981, |
|
"logits/rejected": 0.7872332334518433, |
|
"logps/chosen": -137.26649475097656, |
|
"logps/rejected": -143.5257568359375, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0017157741822302341, |
|
"rewards/margins": 0.03205941990017891, |
|
"rewards/rejected": -0.033775195479393005, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7467297911643982, |
|
"debug/policy_chosen_logps": -142.2055206298828, |
|
"debug/policy_rejected_logits": 0.8480007648468018, |
|
"debug/policy_rejected_logps": -145.74615478515625, |
|
"debug/reference_chosen_logps": -141.9608154296875, |
|
"debug/reference_rejected_logps": -141.98846435546875, |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 3.2448790034806456, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7467297911643982, |
|
"logits/rejected": 0.8480007648468018, |
|
"logps/chosen": -142.2055206298828, |
|
"logps/rejected": -145.74615478515625, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0024469951167702675, |
|
"rewards/margins": 0.03513010963797569, |
|
"rewards/rejected": -0.03757710009813309, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.45751431584358215, |
|
"debug/policy_chosen_logps": -119.06462097167969, |
|
"debug/policy_rejected_logits": 1.1633354425430298, |
|
"debug/policy_rejected_logps": -134.27395629882812, |
|
"debug/reference_chosen_logps": -118.83071899414062, |
|
"debug/reference_rejected_logps": -132.58956909179688, |
|
"epoch": 0.5636363636363636, |
|
"grad_norm": 2.604825578516903, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.45751431584358215, |
|
"logits/rejected": 1.1633354425430298, |
|
"logps/chosen": -119.06462097167969, |
|
"logps/rejected": -134.27395629882812, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.002338953083381057, |
|
"rewards/margins": 0.01450504269450903, |
|
"rewards/rejected": -0.016843995079398155, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.47755831480026245, |
|
"debug/policy_chosen_logps": -113.85858154296875, |
|
"debug/policy_rejected_logits": 0.9192254543304443, |
|
"debug/policy_rejected_logps": -142.87522888183594, |
|
"debug/reference_chosen_logps": -114.60741424560547, |
|
"debug/reference_rejected_logps": -141.06182861328125, |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 2.4614920126129927, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.47755831480026245, |
|
"logits/rejected": 0.9192254543304443, |
|
"logps/chosen": -113.85858154296875, |
|
"logps/rejected": -142.87522888183594, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007488393224775791, |
|
"rewards/margins": 0.02562214806675911, |
|
"rewards/rejected": -0.018133753910660744, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7646514773368835, |
|
"debug/policy_chosen_logps": -140.6458740234375, |
|
"debug/policy_rejected_logits": 1.0664713382720947, |
|
"debug/policy_rejected_logps": -154.0317840576172, |
|
"debug/reference_chosen_logps": -141.03515625, |
|
"debug/reference_rejected_logps": -152.28282165527344, |
|
"epoch": 0.6, |
|
"grad_norm": 2.892367660321636, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7646514773368835, |
|
"logits/rejected": 1.0664713382720947, |
|
"logps/chosen": -140.6458740234375, |
|
"logps/rejected": -154.0317840576172, |
|
"loss": 0.6726, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0038927835412323475, |
|
"rewards/margins": 0.021382424980401993, |
|
"rewards/rejected": -0.017489641904830933, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0055601596832275, |
|
"debug/policy_chosen_logps": -130.6605224609375, |
|
"debug/policy_rejected_logits": 0.745191216468811, |
|
"debug/policy_rejected_logps": -126.47428894042969, |
|
"debug/reference_chosen_logps": -129.04339599609375, |
|
"debug/reference_rejected_logps": -119.90361022949219, |
|
"epoch": 0.6181818181818182, |
|
"grad_norm": 2.9093181223713334, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0055601596832275, |
|
"logits/rejected": 0.745191216468811, |
|
"logps/chosen": -130.6605224609375, |
|
"logps/rejected": -126.47428894042969, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01617136038839817, |
|
"rewards/margins": 0.049535419791936874, |
|
"rewards/rejected": -0.0657067745923996, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.665949285030365, |
|
"debug/policy_chosen_logps": -167.20567321777344, |
|
"debug/policy_rejected_logits": 0.8378525972366333, |
|
"debug/policy_rejected_logps": -176.95294189453125, |
|
"debug/reference_chosen_logps": -166.0945281982422, |
|
"debug/reference_rejected_logps": -174.29322814941406, |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 3.164591716357413, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.665949285030365, |
|
"logits/rejected": 0.8378525972366333, |
|
"logps/chosen": -167.20567321777344, |
|
"logps/rejected": -176.95294189453125, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.011111450381577015, |
|
"rewards/margins": 0.015485725365579128, |
|
"rewards/rejected": -0.026597173884510994, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9554933905601501, |
|
"debug/policy_chosen_logps": -136.39425659179688, |
|
"debug/policy_rejected_logits": 0.49876803159713745, |
|
"debug/policy_rejected_logps": -153.6390380859375, |
|
"debug/reference_chosen_logps": -136.20895385742188, |
|
"debug/reference_rejected_logps": -146.877197265625, |
|
"epoch": 0.6545454545454545, |
|
"grad_norm": 3.098324286117154, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.9554933905601501, |
|
"logits/rejected": 0.49876803159713745, |
|
"logps/chosen": -136.39425659179688, |
|
"logps/rejected": -153.6390380859375, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0018529421649873257, |
|
"rewards/margins": 0.06576552242040634, |
|
"rewards/rejected": -0.0676184669137001, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2520717680454254, |
|
"debug/policy_chosen_logps": -121.43704223632812, |
|
"debug/policy_rejected_logits": 0.5053534507751465, |
|
"debug/policy_rejected_logps": -158.01513671875, |
|
"debug/reference_chosen_logps": -120.23490142822266, |
|
"debug/reference_rejected_logps": -155.57327270507812, |
|
"epoch": 0.6727272727272727, |
|
"grad_norm": 3.1244720060016986, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2520717680454254, |
|
"logits/rejected": 0.5053534507751465, |
|
"logps/chosen": -121.43704223632812, |
|
"logps/rejected": -158.01513671875, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.012021454982459545, |
|
"rewards/margins": 0.012397251091897488, |
|
"rewards/rejected": -0.024418707937002182, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.35868680477142334, |
|
"debug/policy_chosen_logps": -139.0185546875, |
|
"debug/policy_rejected_logits": 0.39101022481918335, |
|
"debug/policy_rejected_logps": -153.45266723632812, |
|
"debug/reference_chosen_logps": -138.5296630859375, |
|
"debug/reference_rejected_logps": -147.90438842773438, |
|
"epoch": 0.6909090909090909, |
|
"grad_norm": 5.757112422371425, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.35868680477142334, |
|
"logits/rejected": 0.39101022481918335, |
|
"logps/chosen": -139.0185546875, |
|
"logps/rejected": -153.45266723632812, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0048888688907027245, |
|
"rewards/margins": 0.050593726336956024, |
|
"rewards/rejected": -0.05548259615898132, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6587602496147156, |
|
"debug/policy_chosen_logps": -140.99093627929688, |
|
"debug/policy_rejected_logits": 0.5164349675178528, |
|
"debug/policy_rejected_logps": -140.2601318359375, |
|
"debug/reference_chosen_logps": -136.22140502929688, |
|
"debug/reference_rejected_logps": -129.1555633544922, |
|
"epoch": 0.7090909090909091, |
|
"grad_norm": 3.224110720714856, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.6587602496147156, |
|
"logits/rejected": 0.5164349675178528, |
|
"logps/chosen": -140.99093627929688, |
|
"logps/rejected": -140.2601318359375, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04769532382488251, |
|
"rewards/margins": 0.0633503645658493, |
|
"rewards/rejected": -0.11104568839073181, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.36872461438179016, |
|
"debug/policy_chosen_logps": -153.08731079101562, |
|
"debug/policy_rejected_logits": 0.7442142367362976, |
|
"debug/policy_rejected_logps": -148.73162841796875, |
|
"debug/reference_chosen_logps": -152.52410888671875, |
|
"debug/reference_rejected_logps": -143.73696899414062, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 3.030981160478689, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.36872461438179016, |
|
"logits/rejected": 0.7442142367362976, |
|
"logps/chosen": -153.08731079101562, |
|
"logps/rejected": -148.73162841796875, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0056318375281989574, |
|
"rewards/margins": 0.044314805418252945, |
|
"rewards/rejected": -0.04994663968682289, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9497181177139282, |
|
"debug/policy_chosen_logps": -147.80657958984375, |
|
"debug/policy_rejected_logits": 1.0767797231674194, |
|
"debug/policy_rejected_logps": -147.0999755859375, |
|
"debug/reference_chosen_logps": -144.19863891601562, |
|
"debug/reference_rejected_logps": -141.43292236328125, |
|
"epoch": 0.7454545454545455, |
|
"grad_norm": 2.594544171794356, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.9497181177139282, |
|
"logits/rejected": 1.0767797231674194, |
|
"logps/chosen": -147.80657958984375, |
|
"logps/rejected": -147.0999755859375, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0360795296728611, |
|
"rewards/margins": 0.020590826869010925, |
|
"rewards/rejected": -0.05667036026716232, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2258227914571762, |
|
"debug/policy_chosen_logps": -125.74639129638672, |
|
"debug/policy_rejected_logits": 0.5640020966529846, |
|
"debug/policy_rejected_logps": -163.89407348632812, |
|
"debug/reference_chosen_logps": -124.42830657958984, |
|
"debug/reference_rejected_logps": -156.05613708496094, |
|
"epoch": 0.7636363636363637, |
|
"grad_norm": 2.6702311112671437, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2258227914571762, |
|
"logits/rejected": 0.5640020966529846, |
|
"logps/chosen": -125.74639129638672, |
|
"logps/rejected": -163.89407348632812, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.013180827721953392, |
|
"rewards/margins": 0.06519865989685059, |
|
"rewards/rejected": -0.07837948203086853, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5549467206001282, |
|
"debug/policy_chosen_logps": -146.0997314453125, |
|
"debug/policy_rejected_logits": 0.725266695022583, |
|
"debug/policy_rejected_logps": -147.44781494140625, |
|
"debug/reference_chosen_logps": -145.01519775390625, |
|
"debug/reference_rejected_logps": -139.47506713867188, |
|
"epoch": 0.7818181818181819, |
|
"grad_norm": 3.292126654382054, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5549467206001282, |
|
"logits/rejected": 0.725266695022583, |
|
"logps/chosen": -146.0997314453125, |
|
"logps/rejected": -147.44781494140625, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.010845517739653587, |
|
"rewards/margins": 0.06888195872306824, |
|
"rewards/rejected": -0.07972747832536697, |
|
"step": 43 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.019000336527824402, |
|
"debug/policy_chosen_logps": -190.69863891601562, |
|
"debug/policy_rejected_logits": 0.052988436073064804, |
|
"debug/policy_rejected_logps": -200.59532165527344, |
|
"debug/reference_chosen_logps": -116.88156127929688, |
|
"debug/reference_rejected_logps": -129.81407165527344, |
|
"epoch": 0.8, |
|
"grad_norm": 6.5675247662578915, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.019000336527824402, |
|
"logits/rejected": 0.052988436073064804, |
|
"logps/chosen": -190.69863891601562, |
|
"logps/rejected": -200.59532165527344, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7381708025932312, |
|
"rewards/margins": -0.030358243733644485, |
|
"rewards/rejected": -0.7078125476837158, |
|
"step": 44 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -0.10405106842517853, |
|
"debug/policy_chosen_logps": -157.44500732421875, |
|
"debug/policy_rejected_logits": 0.3257129192352295, |
|
"debug/policy_rejected_logps": -170.44888305664062, |
|
"debug/reference_chosen_logps": -139.64646911621094, |
|
"debug/reference_rejected_logps": -142.53656005859375, |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 4.692245763612819, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.10405106842517853, |
|
"logits/rejected": 0.3257129192352295, |
|
"logps/chosen": -157.44500732421875, |
|
"logps/rejected": -170.44888305664062, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17798534035682678, |
|
"rewards/margins": 0.10113789886236191, |
|
"rewards/rejected": -0.2791232466697693, |
|
"step": 45 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4848513901233673, |
|
"debug/policy_chosen_logps": -132.71551513671875, |
|
"debug/policy_rejected_logits": 0.5532675981521606, |
|
"debug/policy_rejected_logps": -146.56289672851562, |
|
"debug/reference_chosen_logps": -128.2412109375, |
|
"debug/reference_rejected_logps": -136.38479614257812, |
|
"epoch": 0.8363636363636363, |
|
"grad_norm": 3.7539173764574945, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4848513901233673, |
|
"logits/rejected": 0.5532675981521606, |
|
"logps/chosen": -132.71551513671875, |
|
"logps/rejected": -146.56289672851562, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.04474309831857681, |
|
"rewards/margins": 0.05703797936439514, |
|
"rewards/rejected": -0.10178108513355255, |
|
"step": 46 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0264931917190552, |
|
"debug/policy_chosen_logps": -127.01493072509766, |
|
"debug/policy_rejected_logits": 1.2245111465454102, |
|
"debug/policy_rejected_logps": -139.18240356445312, |
|
"debug/reference_chosen_logps": -120.42625427246094, |
|
"debug/reference_rejected_logps": -133.286865234375, |
|
"epoch": 0.8545454545454545, |
|
"grad_norm": 3.509455580959997, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 1.0264931917190552, |
|
"logits/rejected": 1.2245111465454102, |
|
"logps/chosen": -127.01493072509766, |
|
"logps/rejected": -139.18240356445312, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06588685512542725, |
|
"rewards/margins": -0.0069314176216721535, |
|
"rewards/rejected": -0.05895543843507767, |
|
"step": 47 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8054248094558716, |
|
"debug/policy_chosen_logps": -141.4749755859375, |
|
"debug/policy_rejected_logits": 0.9782359004020691, |
|
"debug/policy_rejected_logps": -170.6127471923828, |
|
"debug/reference_chosen_logps": -137.53990173339844, |
|
"debug/reference_rejected_logps": -161.91265869140625, |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 2.906297231648741, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.8054248094558716, |
|
"logits/rejected": 0.9782359004020691, |
|
"logps/chosen": -141.4749755859375, |
|
"logps/rejected": -170.6127471923828, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.039350755512714386, |
|
"rewards/margins": 0.04765000939369202, |
|
"rewards/rejected": -0.087000772356987, |
|
"step": 48 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7787018418312073, |
|
"debug/policy_chosen_logps": -144.430419921875, |
|
"debug/policy_rejected_logits": 1.1615475416183472, |
|
"debug/policy_rejected_logps": -154.2880859375, |
|
"debug/reference_chosen_logps": -143.35760498046875, |
|
"debug/reference_rejected_logps": -149.20867919921875, |
|
"epoch": 0.8909090909090909, |
|
"grad_norm": 3.916602695749529, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.7787018418312073, |
|
"logits/rejected": 1.1615475416183472, |
|
"logps/chosen": -144.430419921875, |
|
"logps/rejected": -154.2880859375, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.010728321969509125, |
|
"rewards/margins": 0.04006559029221535, |
|
"rewards/rejected": -0.05079391598701477, |
|
"step": 49 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4613797962665558, |
|
"debug/policy_chosen_logps": -136.51910400390625, |
|
"debug/policy_rejected_logits": 0.8358896970748901, |
|
"debug/policy_rejected_logps": -144.94639587402344, |
|
"debug/reference_chosen_logps": -134.311279296875, |
|
"debug/reference_rejected_logps": -135.14649963378906, |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 3.261755038151918, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4613797962665558, |
|
"logits/rejected": 0.8358896970748901, |
|
"logps/chosen": -136.51910400390625, |
|
"logps/rejected": -144.94639587402344, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.022078264504671097, |
|
"rewards/margins": 0.07592067122459412, |
|
"rewards/rejected": -0.09799893200397491, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.2970171570777893, |
|
"debug/policy_chosen_logps": -131.04600524902344, |
|
"debug/policy_rejected_logits": 0.780690610408783, |
|
"debug/policy_rejected_logps": -150.49591064453125, |
|
"debug/reference_chosen_logps": -130.48240661621094, |
|
"debug/reference_rejected_logps": -142.41004943847656, |
|
"epoch": 0.9272727272727272, |
|
"grad_norm": 2.7673993762098075, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.2970171570777893, |
|
"logits/rejected": 0.780690610408783, |
|
"logps/chosen": -131.04600524902344, |
|
"logps/rejected": -150.49591064453125, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.005636042915284634, |
|
"rewards/margins": 0.07522260397672653, |
|
"rewards/rejected": -0.08085864782333374, |
|
"step": 51 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.5221680998802185, |
|
"debug/policy_chosen_logps": -143.44683837890625, |
|
"debug/policy_rejected_logits": 1.1260162591934204, |
|
"debug/policy_rejected_logps": -153.56045532226562, |
|
"debug/reference_chosen_logps": -140.38705444335938, |
|
"debug/reference_rejected_logps": -145.02029418945312, |
|
"epoch": 0.9454545454545454, |
|
"grad_norm": 2.977638074764286, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.5221680998802185, |
|
"logits/rejected": 1.1260162591934204, |
|
"logps/chosen": -143.44683837890625, |
|
"logps/rejected": -153.56045532226562, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0305978674441576, |
|
"rewards/margins": 0.05480368435382843, |
|
"rewards/rejected": -0.08540154993534088, |
|
"step": 52 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.917246401309967, |
|
"debug/policy_chosen_logps": -144.25767517089844, |
|
"debug/policy_rejected_logits": 1.1883795261383057, |
|
"debug/policy_rejected_logps": -146.60035705566406, |
|
"debug/reference_chosen_logps": -142.2523193359375, |
|
"debug/reference_rejected_logps": -136.43551635742188, |
|
"epoch": 0.9636363636363636, |
|
"grad_norm": 2.933035711917657, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.917246401309967, |
|
"logits/rejected": 1.1883795261383057, |
|
"logps/chosen": -144.25767517089844, |
|
"logps/rejected": -146.60035705566406, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02005346305668354, |
|
"rewards/margins": 0.08159493654966354, |
|
"rewards/rejected": -0.10164839029312134, |
|
"step": 53 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.44729718565940857, |
|
"debug/policy_chosen_logps": -159.4932098388672, |
|
"debug/policy_rejected_logits": 0.5373156666755676, |
|
"debug/policy_rejected_logps": -145.55308532714844, |
|
"debug/reference_chosen_logps": -154.90708923339844, |
|
"debug/reference_rejected_logps": -141.65737915039062, |
|
"epoch": 0.9818181818181818, |
|
"grad_norm": 3.1066391658700105, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.44729718565940857, |
|
"logits/rejected": 0.5373156666755676, |
|
"logps/chosen": -159.4932098388672, |
|
"logps/rejected": -145.55308532714844, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.04586116969585419, |
|
"rewards/margins": -0.006904135458171368, |
|
"rewards/rejected": -0.038957033306360245, |
|
"step": 54 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.4478115737438202, |
|
"debug/policy_chosen_logps": -131.2347412109375, |
|
"debug/policy_rejected_logits": 1.390880823135376, |
|
"debug/policy_rejected_logps": -158.60877990722656, |
|
"debug/reference_chosen_logps": -128.52542114257812, |
|
"debug/reference_rejected_logps": -150.23410034179688, |
|
"epoch": 1.0, |
|
"grad_norm": 3.235468961904597, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.4478115737438202, |
|
"logits/rejected": 1.390880823135376, |
|
"logps/chosen": -131.2347412109375, |
|
"logps/rejected": -158.60877990722656, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.02709309384226799, |
|
"rewards/margins": 0.056653641164302826, |
|
"rewards/rejected": -0.08374673128128052, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 55, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6775240507992831, |
|
"train_runtime": 440.7264, |
|
"train_samples_per_second": 7.907, |
|
"train_steps_per_second": 0.125 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 55, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|