|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 158, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": 1.5589828491210938, |
|
"debug/policy_chosen_logps": -258.5330810546875, |
|
"debug/policy_rejected_logits": 1.9977812767028809, |
|
"debug/policy_rejected_logps": -304.0617980957031, |
|
"debug/reference_chosen_logps": -258.5330810546875, |
|
"debug/reference_rejected_logps": -304.0617980957031, |
|
"epoch": 0.006329113924050633, |
|
"grad_norm": 5.915865288930895, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.5589828491210938, |
|
"logits/rejected": 1.9977812767028809, |
|
"logps/chosen": -258.5330810546875, |
|
"logps/rejected": -304.0617980957031, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8514629602432251, |
|
"debug/policy_chosen_logps": -222.75827026367188, |
|
"debug/policy_rejected_logits": 1.458482027053833, |
|
"debug/policy_rejected_logps": -292.2978210449219, |
|
"debug/reference_chosen_logps": -222.56484985351562, |
|
"debug/reference_rejected_logps": -288.334716796875, |
|
"epoch": 0.03164556962025317, |
|
"grad_norm": 6.379094662882782, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8514629602432251, |
|
"logits/rejected": 1.458482027053833, |
|
"logps/chosen": -222.75827026367188, |
|
"logps/rejected": -292.2978210449219, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0019342182204127312, |
|
"rewards/margins": 0.03769642859697342, |
|
"rewards/rejected": -0.039630644023418427, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.1418471336364746, |
|
"debug/policy_chosen_logps": -261.1085510253906, |
|
"debug/policy_rejected_logits": 1.316489338874817, |
|
"debug/policy_rejected_logps": -285.4795837402344, |
|
"debug/reference_chosen_logps": -260.5736999511719, |
|
"debug/reference_rejected_logps": -280.2572937011719, |
|
"epoch": 0.06329113924050633, |
|
"grad_norm": 12.885197123935471, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.1418471336364746, |
|
"logits/rejected": 1.316489338874817, |
|
"logps/chosen": -261.1085510253906, |
|
"logps/rejected": -285.4795837402344, |
|
"loss": 0.4629, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.005348391830921173, |
|
"rewards/margins": 0.04687455669045448, |
|
"rewards/rejected": -0.05222295597195625, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.4202030897140503, |
|
"debug/policy_chosen_logps": -305.30096435546875, |
|
"debug/policy_rejected_logits": 1.608795404434204, |
|
"debug/policy_rejected_logps": -339.3628845214844, |
|
"debug/reference_chosen_logps": -305.89739990234375, |
|
"debug/reference_rejected_logps": -336.0830078125, |
|
"epoch": 0.0949367088607595, |
|
"grad_norm": 6.031873391940916, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.4202030897140503, |
|
"logits/rejected": 1.608795404434204, |
|
"logps/chosen": -305.30096435546875, |
|
"logps/rejected": -339.3628845214844, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.005964324809610844, |
|
"rewards/margins": 0.038763098418712616, |
|
"rewards/rejected": -0.032798778265714645, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.2072508335113525, |
|
"debug/policy_chosen_logps": -259.9560546875, |
|
"debug/policy_rejected_logits": 1.4596980810165405, |
|
"debug/policy_rejected_logps": -266.99896240234375, |
|
"debug/reference_chosen_logps": -262.2249450683594, |
|
"debug/reference_rejected_logps": -262.94488525390625, |
|
"epoch": 0.12658227848101267, |
|
"grad_norm": 5.929430664241562, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.2072508335113525, |
|
"logits/rejected": 1.4596980810165405, |
|
"logps/chosen": -259.9560546875, |
|
"logps/rejected": -266.99896240234375, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02268880605697632, |
|
"rewards/margins": 0.06322960555553436, |
|
"rewards/rejected": -0.04054080322384834, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9236510992050171, |
|
"debug/policy_chosen_logps": -263.56951904296875, |
|
"debug/policy_rejected_logits": 1.2153400182724, |
|
"debug/policy_rejected_logps": -276.596923828125, |
|
"debug/reference_chosen_logps": -264.62982177734375, |
|
"debug/reference_rejected_logps": -272.1346130371094, |
|
"epoch": 0.15822784810126583, |
|
"grad_norm": 6.795022163630081, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9236510992050171, |
|
"logits/rejected": 1.2153400182724, |
|
"logps/chosen": -263.56951904296875, |
|
"logps/rejected": -276.596923828125, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.010603101924061775, |
|
"rewards/margins": 0.05522637441754341, |
|
"rewards/rejected": -0.04462327063083649, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8845943212509155, |
|
"debug/policy_chosen_logps": -232.0923309326172, |
|
"debug/policy_rejected_logits": 1.284155011177063, |
|
"debug/policy_rejected_logps": -287.80389404296875, |
|
"debug/reference_chosen_logps": -233.78652954101562, |
|
"debug/reference_rejected_logps": -284.5167236328125, |
|
"epoch": 0.189873417721519, |
|
"grad_norm": 6.4445556777608255, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8845943212509155, |
|
"logits/rejected": 1.284155011177063, |
|
"logps/chosen": -232.0923309326172, |
|
"logps/rejected": -287.80389404296875, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.016941774636507034, |
|
"rewards/margins": 0.04981378838419914, |
|
"rewards/rejected": -0.03287201002240181, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.1807546615600586, |
|
"debug/policy_chosen_logps": -263.7032165527344, |
|
"debug/policy_rejected_logits": 1.3615357875823975, |
|
"debug/policy_rejected_logps": -295.0924377441406, |
|
"debug/reference_chosen_logps": -264.52520751953125, |
|
"debug/reference_rejected_logps": -289.96612548828125, |
|
"epoch": 0.22151898734177214, |
|
"grad_norm": 6.39988158389298, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.1807546615600586, |
|
"logits/rejected": 1.3615357875823975, |
|
"logps/chosen": -263.7032165527344, |
|
"logps/rejected": -295.0924377441406, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.008219520561397076, |
|
"rewards/margins": 0.05948234722018242, |
|
"rewards/rejected": -0.05126282572746277, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.918303370475769, |
|
"debug/policy_chosen_logps": -224.531982421875, |
|
"debug/policy_rejected_logits": 1.2155705690383911, |
|
"debug/policy_rejected_logps": -266.7242431640625, |
|
"debug/reference_chosen_logps": -227.6628875732422, |
|
"debug/reference_rejected_logps": -259.6141052246094, |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 8.66786179216246, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.918303370475769, |
|
"logits/rejected": 1.2155705690383911, |
|
"logps/chosen": -224.531982421875, |
|
"logps/rejected": -266.7242431640625, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.031309086829423904, |
|
"rewards/margins": 0.10241049528121948, |
|
"rewards/rejected": -0.07110141217708588, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8259471654891968, |
|
"debug/policy_chosen_logps": -230.60250854492188, |
|
"debug/policy_rejected_logits": 1.2626183032989502, |
|
"debug/policy_rejected_logps": -303.4950866699219, |
|
"debug/reference_chosen_logps": -230.0920867919922, |
|
"debug/reference_rejected_logps": -302.10784912109375, |
|
"epoch": 0.2848101265822785, |
|
"grad_norm": 6.143825464676947, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8259471654891968, |
|
"logits/rejected": 1.2626183032989502, |
|
"logps/chosen": -230.60250854492188, |
|
"logps/rejected": -303.4950866699219, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0051041776314377785, |
|
"rewards/margins": 0.008768384344875813, |
|
"rewards/rejected": -0.013872561976313591, |
|
"step": 45 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9409104585647583, |
|
"debug/policy_chosen_logps": -241.2617950439453, |
|
"debug/policy_rejected_logits": 1.2857184410095215, |
|
"debug/policy_rejected_logps": -291.4665222167969, |
|
"debug/reference_chosen_logps": -244.69577026367188, |
|
"debug/reference_rejected_logps": -284.1947021484375, |
|
"epoch": 0.31645569620253167, |
|
"grad_norm": 8.46649937885156, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9409104585647583, |
|
"logits/rejected": 1.2857184410095215, |
|
"logps/chosen": -241.2617950439453, |
|
"logps/rejected": -291.4665222167969, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.034339673817157745, |
|
"rewards/margins": 0.10705772787332535, |
|
"rewards/rejected": -0.0727180689573288, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8741863369941711, |
|
"debug/policy_chosen_logps": -250.87057495117188, |
|
"debug/policy_rejected_logits": 1.258837103843689, |
|
"debug/policy_rejected_logps": -289.27069091796875, |
|
"debug/reference_chosen_logps": -255.7415771484375, |
|
"debug/reference_rejected_logps": -283.4430847167969, |
|
"epoch": 0.34810126582278483, |
|
"grad_norm": 9.716442001601763, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8741863369941711, |
|
"logits/rejected": 1.258837103843689, |
|
"logps/chosen": -250.87057495117188, |
|
"logps/rejected": -289.27069091796875, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.048710085451602936, |
|
"rewards/margins": 0.10698604583740234, |
|
"rewards/rejected": -0.05827596038579941, |
|
"step": 55 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6640017628669739, |
|
"debug/policy_chosen_logps": -269.62237548828125, |
|
"debug/policy_rejected_logits": 0.8445190191268921, |
|
"debug/policy_rejected_logps": -291.27325439453125, |
|
"debug/reference_chosen_logps": -269.4212951660156, |
|
"debug/reference_rejected_logps": -285.77349853515625, |
|
"epoch": 0.379746835443038, |
|
"grad_norm": 7.925495242886814, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.6640017628669739, |
|
"logits/rejected": 0.8445190191268921, |
|
"logps/chosen": -269.62237548828125, |
|
"logps/rejected": -291.27325439453125, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.002010857220739126, |
|
"rewards/margins": 0.052986472845077515, |
|
"rewards/rejected": -0.05499732494354248, |
|
"step": 60 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0082881450653076, |
|
"debug/policy_chosen_logps": -241.1085662841797, |
|
"debug/policy_rejected_logits": 1.5921090841293335, |
|
"debug/policy_rejected_logps": -303.08465576171875, |
|
"debug/reference_chosen_logps": -245.0981903076172, |
|
"debug/reference_rejected_logps": -300.36328125, |
|
"epoch": 0.41139240506329117, |
|
"grad_norm": 7.096776814684128, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.0082881450653076, |
|
"logits/rejected": 1.5921090841293335, |
|
"logps/chosen": -241.1085662841797, |
|
"logps/rejected": -303.08465576171875, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.03989603370428085, |
|
"rewards/margins": 0.06710983067750931, |
|
"rewards/rejected": -0.027213791385293007, |
|
"step": 65 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.7952272295951843, |
|
"debug/policy_chosen_logps": -252.08798217773438, |
|
"debug/policy_rejected_logits": 1.0696840286254883, |
|
"debug/policy_rejected_logps": -287.27301025390625, |
|
"debug/reference_chosen_logps": -253.79379272460938, |
|
"debug/reference_rejected_logps": -279.5188903808594, |
|
"epoch": 0.4430379746835443, |
|
"grad_norm": 7.584678181203943, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.7952272295951843, |
|
"logits/rejected": 1.0696840286254883, |
|
"logps/chosen": -252.08798217773438, |
|
"logps/rejected": -287.27301025390625, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.017058206722140312, |
|
"rewards/margins": 0.09459935128688812, |
|
"rewards/rejected": -0.07754113525152206, |
|
"step": 70 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9075101613998413, |
|
"debug/policy_chosen_logps": -218.43185424804688, |
|
"debug/policy_rejected_logits": 1.0321990251541138, |
|
"debug/policy_rejected_logps": -245.87973022460938, |
|
"debug/reference_chosen_logps": -221.93466186523438, |
|
"debug/reference_rejected_logps": -243.0590057373047, |
|
"epoch": 0.47468354430379744, |
|
"grad_norm": 6.725884442562555, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9075101613998413, |
|
"logits/rejected": 1.0321990251541138, |
|
"logps/chosen": -218.43185424804688, |
|
"logps/rejected": -245.87973022460938, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03502799943089485, |
|
"rewards/margins": 0.06323517113924026, |
|
"rewards/rejected": -0.028207167983055115, |
|
"step": 75 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6510931253433228, |
|
"debug/policy_chosen_logps": -218.7671356201172, |
|
"debug/policy_rejected_logits": 0.8215225338935852, |
|
"debug/policy_rejected_logps": -276.33111572265625, |
|
"debug/reference_chosen_logps": -222.28018188476562, |
|
"debug/reference_rejected_logps": -267.1961364746094, |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 7.155350358859657, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.6510931253433228, |
|
"logits/rejected": 0.8215225338935852, |
|
"logps/chosen": -218.7671356201172, |
|
"logps/rejected": -276.33111572265625, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.03513062372803688, |
|
"rewards/margins": 0.1264806091785431, |
|
"rewards/rejected": -0.09134997427463531, |
|
"step": 80 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9534305334091187, |
|
"debug/policy_chosen_logps": -250.000244140625, |
|
"debug/policy_rejected_logits": 1.0431879758834839, |
|
"debug/policy_rejected_logps": -275.9551086425781, |
|
"debug/reference_chosen_logps": -250.7502899169922, |
|
"debug/reference_rejected_logps": -268.43548583984375, |
|
"epoch": 0.5379746835443038, |
|
"grad_norm": 26.837408837144096, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9534305334091187, |
|
"logits/rejected": 1.0431879758834839, |
|
"logps/chosen": -250.000244140625, |
|
"logps/rejected": -275.9551086425781, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007500249892473221, |
|
"rewards/margins": 0.08269646763801575, |
|
"rewards/rejected": -0.07519622147083282, |
|
"step": 85 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.1253650188446045, |
|
"debug/policy_chosen_logps": -240.8356475830078, |
|
"debug/policy_rejected_logits": 1.2428481578826904, |
|
"debug/policy_rejected_logps": -265.67266845703125, |
|
"debug/reference_chosen_logps": -245.643798828125, |
|
"debug/reference_rejected_logps": -261.6888122558594, |
|
"epoch": 0.569620253164557, |
|
"grad_norm": 8.938690009286978, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.1253650188446045, |
|
"logits/rejected": 1.2428481578826904, |
|
"logps/chosen": -240.8356475830078, |
|
"logps/rejected": -265.67266845703125, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.04808169603347778, |
|
"rewards/margins": 0.08791980892419815, |
|
"rewards/rejected": -0.03983811289072037, |
|
"step": 90 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9913564920425415, |
|
"debug/policy_chosen_logps": -247.68453979492188, |
|
"debug/policy_rejected_logits": 1.167474389076233, |
|
"debug/policy_rejected_logps": -284.51300048828125, |
|
"debug/reference_chosen_logps": -250.7725067138672, |
|
"debug/reference_rejected_logps": -276.8506774902344, |
|
"epoch": 0.6012658227848101, |
|
"grad_norm": 7.214786092625251, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9913564920425415, |
|
"logits/rejected": 1.167474389076233, |
|
"logps/chosen": -247.68453979492188, |
|
"logps/rejected": -284.51300048828125, |
|
"loss": 0.4481, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.030879342928528786, |
|
"rewards/margins": 0.10750222206115723, |
|
"rewards/rejected": -0.07662288844585419, |
|
"step": 95 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.548004388809204, |
|
"debug/policy_chosen_logps": -286.9696350097656, |
|
"debug/policy_rejected_logits": 1.2569023370742798, |
|
"debug/policy_rejected_logps": -255.9474639892578, |
|
"debug/reference_chosen_logps": -288.26263427734375, |
|
"debug/reference_rejected_logps": -252.56982421875, |
|
"epoch": 0.6329113924050633, |
|
"grad_norm": 7.098617456221662, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.548004388809204, |
|
"logits/rejected": 1.2569023370742798, |
|
"logps/chosen": -286.9696350097656, |
|
"logps/rejected": -255.9474639892578, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.012930279597640038, |
|
"rewards/margins": 0.04670674726366997, |
|
"rewards/rejected": -0.03377646952867508, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6329113924050633, |
|
"eval_debug/policy_chosen_logits": 1.2252188920974731, |
|
"eval_debug/policy_chosen_logps": -250.68939208984375, |
|
"eval_debug/policy_rejected_logits": 1.4343616962432861, |
|
"eval_debug/policy_rejected_logps": -287.45086669921875, |
|
"eval_debug/reference_chosen_logps": -255.34970092773438, |
|
"eval_debug/reference_rejected_logps": -283.57049560546875, |
|
"eval_logits/chosen": 1.2252188920974731, |
|
"eval_logits/rejected": 1.4343616962432861, |
|
"eval_logps/chosen": -250.68939208984375, |
|
"eval_logps/rejected": -287.45086669921875, |
|
"eval_loss": 0.43653252720832825, |
|
"eval_rewards/accuracies": 0.5769230723381042, |
|
"eval_rewards/chosen": 0.04660310223698616, |
|
"eval_rewards/margins": 0.08540700376033783, |
|
"eval_rewards/rejected": -0.03880389407277107, |
|
"eval_runtime": 19.8549, |
|
"eval_samples_per_second": 20.146, |
|
"eval_steps_per_second": 0.655, |
|
"step": 100 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.011919617652893, |
|
"debug/policy_chosen_logps": -279.73260498046875, |
|
"debug/policy_rejected_logits": 1.211625337600708, |
|
"debug/policy_rejected_logps": -298.412109375, |
|
"debug/reference_chosen_logps": -281.5310974121094, |
|
"debug/reference_rejected_logps": -292.20550537109375, |
|
"epoch": 0.6645569620253164, |
|
"grad_norm": 6.340425768293679, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.011919617652893, |
|
"logits/rejected": 1.211625337600708, |
|
"logps/chosen": -279.73260498046875, |
|
"logps/rejected": -298.412109375, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.017984820529818535, |
|
"rewards/margins": 0.08005066215991974, |
|
"rewards/rejected": -0.06206584721803665, |
|
"step": 105 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.0565037727355957, |
|
"debug/policy_chosen_logps": -251.0978546142578, |
|
"debug/policy_rejected_logits": 1.3947855234146118, |
|
"debug/policy_rejected_logps": -316.4710998535156, |
|
"debug/reference_chosen_logps": -253.4007110595703, |
|
"debug/reference_rejected_logps": -309.9458923339844, |
|
"epoch": 0.6962025316455697, |
|
"grad_norm": 20.34165260676491, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.0565037727355957, |
|
"logits/rejected": 1.3947855234146118, |
|
"logps/chosen": -251.0978546142578, |
|
"logps/rejected": -316.4710998535156, |
|
"loss": 0.4383, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.023028511554002762, |
|
"rewards/margins": 0.08828048408031464, |
|
"rewards/rejected": -0.06525196880102158, |
|
"step": 110 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8845629692077637, |
|
"debug/policy_chosen_logps": -241.9716339111328, |
|
"debug/policy_rejected_logits": 1.229775071144104, |
|
"debug/policy_rejected_logps": -321.60186767578125, |
|
"debug/reference_chosen_logps": -246.28433227539062, |
|
"debug/reference_rejected_logps": -314.5198974609375, |
|
"epoch": 0.7278481012658228, |
|
"grad_norm": 7.789166803514712, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8845629692077637, |
|
"logits/rejected": 1.229775071144104, |
|
"logps/chosen": -241.9716339111328, |
|
"logps/rejected": -321.60186767578125, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.043126728385686874, |
|
"rewards/margins": 0.11394629627466202, |
|
"rewards/rejected": -0.07081956416368484, |
|
"step": 115 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6471331119537354, |
|
"debug/policy_chosen_logps": -232.4429168701172, |
|
"debug/policy_rejected_logits": 0.9131924510002136, |
|
"debug/policy_rejected_logps": -279.41290283203125, |
|
"debug/reference_chosen_logps": -237.39102172851562, |
|
"debug/reference_rejected_logps": -273.61090087890625, |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 7.468046301754059, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.6471331119537354, |
|
"logits/rejected": 0.9131924510002136, |
|
"logps/chosen": -232.4429168701172, |
|
"logps/rejected": -279.41290283203125, |
|
"loss": 0.4131, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04948071017861366, |
|
"rewards/margins": 0.1075005754828453, |
|
"rewards/rejected": -0.05801987648010254, |
|
"step": 120 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9338349103927612, |
|
"debug/policy_chosen_logps": -260.35235595703125, |
|
"debug/policy_rejected_logits": 1.0534359216690063, |
|
"debug/policy_rejected_logps": -297.56683349609375, |
|
"debug/reference_chosen_logps": -264.9391174316406, |
|
"debug/reference_rejected_logps": -289.8217468261719, |
|
"epoch": 0.7911392405063291, |
|
"grad_norm": 8.935461685140815, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9338349103927612, |
|
"logits/rejected": 1.0534359216690063, |
|
"logps/chosen": -260.35235595703125, |
|
"logps/rejected": -297.56683349609375, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.045867711305618286, |
|
"rewards/margins": 0.12331867218017578, |
|
"rewards/rejected": -0.0774509608745575, |
|
"step": 125 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.8780291676521301, |
|
"debug/policy_chosen_logps": -284.29205322265625, |
|
"debug/policy_rejected_logits": 0.8824840784072876, |
|
"debug/policy_rejected_logps": -287.76690673828125, |
|
"debug/reference_chosen_logps": -286.41943359375, |
|
"debug/reference_rejected_logps": -283.56903076171875, |
|
"epoch": 0.8227848101265823, |
|
"grad_norm": 6.948216331668783, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.8780291676521301, |
|
"logits/rejected": 0.8824840784072876, |
|
"logps/chosen": -284.29205322265625, |
|
"logps/rejected": -287.76690673828125, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02127380482852459, |
|
"rewards/margins": 0.06325232237577438, |
|
"rewards/rejected": -0.041978511959314346, |
|
"step": 130 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.165907859802246, |
|
"debug/policy_chosen_logps": -255.9198455810547, |
|
"debug/policy_rejected_logits": 1.4020473957061768, |
|
"debug/policy_rejected_logps": -301.6413879394531, |
|
"debug/reference_chosen_logps": -260.84521484375, |
|
"debug/reference_rejected_logps": -295.99700927734375, |
|
"epoch": 0.8544303797468354, |
|
"grad_norm": 6.0797186914906485, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.165907859802246, |
|
"logits/rejected": 1.4020473957061768, |
|
"logps/chosen": -255.9198455810547, |
|
"logps/rejected": -301.6413879394531, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.049253594130277634, |
|
"rewards/margins": 0.10569741576910019, |
|
"rewards/rejected": -0.056443821638822556, |
|
"step": 135 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9684173464775085, |
|
"debug/policy_chosen_logps": -240.7368927001953, |
|
"debug/policy_rejected_logits": 1.522164225578308, |
|
"debug/policy_rejected_logps": -300.8490295410156, |
|
"debug/reference_chosen_logps": -244.41757202148438, |
|
"debug/reference_rejected_logps": -289.0794372558594, |
|
"epoch": 0.8860759493670886, |
|
"grad_norm": 6.850074566718433, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9684173464775085, |
|
"logits/rejected": 1.522164225578308, |
|
"logps/chosen": -240.7368927001953, |
|
"logps/rejected": -300.8490295410156, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03680698946118355, |
|
"rewards/margins": 0.1545029729604721, |
|
"rewards/rejected": -0.11769597232341766, |
|
"step": 140 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 1.095474123954773, |
|
"debug/policy_chosen_logps": -281.7500305175781, |
|
"debug/policy_rejected_logits": 1.0368950366973877, |
|
"debug/policy_rejected_logps": -281.8016052246094, |
|
"debug/reference_chosen_logps": -285.4373474121094, |
|
"debug/reference_rejected_logps": -278.67181396484375, |
|
"epoch": 0.9177215189873418, |
|
"grad_norm": 6.330596887372699, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 1.095474123954773, |
|
"logits/rejected": 1.0368950366973877, |
|
"logps/chosen": -281.7500305175781, |
|
"logps/rejected": -281.8016052246094, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.036873430013656616, |
|
"rewards/margins": 0.06817178428173065, |
|
"rewards/rejected": -0.03129836544394493, |
|
"step": 145 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.9509929418563843, |
|
"debug/policy_chosen_logps": -247.018310546875, |
|
"debug/policy_rejected_logits": 1.1111629009246826, |
|
"debug/policy_rejected_logps": -272.07684326171875, |
|
"debug/reference_chosen_logps": -250.40658569335938, |
|
"debug/reference_rejected_logps": -265.6427001953125, |
|
"epoch": 0.9493670886075949, |
|
"grad_norm": 8.073046871358697, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.9509929418563843, |
|
"logits/rejected": 1.1111629009246826, |
|
"logps/chosen": -247.018310546875, |
|
"logps/rejected": -272.07684326171875, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03388286381959915, |
|
"rewards/margins": 0.09822405129671097, |
|
"rewards/rejected": -0.06434118002653122, |
|
"step": 150 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 0.6622827053070068, |
|
"debug/policy_chosen_logps": -237.2403106689453, |
|
"debug/policy_rejected_logits": 0.8520939946174622, |
|
"debug/policy_rejected_logps": -286.5059509277344, |
|
"debug/reference_chosen_logps": -241.94467163085938, |
|
"debug/reference_rejected_logps": -278.73272705078125, |
|
"epoch": 0.9810126582278481, |
|
"grad_norm": 7.904037537559287, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 0.6622827053070068, |
|
"logits/rejected": 0.8520939946174622, |
|
"logps/chosen": -237.2403106689453, |
|
"logps/rejected": -286.5059509277344, |
|
"loss": 0.423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04704369604587555, |
|
"rewards/margins": 0.12477605044841766, |
|
"rewards/rejected": -0.07773236930370331, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 158, |
|
"total_flos": 0.0, |
|
"train_loss": 0.44511839181562013, |
|
"train_runtime": 1281.3009, |
|
"train_samples_per_second": 7.867, |
|
"train_steps_per_second": 0.123 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 158, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|