{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-09,
      "logits/chosen": -2.6344056129455566,
      "logits/rejected": -2.5906338691711426,
      "logps/chosen": -158.64126586914062,
      "logps/pi_response": -86.09246826171875,
      "logps/ref_response": -86.09246826171875,
      "logps/rejected": -148.42047119140625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.6937406063079834,
      "logits/rejected": -2.697803258895874,
      "logps/chosen": -232.31423950195312,
      "logps/pi_response": -118.80592346191406,
      "logps/ref_response": -118.89913940429688,
      "logps/rejected": -251.14076232910156,
      "loss": 0.6932,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": 0.00028485539951361716,
      "rewards/margins": -0.0006583214271813631,
      "rewards/rejected": 0.0009431770886294544,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-08,
      "logits/chosen": -2.7444348335266113,
      "logits/rejected": -2.718367338180542,
      "logps/chosen": -232.554443359375,
      "logps/pi_response": -131.306884765625,
      "logps/ref_response": -131.12376403808594,
      "logps/rejected": -254.0548553466797,
      "loss": 0.6928,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.00253383070230484,
      "rewards/margins": 0.0006815333035774529,
      "rewards/rejected": -0.003215363947674632,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8826812513685484e-08,
      "logits/chosen": -2.6866040229797363,
      "logits/rejected": -2.6289615631103516,
      "logps/chosen": -227.64889526367188,
      "logps/pi_response": -114.9122085571289,
      "logps/ref_response": -113.8835220336914,
      "logps/rejected": -221.958251953125,
      "loss": 0.6908,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.013358505442738533,
      "rewards/margins": 0.004771661013364792,
      "rewards/rejected": -0.018130164593458176,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421204e-08,
      "logits/chosen": -2.658876895904541,
      "logits/rejected": -2.6415140628814697,
      "logps/chosen": -206.0329132080078,
      "logps/pi_response": -113.687744140625,
      "logps/ref_response": -110.53662109375,
      "logps/rejected": -263.42254638671875,
      "loss": 0.6875,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.03745666891336441,
      "rewards/margins": 0.013648083433508873,
      "rewards/rejected": -0.051104746758937836,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595095e-08,
      "logits/chosen": -2.6783342361450195,
      "logits/rejected": -2.6550586223602295,
      "logps/chosen": -232.83877563476562,
      "logps/pi_response": -125.37564849853516,
      "logps/ref_response": -120.1254653930664,
      "logps/rejected": -245.05154418945312,
      "loss": 0.6848,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.0704035758972168,
      "rewards/margins": 0.005661136005073786,
      "rewards/rejected": -0.07606470584869385,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-08,
      "logits/chosen": -2.647156238555908,
      "logits/rejected": -2.6198782920837402,
      "logps/chosen": -222.61935424804688,
      "logps/pi_response": -114.81854248046875,
      "logps/ref_response": -107.5267333984375,
      "logps/rejected": -237.662109375,
      "loss": 0.6811,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.08966623246669769,
      "rewards/margins": 0.0316656231880188,
      "rewards/rejected": -0.12133185565471649,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.437648009023905e-08,
      "logits/chosen": -2.644777536392212,
      "logits/rejected": -2.615166664123535,
      "logps/chosen": -226.83267211914062,
      "logps/pi_response": -119.3505859375,
      "logps/ref_response": -112.31196594238281,
      "logps/rejected": -212.1949920654297,
      "loss": 0.6752,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.08632274717092514,
      "rewards/margins": 0.04537486657500267,
      "rewards/rejected": -0.1316976249217987,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9100607788275543e-08,
      "logits/chosen": -2.640345573425293,
      "logits/rejected": -2.597229480743408,
      "logps/chosen": -205.22775268554688,
      "logps/pi_response": -108.34332275390625,
      "logps/ref_response": -99.59819793701172,
      "logps/rejected": -236.5616912841797,
      "loss": 0.6718,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.09881611168384552,
      "rewards/margins": 0.06056177616119385,
      "rewards/rejected": -0.15937790274620056,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.362761650339181e-08,
      "logits/chosen": -2.681976556777954,
      "logits/rejected": -2.656203508377075,
      "logps/chosen": -221.44039916992188,
      "logps/pi_response": -112.46112060546875,
      "logps/ref_response": -100.86174011230469,
      "logps/rejected": -234.1958465576172,
      "loss": 0.6698,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.13516300916671753,
      "rewards/margins": 0.055410224944353104,
      "rewards/rejected": -0.19057324528694153,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089574e-08,
      "logits/chosen": -2.590778112411499,
      "logits/rejected": -2.5902531147003174,
      "logps/chosen": -220.1934814453125,
      "logps/pi_response": -122.29930114746094,
      "logps/ref_response": -112.6884765625,
      "logps/rejected": -272.1497802734375,
      "loss": 0.6681,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.11862450838088989,
      "rewards/margins": 0.06530530005693436,
      "rewards/rejected": -0.18392980098724365,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135997e-08,
      "logits/chosen": -2.6394224166870117,
      "logits/rejected": -2.660050392150879,
      "logps/chosen": -230.77310180664062,
      "logps/pi_response": -112.18680572509766,
      "logps/ref_response": -100.60249328613281,
      "logps/rejected": -226.88265991210938,
      "loss": 0.6736,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.14069566130638123,
      "rewards/margins": 0.02159450575709343,
      "rewards/rejected": -0.16229018568992615,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367876e-09,
      "logits/chosen": -2.631422519683838,
      "logits/rejected": -2.5770680904388428,
      "logps/chosen": -250.26956176757812,
      "logps/pi_response": -141.0382537841797,
      "logps/ref_response": -130.39340209960938,
      "logps/rejected": -256.5991516113281,
      "loss": 0.6699,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.12506115436553955,
      "rewards/margins": 0.058550190180540085,
      "rewards/rejected": -0.18361134827136993,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-09,
      "logits/chosen": -2.669673442840576,
      "logits/rejected": -2.6708672046661377,
      "logps/chosen": -225.036865234375,
      "logps/pi_response": -123.17768859863281,
      "logps/ref_response": -113.8158950805664,
      "logps/rejected": -251.93417358398438,
      "loss": 0.67,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.11341943591833115,
      "rewards/margins": 0.06858594715595245,
      "rewards/rejected": -0.182005375623703,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020853e-09,
      "logits/chosen": -2.6793627738952637,
      "logits/rejected": -2.6371798515319824,
      "logps/chosen": -242.1341552734375,
      "logps/pi_response": -131.11148071289062,
      "logps/ref_response": -119.49796295166016,
      "logps/rejected": -253.3504180908203,
      "loss": 0.6699,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.13750413060188293,
      "rewards/margins": 0.06477151811122894,
      "rewards/rejected": -0.20227563381195068,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.870879364444108e-10,
      "logits/chosen": -2.6344664096832275,
      "logits/rejected": -2.61822509765625,
      "logps/chosen": -228.68997192382812,
      "logps/pi_response": -112.9517593383789,
      "logps/ref_response": -101.72931671142578,
      "logps/rejected": -230.86636352539062,
      "loss": 0.6685,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.13467349112033844,
      "rewards/margins": 0.053079742938280106,
      "rewards/rejected": -0.18775323033332825,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.6778795974059675,
      "train_runtime": 4527.8424,
      "train_samples_per_second": 4.501,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}