|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.980285167694092, |
|
"logits/rejected": -2.87275767326355, |
|
"logps/chosen": -313.4390563964844, |
|
"logps/rejected": -236.1754150390625, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9767171129220025e-06, |
|
"logits/chosen": -2.8189077377319336, |
|
"logits/rejected": -2.828260660171509, |
|
"logps/chosen": -285.1784362792969, |
|
"logps/rejected": -271.1445617675781, |
|
"loss": 0.6803, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.5738636255264282, |
|
"rewards/chosen": 0.01021653600037098, |
|
"rewards/margins": 0.029710056260228157, |
|
"rewards/rejected": -0.019493522122502327, |
|
"step": 100, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.394644935972061e-06, |
|
"logits/chosen": -2.8255398273468018, |
|
"logits/rejected": -2.813203811645508, |
|
"logps/chosen": -278.2582092285156, |
|
"logps/rejected": -262.1144714355469, |
|
"loss": 0.6236, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6868749856948853, |
|
"rewards/chosen": 0.05237884446978569, |
|
"rewards/margins": 0.20995216071605682, |
|
"rewards/rejected": -0.15757331252098083, |
|
"step": 200, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022119e-06, |
|
"logits/chosen": -2.809633255004883, |
|
"logits/rejected": -2.80940580368042, |
|
"logps/chosen": -283.6641540527344, |
|
"logps/rejected": -257.23779296875, |
|
"loss": 0.5901, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6956250071525574, |
|
"rewards/chosen": 0.04034877195954323, |
|
"rewards/margins": 0.3768764138221741, |
|
"rewards/rejected": -0.33652764558792114, |
|
"step": 300, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2305005820721774e-06, |
|
"logits/chosen": -2.821267604827881, |
|
"logits/rejected": -2.8011868000030518, |
|
"logps/chosen": -284.2022399902344, |
|
"logps/rejected": -267.48358154296875, |
|
"loss": 0.5757, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7081249952316284, |
|
"rewards/chosen": 0.022827474400401115, |
|
"rewards/margins": 0.4984941780567169, |
|
"rewards/rejected": -0.47566673159599304, |
|
"step": 400, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6484284051222353e-06, |
|
"logits/chosen": -2.7993886470794678, |
|
"logits/rejected": -2.7845959663391113, |
|
"logps/chosen": -270.09637451171875, |
|
"logps/rejected": -256.13458251953125, |
|
"loss": 0.5798, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6612499952316284, |
|
"rewards/chosen": -0.004339172504842281, |
|
"rewards/margins": 0.4106566905975342, |
|
"rewards/rejected": -0.41499578952789307, |
|
"step": 500, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722936e-06, |
|
"logits/chosen": -2.824444532394409, |
|
"logits/rejected": -2.808506965637207, |
|
"logps/chosen": -284.5413513183594, |
|
"logps/rejected": -270.1356201171875, |
|
"loss": 0.5643, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.06895674020051956, |
|
"rewards/margins": 0.5562920570373535, |
|
"rewards/rejected": -0.4873352348804474, |
|
"step": 600, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223516e-06, |
|
"logits/chosen": -2.8160746097564697, |
|
"logits/rejected": -2.795208692550659, |
|
"logps/chosen": -280.79766845703125, |
|
"logps/rejected": -253.41058349609375, |
|
"loss": 0.5631, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7231249809265137, |
|
"rewards/chosen": 0.07620371133089066, |
|
"rewards/margins": 0.5511507391929626, |
|
"rewards/rejected": -0.474947065114975, |
|
"step": 700, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724098e-07, |
|
"logits/chosen": -2.8168938159942627, |
|
"logits/rejected": -2.8180816173553467, |
|
"logps/chosen": -287.77679443359375, |
|
"logps/rejected": -266.3511657714844, |
|
"loss": 0.5548, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7112500071525574, |
|
"rewards/chosen": 0.09802371263504028, |
|
"rewards/margins": 0.5919383764266968, |
|
"rewards/rejected": -0.49391472339630127, |
|
"step": 800, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2013969732246806e-07, |
|
"logits/chosen": -2.808311700820923, |
|
"logits/rejected": -2.812298536300659, |
|
"logps/chosen": -276.976806640625, |
|
"logps/rejected": -261.3187561035156, |
|
"loss": 0.5689, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.05313897505402565, |
|
"rewards/margins": 0.551435649394989, |
|
"rewards/rejected": -0.4982966482639313, |
|
"step": 900, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.8302695751190186, |
|
"eval_logits/rejected": -2.8291618824005127, |
|
"eval_logps/chosen": -283.5841369628906, |
|
"eval_logps/rejected": -264.64422607421875, |
|
"eval_loss": 0.5655555129051208, |
|
"eval_pred_label": 0.0, |
|
"eval_rewards/accuracies": 0.699999988079071, |
|
"eval_rewards/chosen": 0.06732505559921265, |
|
"eval_rewards/margins": 0.6017746329307556, |
|
"eval_rewards/rejected": -0.534449577331543, |
|
"eval_runtime": 457.335, |
|
"eval_samples_per_second": 4.373, |
|
"eval_steps_per_second": 0.273, |
|
"eval_use_label": 0.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5870625535855118, |
|
"train_runtime": 25384.4334, |
|
"train_samples_per_second": 2.408, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|