|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.980285167694092, |
|
"logits/rejected": -2.87275767326355, |
|
"logps/chosen": -313.4390563964844, |
|
"logps/rejected": -236.1754150390625, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.953434225844005e-06, |
|
"logits/chosen": -2.8180909156799316, |
|
"logits/rejected": -2.8273613452911377, |
|
"logps/chosen": -285.13623046875, |
|
"logps/rejected": -271.66839599609375, |
|
"loss": 0.6624, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6041666865348816, |
|
"rewards/chosen": 0.014434419572353363, |
|
"rewards/margins": 0.08631344139575958, |
|
"rewards/rejected": -0.07187902927398682, |
|
"step": 100, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.789289871944122e-06, |
|
"logits/chosen": -2.8230364322662354, |
|
"logits/rejected": -2.8086395263671875, |
|
"logps/chosen": -278.2524108886719, |
|
"logps/rejected": -263.9921569824219, |
|
"loss": 0.5868, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6949999928474426, |
|
"rewards/chosen": 0.05295524746179581, |
|
"rewards/margins": 0.39829620718955994, |
|
"rewards/rejected": -0.3453409671783447, |
|
"step": 200, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.625145518044238e-06, |
|
"logits/chosen": -2.803905725479126, |
|
"logits/rejected": -2.802032232284546, |
|
"logps/chosen": -284.01385498046875, |
|
"logps/rejected": -259.5546569824219, |
|
"loss": 0.562, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7056249976158142, |
|
"rewards/chosen": 0.005377008114010096, |
|
"rewards/margins": 0.5735920667648315, |
|
"rewards/rejected": -0.5682151317596436, |
|
"step": 300, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.461001164144355e-06, |
|
"logits/chosen": -2.8141045570373535, |
|
"logits/rejected": -2.7911813259124756, |
|
"logps/chosen": -284.3139953613281, |
|
"logps/rejected": -269.4837951660156, |
|
"loss": 0.5527, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.011648621410131454, |
|
"rewards/margins": 0.6873368620872498, |
|
"rewards/rejected": -0.6756882667541504, |
|
"step": 400, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.2968568102444705e-06, |
|
"logits/chosen": -2.7915823459625244, |
|
"logits/rejected": -2.776299476623535, |
|
"logps/chosen": -269.73016357421875, |
|
"logps/rejected": -257.2498474121094, |
|
"loss": 0.5556, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.6862499713897705, |
|
"rewards/chosen": 0.03227977454662323, |
|
"rewards/margins": 0.5588020086288452, |
|
"rewards/rejected": -0.5265222191810608, |
|
"step": 500, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.132712456344587e-06, |
|
"logits/chosen": -2.8199498653411865, |
|
"logits/rejected": -2.8022332191467285, |
|
"logps/chosen": -284.0947265625, |
|
"logps/rejected": -271.3774108886719, |
|
"loss": 0.5422, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7143750190734863, |
|
"rewards/chosen": 0.11361943930387497, |
|
"rewards/margins": 0.7251341938972473, |
|
"rewards/rejected": -0.6115147471427917, |
|
"step": 600, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.9685681024447033e-06, |
|
"logits/chosen": -2.8110527992248535, |
|
"logits/rejected": -2.788975477218628, |
|
"logps/chosen": -280.3959045410156, |
|
"logps/rejected": -254.49673461914062, |
|
"loss": 0.5404, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7212499976158142, |
|
"rewards/chosen": 0.11637673527002335, |
|
"rewards/margins": 0.6999369263648987, |
|
"rewards/rejected": -0.5835601687431335, |
|
"step": 700, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.8044237485448196e-06, |
|
"logits/chosen": -2.8146722316741943, |
|
"logits/rejected": -2.812129020690918, |
|
"logps/chosen": -287.4331359863281, |
|
"logps/rejected": -267.59161376953125, |
|
"loss": 0.5343, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.13239255547523499, |
|
"rewards/margins": 0.7503484487533569, |
|
"rewards/rejected": -0.6179558634757996, |
|
"step": 800, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.402793946449361e-07, |
|
"logits/chosen": -2.8043179512023926, |
|
"logits/rejected": -2.8079681396484375, |
|
"logps/chosen": -276.8100891113281, |
|
"logps/rejected": -262.690673828125, |
|
"loss": 0.545, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.7193750143051147, |
|
"rewards/chosen": 0.06981150805950165, |
|
"rewards/margins": 0.7052963972091675, |
|
"rewards/rejected": -0.6354848742485046, |
|
"step": 900, |
|
"use_label": 0.0 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.827404737472534, |
|
"eval_logits/rejected": -2.818655014038086, |
|
"eval_logps/chosen": -283.37945556640625, |
|
"eval_logps/rejected": -265.9969787597656, |
|
"eval_loss": 0.5459502935409546, |
|
"eval_pred_label": 0.0, |
|
"eval_rewards/accuracies": 0.7139999866485596, |
|
"eval_rewards/chosen": 0.08778975158929825, |
|
"eval_rewards/margins": 0.7575166821479797, |
|
"eval_rewards/rejected": -0.6697269678115845, |
|
"eval_runtime": 479.5351, |
|
"eval_samples_per_second": 4.171, |
|
"eval_steps_per_second": 0.261, |
|
"eval_use_label": 0.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5628191218950361, |
|
"train_runtime": 25746.1298, |
|
"train_samples_per_second": 2.375, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|