{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997382884061764,
  "eval_steps": 100,
  "global_step": 955,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5.208333333333333e-09,
      "logits/chosen": -2.7525930404663086,
      "logits/rejected": -2.6732418537139893,
      "logps/chosen": -297.177001953125,
      "logps/rejected": -236.72621154785156,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 17.0
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.976717112922002e-07,
      "logits/chosen": -2.6616106033325195,
      "logits/rejected": -2.6597719192504883,
      "logps/chosen": -270.4000244140625,
      "logps/rejected": -249.33827209472656,
      "loss": 0.6829,
      "pred_label": 333.43182373046875,
      "rewards/accuracies": 0.4965277910232544,
      "rewards/chosen": 0.0011782451765611768,
      "rewards/margins": 0.001073930412530899,
      "rewards/rejected": 0.00010431456030346453,
      "step": 100,
      "use_label": 1283.5682373046875
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.3946449359720607e-07,
      "logits/chosen": -2.6945221424102783,
      "logits/rejected": -2.678621530532837,
      "logps/chosen": -271.6979064941406,
      "logps/rejected": -254.37026977539062,
      "loss": 0.6799,
      "pred_label": 1038.7462158203125,
      "rewards/accuracies": 0.5350000262260437,
      "rewards/chosen": 0.004888341296464205,
      "rewards/margins": 0.007898561656475067,
      "rewards/rejected": -0.0030102210585027933,
      "step": 200,
      "use_label": 3762.253662109375
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.812572759022118e-07,
      "logits/chosen": -2.6708526611328125,
      "logits/rejected": -2.6628105640411377,
      "logps/chosen": -272.3077392578125,
      "logps/rejected": -253.75027465820312,
      "loss": 0.6728,
      "pred_label": 1884.596923828125,
      "rewards/accuracies": 0.5653125047683716,
      "rewards/chosen": 0.010109632275998592,
      "rewards/margins": 0.016557401046156883,
      "rewards/rejected": -0.006447767838835716,
      "step": 300,
      "use_label": 6116.4033203125
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.230500582072177e-07,
      "logits/chosen": -2.668009042739868,
      "logits/rejected": -2.650494337081909,
      "logps/chosen": -267.6447448730469,
      "logps/rejected": -253.59107971191406,
      "loss": 0.6616,
      "pred_label": 3012.675537109375,
      "rewards/accuracies": 0.6193749904632568,
      "rewards/chosen": 0.017754318192601204,
      "rewards/margins": 0.030351871624588966,
      "rewards/rejected": -0.012597555294632912,
      "step": 400,
      "use_label": 8188.32421875
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.648428405122235e-07,
      "logits/chosen": -2.6697680950164795,
      "logits/rejected": -2.6707708835601807,
      "logps/chosen": -271.2095642089844,
      "logps/rejected": -247.21224975585938,
      "loss": 0.6528,
      "pred_label": 4377.916015625,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.024391591548919678,
      "rewards/margins": 0.04303843528032303,
      "rewards/rejected": -0.01864684373140335,
      "step": 500,
      "use_label": 10023.083984375
    },
    {
      "epoch": 0.63,
      "learning_rate": 2.0663562281722933e-07,
      "logits/chosen": -2.659043073654175,
      "logits/rejected": -2.6555004119873047,
      "logps/chosen": -272.95050048828125,
      "logps/rejected": -251.1392364501953,
      "loss": 0.6442,
      "pred_label": 5962.0673828125,
      "rewards/accuracies": 0.6553124785423279,
      "rewards/chosen": 0.030743848532438278,
      "rewards/margins": 0.0554736964404583,
      "rewards/rejected": -0.024729840457439423,
      "step": 600,
      "use_label": 11638.9326171875
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.4842840512223514e-07,
      "logits/chosen": -2.6591668128967285,
      "logits/rejected": -2.6622869968414307,
      "logps/chosen": -269.9889221191406,
      "logps/rejected": -245.4040985107422,
      "loss": 0.64,
      "pred_label": 7640.8505859375,
      "rewards/accuracies": 0.6478124856948853,
      "rewards/chosen": 0.03263993561267853,
      "rewards/margins": 0.061180587857961655,
      "rewards/rejected": -0.02854064851999283,
      "step": 700,
      "use_label": 13160.150390625
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.022118742724097e-08,
      "logits/chosen": -2.650268793106079,
      "logits/rejected": -2.6555473804473877,
      "logps/chosen": -272.705322265625,
      "logps/rejected": -252.30169677734375,
      "loss": 0.6368,
      "pred_label": 9366.9609375,
      "rewards/accuracies": 0.6415625214576721,
      "rewards/chosen": 0.031398553401231766,
      "rewards/margins": 0.06083739921450615,
      "rewards/rejected": -0.029438842087984085,
      "step": 800,
      "use_label": 14634.0390625
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.20139697322468e-08,
      "logits/chosen": -2.6563680171966553,
      "logits/rejected": -2.6590001583099365,
      "logps/chosen": -269.04559326171875,
      "logps/rejected": -253.2301025390625,
      "loss": 0.6377,
      "pred_label": 11126.677734375,
      "rewards/accuracies": 0.6418750286102295,
      "rewards/chosen": 0.02964354306459427,
      "rewards/margins": 0.05687180534005165,
      "rewards/rejected": -0.027228260412812233,
      "step": 900,
      "use_label": 16074.322265625
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": -2.4939169883728027,
      "eval_logits/rejected": -2.495774507522583,
      "eval_logps/chosen": -269.28546142578125,
      "eval_logps/rejected": -253.23594665527344,
      "eval_loss": 0.6354129910469055,
      "eval_pred_label": 13234.32421875,
      "eval_rewards/accuracies": 0.6259999871253967,
      "eval_rewards/chosen": 0.027118388563394547,
      "eval_rewards/margins": 0.056793875992298126,
      "eval_rewards/rejected": -0.029675481840968132,
      "eval_runtime": 1016.337,
      "eval_samples_per_second": 1.968,
      "eval_steps_per_second": 0.246,
      "eval_use_label": 17827.67578125,
      "step": 955
    },
    {
      "epoch": 1.0,
      "step": 955,
      "total_flos": 0.0,
      "train_loss": 0.6554346030919339,
      "train_runtime": 50166.5495,
      "train_samples_per_second": 1.219,
      "train_steps_per_second": 0.019
    }
  ],
  "logging_steps": 100,
  "max_steps": 955,
  "num_train_epochs": 1,
  "save_steps": 10,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}