|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -2.980285167694092, |
|
"logits/rejected": -2.87275767326355, |
|
"logps/chosen": -313.4390563964844, |
|
"logps/rejected": -236.1754150390625, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976717112922002e-07, |
|
"logits/chosen": -2.8194870948791504, |
|
"logits/rejected": -2.8288567066192627, |
|
"logps/chosen": -285.2724304199219, |
|
"logps/rejected": -270.956298828125, |
|
"loss": 0.6838, |
|
"pred_label": 150.7020263671875, |
|
"rewards/accuracies": 0.5050504803657532, |
|
"rewards/chosen": 0.000817809603177011, |
|
"rewards/margins": 0.0014873194741085172, |
|
"rewards/rejected": -0.0006695101037621498, |
|
"step": 100, |
|
"use_label": 659.2979736328125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3946449359720607e-07, |
|
"logits/chosen": -2.828075647354126, |
|
"logits/rejected": -2.816530227661133, |
|
"logps/chosen": -278.7549133300781, |
|
"logps/rejected": -260.5694274902344, |
|
"loss": 0.683, |
|
"pred_label": 452.552490234375, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.002707230392843485, |
|
"rewards/margins": 0.005774380639195442, |
|
"rewards/rejected": -0.003067150479182601, |
|
"step": 200, |
|
"use_label": 1949.447509765625 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.812572759022118e-07, |
|
"logits/chosen": -2.8141846656799316, |
|
"logits/rejected": -2.8159701824188232, |
|
"logps/chosen": -284.0125732421875, |
|
"logps/rejected": -253.9112091064453, |
|
"loss": 0.6807, |
|
"pred_label": 775.85498046875, |
|
"rewards/accuracies": 0.5575000047683716, |
|
"rewards/chosen": 0.005504029802978039, |
|
"rewards/margins": 0.009370613843202591, |
|
"rewards/rejected": -0.0038665838073939085, |
|
"step": 300, |
|
"use_label": 3226.14501953125 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.230500582072177e-07, |
|
"logits/chosen": -2.826817512512207, |
|
"logits/rejected": -2.8094358444213867, |
|
"logps/chosen": -284.3566589355469, |
|
"logps/rejected": -262.80731201171875, |
|
"loss": 0.6769, |
|
"pred_label": 1149.0574951171875, |
|
"rewards/accuracies": 0.5774999856948853, |
|
"rewards/chosen": 0.007384983357042074, |
|
"rewards/margins": 0.015422500669956207, |
|
"rewards/rejected": -0.008037514984607697, |
|
"step": 400, |
|
"use_label": 4452.9423828125 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.648428405122235e-07, |
|
"logits/chosen": -2.807734966278076, |
|
"logits/rejected": -2.796409845352173, |
|
"logps/chosen": -269.9852600097656, |
|
"logps/rejected": -252.07232666015625, |
|
"loss": 0.6728, |
|
"pred_label": 1592.5675048828125, |
|
"rewards/accuracies": 0.5756250023841858, |
|
"rewards/chosen": 0.006774631794542074, |
|
"rewards/margins": 0.01554279588162899, |
|
"rewards/rejected": -0.008768163621425629, |
|
"step": 500, |
|
"use_label": 5609.4326171875 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0663562281722933e-07, |
|
"logits/chosen": -2.8339650630950928, |
|
"logits/rejected": -2.82075572013855, |
|
"logps/chosen": -285.0927734375, |
|
"logps/rejected": -265.4134826660156, |
|
"loss": 0.6681, |
|
"pred_label": 2111.6650390625, |
|
"rewards/accuracies": 0.6206250190734863, |
|
"rewards/chosen": 0.013815036043524742, |
|
"rewards/margins": 0.0289370846003294, |
|
"rewards/rejected": -0.015122047625482082, |
|
"step": 600, |
|
"use_label": 6690.3349609375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4842840512223514e-07, |
|
"logits/chosen": -2.827232599258423, |
|
"logits/rejected": -2.811751127243042, |
|
"logps/chosen": -281.4178771972656, |
|
"logps/rejected": -248.81068420410156, |
|
"loss": 0.6659, |
|
"pred_label": 2680.2724609375, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.01417633332312107, |
|
"rewards/margins": 0.029135096818208694, |
|
"rewards/rejected": -0.014958759769797325, |
|
"step": 700, |
|
"use_label": 7721.7275390625 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.022118742724097e-08, |
|
"logits/chosen": -2.8300516605377197, |
|
"logits/rejected": -2.835542678833008, |
|
"logps/chosen": -288.608642578125, |
|
"logps/rejected": -261.5773010253906, |
|
"loss": 0.6646, |
|
"pred_label": 3286.232421875, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": 0.014839441515505314, |
|
"rewards/margins": 0.03136582300066948, |
|
"rewards/rejected": -0.01652638241648674, |
|
"step": 800, |
|
"use_label": 8715.767578125 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.20139697322468e-08, |
|
"logits/chosen": -2.8211710453033447, |
|
"logits/rejected": -2.8280835151672363, |
|
"logps/chosen": -277.363525390625, |
|
"logps/rejected": -256.4843444824219, |
|
"loss": 0.6641, |
|
"pred_label": 3882.75244140625, |
|
"rewards/accuracies": 0.6331250071525574, |
|
"rewards/chosen": 0.01446867547929287, |
|
"rewards/margins": 0.02932187356054783, |
|
"rewards/rejected": -0.014853193424642086, |
|
"step": 900, |
|
"use_label": 9719.2470703125 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.842418670654297, |
|
"eval_logits/rejected": -2.846235752105713, |
|
"eval_logps/chosen": -284.122314453125, |
|
"eval_logps/rejected": -259.4594421386719, |
|
"eval_loss": 0.6635700464248657, |
|
"eval_pred_label": 4600.50390625, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": 0.013506044633686543, |
|
"eval_rewards/margins": 0.029479000717401505, |
|
"eval_rewards/rejected": -0.015972958877682686, |
|
"eval_runtime": 438.8322, |
|
"eval_samples_per_second": 4.558, |
|
"eval_steps_per_second": 0.285, |
|
"eval_use_label": 10931.49609375, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6728555943953429, |
|
"train_runtime": 24272.064, |
|
"train_samples_per_second": 2.519, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 955, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|