|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 53, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.8462421894073486, |
|
"logits/rejected": -2.8283610343933105, |
|
"logps/chosen": -274.7393798828125, |
|
"logps/rejected": -204.42575073242188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.911172937635942e-07, |
|
"logits/chosen": -2.8527991771698, |
|
"logits/rejected": -2.8377315998077393, |
|
"logps/chosen": -305.9073181152344, |
|
"logps/rejected": -295.8478698730469, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.0023197412956506014, |
|
"rewards/margins": 0.0025084479711949825, |
|
"rewards/rejected": -0.00018870655912905931, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.982949361823388e-07, |
|
"logits/chosen": -2.859750270843506, |
|
"logits/rejected": -2.880180835723877, |
|
"logps/chosen": -295.7957458496094, |
|
"logps/rejected": -332.6015930175781, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.02228003740310669, |
|
"rewards/margins": 0.059415679425001144, |
|
"rewards/rejected": -0.037135638296604156, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.416462557480814e-07, |
|
"logits/chosen": -2.843632936477661, |
|
"logits/rejected": -2.8286781311035156, |
|
"logps/chosen": -310.9751892089844, |
|
"logps/rejected": -322.77532958984375, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.027804672718048096, |
|
"rewards/margins": 0.1940310001373291, |
|
"rewards/rejected": -0.1662263423204422, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.859303711029939e-08, |
|
"logits/chosen": -2.8175368309020996, |
|
"logits/rejected": -2.821326494216919, |
|
"logps/chosen": -274.8536682128906, |
|
"logps/rejected": -349.11505126953125, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.002673505572602153, |
|
"rewards/margins": 0.2130366563796997, |
|
"rewards/rejected": -0.21036314964294434, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.009573740853313e-09, |
|
"logits/chosen": -2.8560073375701904, |
|
"logits/rejected": -2.867896556854248, |
|
"logps/chosen": -307.2721862792969, |
|
"logps/rejected": -350.257568359375, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0023462946992367506, |
|
"rewards/margins": 0.33910489082336426, |
|
"rewards/rejected": -0.336758553981781, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 53, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6385756753525644, |
|
"train_runtime": 422.6241, |
|
"train_samples_per_second": 15.972, |
|
"train_steps_per_second": 0.125 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 53, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|