{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9947089947089947,
  "eval_steps": 500,
  "global_step": 94,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 25.005165779900267,
      "learning_rate": 1e-08,
      "logits/chosen": -1.9501205682754517,
      "logits/rejected": -2.513594388961792,
      "logps/chosen": -348.5884704589844,
      "logps/rejected": -166.58517456054688,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.11,
      "grad_norm": 23.68906488321,
      "learning_rate": 1e-07,
      "logits/chosen": -2.640984535217285,
      "logits/rejected": -2.219906806945801,
      "logps/chosen": -213.95584106445312,
      "logps/rejected": -198.17874145507812,
      "loss": 0.6932,
      "rewards/accuracies": 0.4930555522441864,
      "rewards/chosen": 0.0007135343039408326,
      "rewards/margins": 0.0006534373969770968,
      "rewards/rejected": 6.009703065501526e-05,
      "step": 10
    },
    {
      "epoch": 0.21,
      "grad_norm": 22.52231020802604,
      "learning_rate": 9.65436874322102e-08,
      "logits/chosen": -2.397062301635742,
      "logits/rejected": -2.3303606510162354,
      "logps/chosen": -257.8389587402344,
      "logps/rejected": -222.90444946289062,
      "loss": 0.6917,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.0030278589110821486,
      "rewards/margins": 0.002860091160982847,
      "rewards/rejected": 0.00016776802658569068,
      "step": 20
    },
    {
      "epoch": 0.32,
      "grad_norm": 24.931881082321315,
      "learning_rate": 8.665259359149131e-08,
      "logits/chosen": -2.534593105316162,
      "logits/rejected": -2.4346184730529785,
      "logps/chosen": -227.6776885986328,
      "logps/rejected": -204.8966064453125,
      "loss": 0.689,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.00954088568687439,
      "rewards/margins": 0.00823338981717825,
      "rewards/rejected": 0.0013074951712042093,
      "step": 30
    },
    {
      "epoch": 0.42,
      "grad_norm": 24.657287539381663,
      "learning_rate": 7.16941869558779e-08,
      "logits/chosen": -2.358189105987549,
      "logits/rejected": -2.440410614013672,
      "logps/chosen": -230.328857421875,
      "logps/rejected": -210.7056427001953,
      "loss": 0.6846,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.018901044502854347,
      "rewards/margins": 0.01690484955906868,
      "rewards/rejected": 0.0019961954094469547,
      "step": 40
    },
    {
      "epoch": 0.53,
      "grad_norm": 24.026917311824274,
      "learning_rate": 5.373650467932121e-08,
      "logits/chosen": -2.379296064376831,
      "logits/rejected": -2.6683709621429443,
      "logps/chosen": -233.249267578125,
      "logps/rejected": -199.204833984375,
      "loss": 0.6805,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": 0.02693324163556099,
      "rewards/margins": 0.024523768573999405,
      "rewards/rejected": 0.002409472828730941,
      "step": 50
    },
    {
      "epoch": 0.63,
      "grad_norm": 23.069042390909715,
      "learning_rate": 3.5262241279454787e-08,
      "logits/chosen": -2.233121156692505,
      "logits/rejected": -2.644624710083008,
      "logps/chosen": -250.5662078857422,
      "logps/rejected": -173.79067993164062,
      "loss": 0.6762,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.03840692713856697,
      "rewards/margins": 0.03592415899038315,
      "rewards/rejected": 0.0024827648885548115,
      "step": 60
    },
    {
      "epoch": 0.74,
      "grad_norm": 23.60279896856753,
      "learning_rate": 1.8825509907063325e-08,
      "logits/chosen": -2.354218006134033,
      "logits/rejected": -2.420661449432373,
      "logps/chosen": -241.6086883544922,
      "logps/rejected": -207.097900390625,
      "loss": 0.6725,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.04547766596078873,
      "rewards/margins": 0.04097073897719383,
      "rewards/rejected": 0.004506924655288458,
      "step": 70
    },
    {
      "epoch": 0.85,
      "grad_norm": 23.640093404183123,
      "learning_rate": 6.698729810778064e-09,
      "logits/chosen": -2.2659950256347656,
      "logits/rejected": -2.467071056365967,
      "logps/chosen": -243.5118408203125,
      "logps/rejected": -205.4396209716797,
      "loss": 0.6739,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.0479864627122879,
      "rewards/margins": 0.04288201406598091,
      "rewards/rejected": 0.005104447714984417,
      "step": 80
    },
    {
      "epoch": 0.95,
      "grad_norm": 22.97821730211384,
      "learning_rate": 5.584586887435739e-10,
      "logits/chosen": -2.3436455726623535,
      "logits/rejected": -2.367281436920166,
      "logps/chosen": -226.5314178466797,
      "logps/rejected": -200.30862426757812,
      "loss": 0.6745,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.043580617755651474,
      "rewards/margins": 0.037381939589977264,
      "rewards/rejected": 0.006198678631335497,
      "step": 90
    },
    {
      "epoch": 0.99,
      "step": 94,
      "total_flos": 0.0,
      "train_loss": 0.6816894855905087,
      "train_runtime": 1070.9433,
      "train_samples_per_second": 5.639,
      "train_steps_per_second": 0.088
    }
  ],
  "logging_steps": 10,
  "max_steps": 94,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}