Minbyul's picture
Model save
319f07e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9947089947089947,
"eval_steps": 500,
"global_step": 94,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 25.005165779900267,
"learning_rate": 1e-08,
"logits/chosen": -1.9501205682754517,
"logits/rejected": -2.513594388961792,
"logps/chosen": -348.5884704589844,
"logps/rejected": -166.58517456054688,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.11,
"grad_norm": 23.68906488321,
"learning_rate": 1e-07,
"logits/chosen": -2.640984535217285,
"logits/rejected": -2.219906806945801,
"logps/chosen": -213.95584106445312,
"logps/rejected": -198.17874145507812,
"loss": 0.6932,
"rewards/accuracies": 0.4930555522441864,
"rewards/chosen": 0.0007135343039408326,
"rewards/margins": 0.0006534373969770968,
"rewards/rejected": 6.009703065501526e-05,
"step": 10
},
{
"epoch": 0.21,
"grad_norm": 22.52231020802604,
"learning_rate": 9.65436874322102e-08,
"logits/chosen": -2.397062301635742,
"logits/rejected": -2.3303606510162354,
"logps/chosen": -257.8389587402344,
"logps/rejected": -222.90444946289062,
"loss": 0.6917,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.0030278589110821486,
"rewards/margins": 0.002860091160982847,
"rewards/rejected": 0.00016776802658569068,
"step": 20
},
{
"epoch": 0.32,
"grad_norm": 24.931881082321315,
"learning_rate": 8.665259359149131e-08,
"logits/chosen": -2.534593105316162,
"logits/rejected": -2.4346184730529785,
"logps/chosen": -227.6776885986328,
"logps/rejected": -204.8966064453125,
"loss": 0.689,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.00954088568687439,
"rewards/margins": 0.00823338981717825,
"rewards/rejected": 0.0013074951712042093,
"step": 30
},
{
"epoch": 0.42,
"grad_norm": 24.657287539381663,
"learning_rate": 7.16941869558779e-08,
"logits/chosen": -2.358189105987549,
"logits/rejected": -2.440410614013672,
"logps/chosen": -230.328857421875,
"logps/rejected": -210.7056427001953,
"loss": 0.6846,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.018901044502854347,
"rewards/margins": 0.01690484955906868,
"rewards/rejected": 0.0019961954094469547,
"step": 40
},
{
"epoch": 0.53,
"grad_norm": 24.026917311824274,
"learning_rate": 5.373650467932121e-08,
"logits/chosen": -2.379296064376831,
"logits/rejected": -2.6683709621429443,
"logps/chosen": -233.249267578125,
"logps/rejected": -199.204833984375,
"loss": 0.6805,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 0.02693324163556099,
"rewards/margins": 0.024523768573999405,
"rewards/rejected": 0.002409472828730941,
"step": 50
},
{
"epoch": 0.63,
"grad_norm": 23.069042390909715,
"learning_rate": 3.5262241279454787e-08,
"logits/chosen": -2.233121156692505,
"logits/rejected": -2.644624710083008,
"logps/chosen": -250.5662078857422,
"logps/rejected": -173.79067993164062,
"loss": 0.6762,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.03840692713856697,
"rewards/margins": 0.03592415899038315,
"rewards/rejected": 0.0024827648885548115,
"step": 60
},
{
"epoch": 0.74,
"grad_norm": 23.60279896856753,
"learning_rate": 1.8825509907063325e-08,
"logits/chosen": -2.354218006134033,
"logits/rejected": -2.420661449432373,
"logps/chosen": -241.6086883544922,
"logps/rejected": -207.097900390625,
"loss": 0.6725,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.04547766596078873,
"rewards/margins": 0.04097073897719383,
"rewards/rejected": 0.004506924655288458,
"step": 70
},
{
"epoch": 0.85,
"grad_norm": 23.640093404183123,
"learning_rate": 6.698729810778064e-09,
"logits/chosen": -2.2659950256347656,
"logits/rejected": -2.467071056365967,
"logps/chosen": -243.5118408203125,
"logps/rejected": -205.4396209716797,
"loss": 0.6739,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 0.0479864627122879,
"rewards/margins": 0.04288201406598091,
"rewards/rejected": 0.005104447714984417,
"step": 80
},
{
"epoch": 0.95,
"grad_norm": 22.97821730211384,
"learning_rate": 5.584586887435739e-10,
"logits/chosen": -2.3436455726623535,
"logits/rejected": -2.367281436920166,
"logps/chosen": -226.5314178466797,
"logps/rejected": -200.30862426757812,
"loss": 0.6745,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.043580617755651474,
"rewards/margins": 0.037381939589977264,
"rewards/rejected": 0.006198678631335497,
"step": 90
},
{
"epoch": 0.99,
"step": 94,
"total_flos": 0.0,
"train_loss": 0.6816894855905087,
"train_runtime": 1070.9433,
"train_samples_per_second": 5.639,
"train_steps_per_second": 0.088
}
],
"logging_steps": 10,
"max_steps": 94,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}