|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 177, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"logits/chosen": -54.362911224365234, |
|
"logits/rejected": -50.76668930053711, |
|
"logps/chosen": -1319.19677734375, |
|
"logps/rejected": -33.71879577636719, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.40909090638160706, |
|
"rewards/chosen": 0.008085771463811398, |
|
"rewards/margins": 0.007936443202197552, |
|
"rewards/rejected": 0.0001493280433351174, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.9811320754716983e-05, |
|
"logits/chosen": -54.190120697021484, |
|
"logits/rejected": -52.07868957519531, |
|
"logps/chosen": -1108.5838623046875, |
|
"logps/rejected": -34.123477935791016, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.8636363744735718, |
|
"rewards/chosen": 0.12481586635112762, |
|
"rewards/margins": 0.1290697455406189, |
|
"rewards/rejected": -0.004253899212926626, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.7924528301886794e-05, |
|
"logits/chosen": -55.231224060058594, |
|
"logits/rejected": -52.04852294921875, |
|
"logps/chosen": -879.3148803710938, |
|
"logps/rejected": -34.018951416015625, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3259323835372925, |
|
"rewards/margins": 0.3459864854812622, |
|
"rewards/rejected": -0.020054107531905174, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5849056603773585e-05, |
|
"logits/chosen": -54.642879486083984, |
|
"logits/rejected": -50.72136306762695, |
|
"logps/chosen": -1260.130615234375, |
|
"logps/rejected": -32.81765365600586, |
|
"loss": 0.3731, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9432396292686462, |
|
"rewards/margins": 0.9746879935264587, |
|
"rewards/rejected": -0.03144851326942444, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.377358490566038e-05, |
|
"logits/chosen": -54.97840881347656, |
|
"logits/rejected": -52.43299865722656, |
|
"logps/chosen": -1015.5433959960938, |
|
"logps/rejected": -34.17496109008789, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0532668828964233, |
|
"rewards/margins": 1.1037672758102417, |
|
"rewards/rejected": -0.0505005344748497, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.169811320754717e-05, |
|
"logits/chosen": -55.37051773071289, |
|
"logits/rejected": -52.23636245727539, |
|
"logps/chosen": -1029.4456787109375, |
|
"logps/rejected": -34.25499725341797, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5273464918136597, |
|
"rewards/margins": 1.6002427339553833, |
|
"rewards/rejected": -0.07289613038301468, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9622641509433963e-05, |
|
"logits/chosen": -55.24615478515625, |
|
"logits/rejected": -51.99899673461914, |
|
"logps/chosen": -1143.59716796875, |
|
"logps/rejected": -35.575958251953125, |
|
"loss": 0.2115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0695841312408447, |
|
"rewards/margins": 2.1615474224090576, |
|
"rewards/rejected": -0.09196347743272781, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.7547169811320753e-05, |
|
"logits/chosen": -55.43460464477539, |
|
"logits/rejected": -50.45402908325195, |
|
"logps/chosen": -1283.4525146484375, |
|
"logps/rejected": -34.708351135253906, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9994964599609375, |
|
"rewards/margins": 3.1131112575531006, |
|
"rewards/rejected": -0.11361455917358398, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.5471698113207547e-05, |
|
"logits/chosen": -55.35545349121094, |
|
"logits/rejected": -52.1571044921875, |
|
"logps/chosen": -875.93359375, |
|
"logps/rejected": -33.44974136352539, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1976325511932373, |
|
"rewards/margins": 2.317676544189453, |
|
"rewards/rejected": -0.12004398554563522, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.339622641509434e-05, |
|
"logits/chosen": -55.0767936706543, |
|
"logits/rejected": -51.604347229003906, |
|
"logps/chosen": -1365.019775390625, |
|
"logps/rejected": -35.252044677734375, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.4024481773376465, |
|
"rewards/margins": 3.554830312728882, |
|
"rewards/rejected": -0.15238191187381744, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.1320754716981132e-05, |
|
"logits/chosen": -56.40107727050781, |
|
"logits/rejected": -54.048580169677734, |
|
"logps/chosen": -858.4847412109375, |
|
"logps/rejected": -34.9173698425293, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.170722484588623, |
|
"rewards/margins": 2.324540138244629, |
|
"rewards/rejected": -0.15381723642349243, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.245283018867924e-06, |
|
"logits/chosen": -55.104923248291016, |
|
"logits/rejected": -52.74159240722656, |
|
"logps/chosen": -809.7835693359375, |
|
"logps/rejected": -35.847755432128906, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.388211727142334, |
|
"rewards/margins": 2.551792860031128, |
|
"rewards/rejected": -0.16358119249343872, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.169811320754717e-06, |
|
"logits/chosen": -55.185218811035156, |
|
"logits/rejected": -52.399497985839844, |
|
"logps/chosen": -959.4953002929688, |
|
"logps/rejected": -34.56352615356445, |
|
"loss": 0.1291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7163705825805664, |
|
"rewards/margins": 2.8906729221343994, |
|
"rewards/rejected": -0.1743021458387375, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 5.094339622641509e-06, |
|
"logits/chosen": -53.87266540527344, |
|
"logits/rejected": -51.906978607177734, |
|
"logps/chosen": -1213.17529296875, |
|
"logps/rejected": -34.25074768066406, |
|
"loss": 0.069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.6375014781951904, |
|
"rewards/margins": 3.8169896602630615, |
|
"rewards/rejected": -0.17948788404464722, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.018867924528302e-06, |
|
"logits/chosen": -55.073856353759766, |
|
"logits/rejected": -50.98554611206055, |
|
"logps/chosen": -1360.9793701171875, |
|
"logps/rejected": -36.670528411865234, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.9220426082611084, |
|
"rewards/margins": 4.106159687042236, |
|
"rewards/rejected": -0.18411725759506226, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 9.433962264150943e-07, |
|
"logits/chosen": -55.68719482421875, |
|
"logits/rejected": -54.916015625, |
|
"logps/chosen": -787.702880859375, |
|
"logps/rejected": -35.69668960571289, |
|
"loss": 0.159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3595094680786133, |
|
"rewards/margins": 2.553260564804077, |
|
"rewards/rejected": -0.19375087320804596, |
|
"step": 176 |
|
} |
|
], |
|
"logging_steps": 11, |
|
"max_steps": 177, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|