|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.5958876428735564, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": -1.6130714416503906, |
|
"logits/rejected": -1.7848026752471924, |
|
"logps/chosen": -143.55209350585938, |
|
"logps/rejected": -137.43441772460938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.967532383605112, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.8283494710922241, |
|
"logits/rejected": -1.7852643728256226, |
|
"logps/chosen": -158.81536865234375, |
|
"logps/rejected": -151.6327362060547, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 8.654648991068825e-05, |
|
"rewards/margins": 0.0005829257424920797, |
|
"rewards/rejected": -0.0004963793326169252, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 5.606818404653461, |
|
"learning_rate": 4.994863481875841e-07, |
|
"logits/chosen": -1.8151414394378662, |
|
"logits/rejected": -1.7734615802764893, |
|
"logps/chosen": -151.97584533691406, |
|
"logps/rejected": -164.20437622070312, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.003920617047697306, |
|
"rewards/margins": 0.0024364024866372347, |
|
"rewards/rejected": 0.001484214561060071, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.452038531330129, |
|
"learning_rate": 4.904133592102591e-07, |
|
"logits/chosen": -1.8305763006210327, |
|
"logits/rejected": -1.7172702550888062, |
|
"logps/chosen": -154.3677520751953, |
|
"logps/rejected": -148.50753784179688, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.016027290374040604, |
|
"rewards/margins": 0.00950100552290678, |
|
"rewards/rejected": 0.006526285316795111, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 6.2953570308846825, |
|
"learning_rate": 4.704015606870022e-07, |
|
"logits/chosen": -1.7697455883026123, |
|
"logits/rejected": -1.7966588735580444, |
|
"logps/chosen": -143.58848571777344, |
|
"logps/rejected": -166.49522399902344, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.03490619733929634, |
|
"rewards/margins": 0.02003355883061886, |
|
"rewards/rejected": 0.014872634783387184, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.274119591898531, |
|
"learning_rate": 4.4036148959228356e-07, |
|
"logits/chosen": -1.7394487857818604, |
|
"logits/rejected": -1.804693579673767, |
|
"logps/chosen": -159.61492919921875, |
|
"logps/rejected": -136.1581268310547, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.06023404002189636, |
|
"rewards/margins": 0.042321957647800446, |
|
"rewards/rejected": 0.017912080511450768, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.180992532830828, |
|
"learning_rate": 4.016599693735638e-07, |
|
"logits/chosen": -1.6605278253555298, |
|
"logits/rejected": -1.724905252456665, |
|
"logps/chosen": -146.7899932861328, |
|
"logps/rejected": -148.02505493164062, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0686994269490242, |
|
"rewards/margins": 0.04312276840209961, |
|
"rewards/rejected": 0.02557666040956974, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.590599679916071, |
|
"learning_rate": 3.5605791947475926e-07, |
|
"logits/chosen": -1.7533237934112549, |
|
"logits/rejected": -1.702845811843872, |
|
"logps/chosen": -146.6136474609375, |
|
"logps/rejected": -140.97921752929688, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07920090854167938, |
|
"rewards/margins": 0.053236376494169235, |
|
"rewards/rejected": 0.0259645227342844, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 5.096416269116106, |
|
"learning_rate": 3.056302334890786e-07, |
|
"logits/chosen": -1.616193413734436, |
|
"logits/rejected": -1.6094154119491577, |
|
"logps/chosen": -142.79188537597656, |
|
"logps/rejected": -140.85447692871094, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.09861920028924942, |
|
"rewards/margins": 0.0706188827753067, |
|
"rewards/rejected": 0.028000324964523315, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.517912420297569, |
|
"learning_rate": 2.526713714858433e-07, |
|
"logits/chosen": -1.608278512954712, |
|
"logits/rejected": -1.5585658550262451, |
|
"logps/chosen": -132.39981079101562, |
|
"logps/rejected": -143.10488891601562, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.1142318844795227, |
|
"rewards/margins": 0.07896542549133301, |
|
"rewards/rejected": 0.0352664515376091, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 5.179137970855667, |
|
"learning_rate": 1.9959096206109175e-07, |
|
"logits/chosen": -1.5899827480316162, |
|
"logits/rejected": -1.5742290019989014, |
|
"logps/chosen": -136.0356903076172, |
|
"logps/rejected": -162.7815704345703, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.11762702465057373, |
|
"rewards/margins": 0.08622404932975769, |
|
"rewards/rejected": 0.03140297532081604, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_logits/chosen": -1.6967989206314087, |
|
"eval_logits/rejected": -1.6722551584243774, |
|
"eval_logps/chosen": -158.87005615234375, |
|
"eval_logps/rejected": -170.24278259277344, |
|
"eval_loss": 0.6690559983253479, |
|
"eval_rewards/accuracies": 0.6940954923629761, |
|
"eval_rewards/chosen": 0.07056128978729248, |
|
"eval_rewards/margins": 0.050339534878730774, |
|
"eval_rewards/rejected": 0.020221758633852005, |
|
"eval_runtime": 1977.6877, |
|
"eval_samples_per_second": 9.659, |
|
"eval_steps_per_second": 0.302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 5.410829812028072, |
|
"learning_rate": 1.4880416421940154e-07, |
|
"logits/chosen": -1.6502714157104492, |
|
"logits/rejected": -1.6523603200912476, |
|
"logps/chosen": -134.38687133789062, |
|
"logps/rejected": -157.00936889648438, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.11995081603527069, |
|
"rewards/margins": 0.09394902735948563, |
|
"rewards/rejected": 0.026001790538430214, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.845780336717107, |
|
"learning_rate": 1.0262177762208507e-07, |
|
"logits/chosen": -1.565212607383728, |
|
"logits/rejected": -1.6423566341400146, |
|
"logps/chosen": -143.96304321289062, |
|
"logps/rejected": -149.28546142578125, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.13488885760307312, |
|
"rewards/margins": 0.10831846296787262, |
|
"rewards/rejected": 0.026570383459329605, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.76403048084688, |
|
"learning_rate": 6.31451011862412e-08, |
|
"logits/chosen": -1.6332323551177979, |
|
"logits/rejected": -1.6044152975082397, |
|
"logps/chosen": -137.62985229492188, |
|
"logps/rejected": -159.90980529785156, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.13013367354869843, |
|
"rewards/margins": 0.10071909427642822, |
|
"rewards/rejected": 0.02941458486020565, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.119446644831888, |
|
"learning_rate": 3.217032396915265e-08, |
|
"logits/chosen": -1.569746971130371, |
|
"logits/rejected": -1.6146259307861328, |
|
"logps/chosen": -130.83258056640625, |
|
"logps/rejected": -160.59701538085938, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.1322535276412964, |
|
"rewards/margins": 0.10249896347522736, |
|
"rewards/rejected": 0.029754554852843285, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 5.590191167835734, |
|
"learning_rate": 1.1106798553464802e-08, |
|
"logits/chosen": -1.6109774112701416, |
|
"logits/rejected": -1.607143759727478, |
|
"logps/chosen": -145.5422821044922, |
|
"logps/rejected": -155.8082733154297, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.14719954133033752, |
|
"rewards/margins": 0.11081697046756744, |
|
"rewards/rejected": 0.03638254478573799, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 5.417981503927173, |
|
"learning_rate": 9.129154946982687e-10, |
|
"logits/chosen": -1.5755327939987183, |
|
"logits/rejected": -1.6533405780792236, |
|
"logps/chosen": -144.75936889648438, |
|
"logps/rejected": -150.3732452392578, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.1261114478111267, |
|
"rewards/margins": 0.10229575634002686, |
|
"rewards/rejected": 0.023815687745809555, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 164, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2519006322069866, |
|
"train_runtime": 787.0698, |
|
"train_samples_per_second": 13.311, |
|
"train_steps_per_second": 0.208 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 164, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|