Minbyul's picture
Model save
0720715 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 164,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 6.5958876428735564,
"learning_rate": 2.941176470588235e-08,
"logits/chosen": -1.6130714416503906,
"logits/rejected": -1.7848026752471924,
"logps/chosen": -143.55209350585938,
"logps/rejected": -137.43441772460938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"grad_norm": 5.967532383605112,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": -1.8283494710922241,
"logits/rejected": -1.7852643728256226,
"logps/chosen": -158.81536865234375,
"logps/rejected": -151.6327362060547,
"loss": 0.693,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": 8.654648991068825e-05,
"rewards/margins": 0.0005829257424920797,
"rewards/rejected": -0.0004963793326169252,
"step": 10
},
{
"epoch": 0.12,
"grad_norm": 5.606818404653461,
"learning_rate": 4.994863481875841e-07,
"logits/chosen": -1.8151414394378662,
"logits/rejected": -1.7734615802764893,
"logps/chosen": -151.97584533691406,
"logps/rejected": -164.20437622070312,
"loss": 0.6923,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.003920617047697306,
"rewards/margins": 0.0024364024866372347,
"rewards/rejected": 0.001484214561060071,
"step": 20
},
{
"epoch": 0.18,
"grad_norm": 6.452038531330129,
"learning_rate": 4.904133592102591e-07,
"logits/chosen": -1.8305763006210327,
"logits/rejected": -1.7172702550888062,
"logps/chosen": -154.3677520751953,
"logps/rejected": -148.50753784179688,
"loss": 0.6882,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.016027290374040604,
"rewards/margins": 0.00950100552290678,
"rewards/rejected": 0.006526285316795111,
"step": 30
},
{
"epoch": 0.24,
"grad_norm": 6.2953570308846825,
"learning_rate": 4.704015606870022e-07,
"logits/chosen": -1.7697455883026123,
"logits/rejected": -1.7966588735580444,
"logps/chosen": -143.58848571777344,
"logps/rejected": -166.49522399902344,
"loss": 0.6829,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.03490619733929634,
"rewards/margins": 0.02003355883061886,
"rewards/rejected": 0.014872634783387184,
"step": 40
},
{
"epoch": 0.3,
"grad_norm": 6.274119591898531,
"learning_rate": 4.4036148959228356e-07,
"logits/chosen": -1.7394487857818604,
"logits/rejected": -1.804693579673767,
"logps/chosen": -159.61492919921875,
"logps/rejected": -136.1581268310547,
"loss": 0.6763,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.06023404002189636,
"rewards/margins": 0.042321957647800446,
"rewards/rejected": 0.017912080511450768,
"step": 50
},
{
"epoch": 0.37,
"grad_norm": 6.180992532830828,
"learning_rate": 4.016599693735638e-07,
"logits/chosen": -1.6605278253555298,
"logits/rejected": -1.724905252456665,
"logps/chosen": -146.7899932861328,
"logps/rejected": -148.02505493164062,
"loss": 0.6733,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.0686994269490242,
"rewards/margins": 0.04312276840209961,
"rewards/rejected": 0.02557666040956974,
"step": 60
},
{
"epoch": 0.43,
"grad_norm": 5.590599679916071,
"learning_rate": 3.5605791947475926e-07,
"logits/chosen": -1.7533237934112549,
"logits/rejected": -1.702845811843872,
"logps/chosen": -146.6136474609375,
"logps/rejected": -140.97921752929688,
"loss": 0.6631,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.07920090854167938,
"rewards/margins": 0.053236376494169235,
"rewards/rejected": 0.0259645227342844,
"step": 70
},
{
"epoch": 0.49,
"grad_norm": 5.096416269116106,
"learning_rate": 3.056302334890786e-07,
"logits/chosen": -1.616193413734436,
"logits/rejected": -1.6094154119491577,
"logps/chosen": -142.79188537597656,
"logps/rejected": -140.85447692871094,
"loss": 0.6609,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.09861920028924942,
"rewards/margins": 0.0706188827753067,
"rewards/rejected": 0.028000324964523315,
"step": 80
},
{
"epoch": 0.55,
"grad_norm": 5.517912420297569,
"learning_rate": 2.526713714858433e-07,
"logits/chosen": -1.608278512954712,
"logits/rejected": -1.5585658550262451,
"logps/chosen": -132.39981079101562,
"logps/rejected": -143.10488891601562,
"loss": 0.6557,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.1142318844795227,
"rewards/margins": 0.07896542549133301,
"rewards/rejected": 0.0352664515376091,
"step": 90
},
{
"epoch": 0.61,
"grad_norm": 5.179137970855667,
"learning_rate": 1.9959096206109175e-07,
"logits/chosen": -1.5899827480316162,
"logits/rejected": -1.5742290019989014,
"logps/chosen": -136.0356903076172,
"logps/rejected": -162.7815704345703,
"loss": 0.6508,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.11762702465057373,
"rewards/margins": 0.08622404932975769,
"rewards/rejected": 0.03140297532081604,
"step": 100
},
{
"epoch": 0.61,
"eval_logits/chosen": -1.6967989206314087,
"eval_logits/rejected": -1.6722551584243774,
"eval_logps/chosen": -158.87005615234375,
"eval_logps/rejected": -170.24278259277344,
"eval_loss": 0.6690559983253479,
"eval_rewards/accuracies": 0.6940954923629761,
"eval_rewards/chosen": 0.07056128978729248,
"eval_rewards/margins": 0.050339534878730774,
"eval_rewards/rejected": 0.020221758633852005,
"eval_runtime": 1977.6877,
"eval_samples_per_second": 9.659,
"eval_steps_per_second": 0.302,
"step": 100
},
{
"epoch": 0.67,
"grad_norm": 5.410829812028072,
"learning_rate": 1.4880416421940154e-07,
"logits/chosen": -1.6502714157104492,
"logits/rejected": -1.6523603200912476,
"logps/chosen": -134.38687133789062,
"logps/rejected": -157.00936889648438,
"loss": 0.6512,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.11995081603527069,
"rewards/margins": 0.09394902735948563,
"rewards/rejected": 0.026001790538430214,
"step": 110
},
{
"epoch": 0.73,
"grad_norm": 5.845780336717107,
"learning_rate": 1.0262177762208507e-07,
"logits/chosen": -1.565212607383728,
"logits/rejected": -1.6423566341400146,
"logps/chosen": -143.96304321289062,
"logps/rejected": -149.28546142578125,
"loss": 0.6496,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.13488885760307312,
"rewards/margins": 0.10831846296787262,
"rewards/rejected": 0.026570383459329605,
"step": 120
},
{
"epoch": 0.79,
"grad_norm": 5.76403048084688,
"learning_rate": 6.31451011862412e-08,
"logits/chosen": -1.6332323551177979,
"logits/rejected": -1.6044152975082397,
"logps/chosen": -137.62985229492188,
"logps/rejected": -159.90980529785156,
"loss": 0.6439,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.13013367354869843,
"rewards/margins": 0.10071909427642822,
"rewards/rejected": 0.02941458486020565,
"step": 130
},
{
"epoch": 0.85,
"grad_norm": 5.119446644831888,
"learning_rate": 3.217032396915265e-08,
"logits/chosen": -1.569746971130371,
"logits/rejected": -1.6146259307861328,
"logps/chosen": -130.83258056640625,
"logps/rejected": -160.59701538085938,
"loss": 0.6439,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.1322535276412964,
"rewards/margins": 0.10249896347522736,
"rewards/rejected": 0.029754554852843285,
"step": 140
},
{
"epoch": 0.91,
"grad_norm": 5.590191167835734,
"learning_rate": 1.1106798553464802e-08,
"logits/chosen": -1.6109774112701416,
"logits/rejected": -1.607143759727478,
"logps/chosen": -145.5422821044922,
"logps/rejected": -155.8082733154297,
"loss": 0.6426,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.14719954133033752,
"rewards/margins": 0.11081697046756744,
"rewards/rejected": 0.03638254478573799,
"step": 150
},
{
"epoch": 0.98,
"grad_norm": 5.417981503927173,
"learning_rate": 9.129154946982687e-10,
"logits/chosen": -1.5755327939987183,
"logits/rejected": -1.6533405780792236,
"logps/chosen": -144.75936889648438,
"logps/rejected": -150.3732452392578,
"loss": 0.6439,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.1261114478111267,
"rewards/margins": 0.10229575634002686,
"rewards/rejected": 0.023815687745809555,
"step": 160
},
{
"epoch": 1.0,
"step": 164,
"total_flos": 0.0,
"train_loss": 0.2519006322069866,
"train_runtime": 787.0698,
"train_samples_per_second": 13.311,
"train_steps_per_second": 0.208
}
],
"logging_steps": 10,
"max_steps": 164,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}