Minbyul's picture
Model save
8b48651 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 122,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 43.23719376156736,
"learning_rate": 3.846153846153846e-08,
"logits/chosen": -3.6897170543670654,
"logits/rejected": -3.519662618637085,
"logps/chosen": -584.1221923828125,
"logps/rejected": -1429.938720703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.08,
"grad_norm": 36.75962682478825,
"learning_rate": 3.8461538461538463e-07,
"logits/chosen": -3.6686697006225586,
"logits/rejected": -3.5728933811187744,
"logps/chosen": -948.7052001953125,
"logps/rejected": -1359.1160888671875,
"loss": 0.6873,
"rewards/accuracies": 0.5902777910232544,
"rewards/chosen": 0.0016961859073489904,
"rewards/margins": 0.011211401782929897,
"rewards/rejected": -0.009515216574072838,
"step": 10
},
{
"epoch": 0.16,
"grad_norm": 26.68557894354543,
"learning_rate": 4.949291683053768e-07,
"logits/chosen": -3.7271945476531982,
"logits/rejected": -3.6335723400115967,
"logps/chosen": -889.2982177734375,
"logps/rejected": -1387.8297119140625,
"loss": 0.5822,
"rewards/accuracies": 0.90625,
"rewards/chosen": 0.022394303232431412,
"rewards/margins": 0.24209070205688477,
"rewards/rejected": -0.21969637274742126,
"step": 20
},
{
"epoch": 0.25,
"grad_norm": 24.2663500160121,
"learning_rate": 4.70586371748506e-07,
"logits/chosen": -3.849905490875244,
"logits/rejected": -3.791762590408325,
"logps/chosen": -940.0631103515625,
"logps/rejected": -1561.598876953125,
"loss": 0.3698,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.10165198147296906,
"rewards/margins": 1.3781466484069824,
"rewards/rejected": -1.4797985553741455,
"step": 30
},
{
"epoch": 0.33,
"grad_norm": 20.782644086151006,
"learning_rate": 4.280458575653296e-07,
"logits/chosen": -4.025510311126709,
"logits/rejected": -3.9784233570098877,
"logps/chosen": -968.7717895507812,
"logps/rejected": -1640.0823974609375,
"loss": 0.2677,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5273348093032837,
"rewards/margins": 2.6067256927490234,
"rewards/rejected": -3.1340603828430176,
"step": 40
},
{
"epoch": 0.41,
"grad_norm": 18.40887250285513,
"learning_rate": 3.7081709127108767e-07,
"logits/chosen": -4.091545104980469,
"logits/rejected": -4.069024085998535,
"logps/chosen": -968.97900390625,
"logps/rejected": -1849.377197265625,
"loss": 0.1851,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.7245203256607056,
"rewards/margins": 3.8392529487609863,
"rewards/rejected": -4.5637736320495605,
"step": 50
},
{
"epoch": 0.49,
"grad_norm": 30.82289433036107,
"learning_rate": 3.0362127536287636e-07,
"logits/chosen": -4.057796001434326,
"logits/rejected": -4.068426132202148,
"logps/chosen": -992.41796875,
"logps/rejected": -1906.330810546875,
"loss": 0.1831,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.9383566975593567,
"rewards/margins": 4.514256477355957,
"rewards/rejected": -5.452613353729248,
"step": 60
},
{
"epoch": 0.57,
"grad_norm": 14.214509503993076,
"learning_rate": 2.3200186419770823e-07,
"logits/chosen": -4.036534309387207,
"logits/rejected": -4.083151817321777,
"logps/chosen": -1086.2425537109375,
"logps/rejected": -1876.317138671875,
"loss": 0.1335,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -1.1720283031463623,
"rewards/margins": 4.419107437133789,
"rewards/rejected": -5.591135501861572,
"step": 70
},
{
"epoch": 0.66,
"grad_norm": 16.47757248085571,
"learning_rate": 1.6186724554503237e-07,
"logits/chosen": -4.064330101013184,
"logits/rejected": -4.05181360244751,
"logps/chosen": -1022.8089599609375,
"logps/rejected": -2001.3753662109375,
"loss": 0.1218,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.9989339113235474,
"rewards/margins": 5.62686014175415,
"rewards/rejected": -6.625794410705566,
"step": 80
},
{
"epoch": 0.74,
"grad_norm": 18.42221667739246,
"learning_rate": 9.900331622138063e-08,
"logits/chosen": -4.04154109954834,
"logits/rejected": -4.0591230392456055,
"logps/chosen": -946.9400634765625,
"logps/rejected": -2018.881591796875,
"loss": 0.1367,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.8327393531799316,
"rewards/margins": 5.7661213874816895,
"rewards/rejected": -6.598860263824463,
"step": 90
},
{
"epoch": 0.82,
"grad_norm": 27.496315989869185,
"learning_rate": 4.859616286322094e-08,
"logits/chosen": -4.036691188812256,
"logits/rejected": -4.036020755767822,
"logps/chosen": -1030.328369140625,
"logps/rejected": -2058.629638671875,
"loss": 0.1183,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.1028516292572021,
"rewards/margins": 5.824145317077637,
"rewards/rejected": -6.92699670791626,
"step": 100
},
{
"epoch": 0.82,
"eval_logits/chosen": -5.204982280731201,
"eval_logits/rejected": -4.0475640296936035,
"eval_logps/chosen": -89.41363525390625,
"eval_logps/rejected": -486.17486572265625,
"eval_loss": 0.4731297492980957,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -0.1248578280210495,
"eval_rewards/margins": 0.583656907081604,
"eval_rewards/rejected": -0.7085147500038147,
"eval_runtime": 5.5516,
"eval_samples_per_second": 0.721,
"eval_steps_per_second": 0.18,
"step": 100
},
{
"epoch": 0.9,
"grad_norm": 16.740929534156027,
"learning_rate": 1.4804225250339281e-08,
"logits/chosen": -4.010983467102051,
"logits/rejected": -4.0438127517700195,
"logps/chosen": -967.0818481445312,
"logps/rejected": -1875.3775634765625,
"loss": 0.1294,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.9154699444770813,
"rewards/margins": 4.917794704437256,
"rewards/rejected": -5.8332648277282715,
"step": 110
},
{
"epoch": 0.98,
"grad_norm": 17.270638468872217,
"learning_rate": 4.152374292708538e-10,
"logits/chosen": -4.0482587814331055,
"logits/rejected": -4.022861480712891,
"logps/chosen": -1003.4119262695312,
"logps/rejected": -2065.379638671875,
"loss": 0.1025,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.9699057340621948,
"rewards/margins": 5.419854164123535,
"rewards/rejected": -6.3897600173950195,
"step": 120
},
{
"epoch": 1.0,
"step": 122,
"total_flos": 0.0,
"train_loss": 0.24888706256131657,
"train_runtime": 2705.2939,
"train_samples_per_second": 2.886,
"train_steps_per_second": 0.045
}
],
"logging_steps": 10,
"max_steps": 122,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}