Minbyul's picture
Model save
544fffb verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 4.652278347218786,
"learning_rate": 2.941176470588235e-08,
"logits/chosen": -0.8284896612167358,
"logits/rejected": -0.9010236263275146,
"logps/chosen": -1066.3585205078125,
"logps/rejected": -1448.19970703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"grad_norm": 4.746771519966634,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": -0.8115520477294922,
"logits/rejected": -0.8255029320716858,
"logps/chosen": -1131.291259765625,
"logps/rejected": -1369.7412109375,
"loss": 0.6932,
"rewards/accuracies": 0.4652777910232544,
"rewards/chosen": 0.0002041943371295929,
"rewards/margins": -4.850090044783428e-05,
"rewards/rejected": 0.0002526953467167914,
"step": 10
},
{
"epoch": 0.12,
"grad_norm": 4.451745733301935,
"learning_rate": 4.994932636402031e-07,
"logits/chosen": -0.7243806719779968,
"logits/rejected": -0.8158847093582153,
"logps/chosen": -1020.7599487304688,
"logps/rejected": -1355.944091796875,
"loss": 0.6921,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.001858971663750708,
"rewards/margins": 0.0021505323238670826,
"rewards/rejected": -0.00029156063101254404,
"step": 20
},
{
"epoch": 0.18,
"grad_norm": 4.532594184835871,
"learning_rate": 4.905416503522123e-07,
"logits/chosen": -0.7353666424751282,
"logits/rejected": -0.8100309371948242,
"logps/chosen": -1033.032470703125,
"logps/rejected": -1331.6929931640625,
"loss": 0.688,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.008143061771988869,
"rewards/margins": 0.010795501992106438,
"rewards/rejected": -0.002652441617101431,
"step": 30
},
{
"epoch": 0.24,
"grad_norm": 4.452955387273571,
"learning_rate": 4.707922373336523e-07,
"logits/chosen": -0.7547545433044434,
"logits/rejected": -0.7800291776657104,
"logps/chosen": -1057.7445068359375,
"logps/rejected": -1296.575439453125,
"loss": 0.6825,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.008273174986243248,
"rewards/margins": 0.016675911843776703,
"rewards/rejected": -0.008402736857533455,
"step": 40
},
{
"epoch": 0.3,
"grad_norm": 4.931805222376995,
"learning_rate": 4.4113156629677313e-07,
"logits/chosen": -0.7371411919593811,
"logits/rejected": -0.6845098733901978,
"logps/chosen": -1045.1011962890625,
"logps/rejected": -1151.344970703125,
"loss": 0.6718,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 0.017855554819107056,
"rewards/margins": 0.04448147863149643,
"rewards/rejected": -0.026625927537679672,
"step": 50
},
{
"epoch": 0.36,
"grad_norm": 4.689422676957636,
"learning_rate": 4.0289109058972283e-07,
"logits/chosen": -0.7692807912826538,
"logits/rejected": -0.7662399411201477,
"logps/chosen": -999.9730224609375,
"logps/rejected": -1286.947509765625,
"loss": 0.6595,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.027734506875276566,
"rewards/margins": 0.07751207053661346,
"rewards/rejected": -0.0497775673866272,
"step": 60
},
{
"epoch": 0.42,
"grad_norm": 4.956697872711078,
"learning_rate": 3.577874068920446e-07,
"logits/chosen": -0.7923519611358643,
"logits/rejected": -0.8132171630859375,
"logps/chosen": -1077.121337890625,
"logps/rejected": -1317.41845703125,
"loss": 0.6474,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.009537232108414173,
"rewards/margins": 0.09025295078754425,
"rewards/rejected": -0.08071572333574295,
"step": 70
},
{
"epoch": 0.48,
"grad_norm": 6.011926472486656,
"learning_rate": 3.078451980100854e-07,
"logits/chosen": -0.7588658928871155,
"logits/rejected": -0.8289008140563965,
"logps/chosen": -1011.5177612304688,
"logps/rejected": -1298.1904296875,
"loss": 0.6262,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.023491863161325455,
"rewards/margins": 0.1584763377904892,
"rewards/rejected": -0.13498449325561523,
"step": 80
},
{
"epoch": 0.55,
"grad_norm": 5.367792800931542,
"learning_rate": 2.553063458334059e-07,
"logits/chosen": -0.7922073006629944,
"logits/rejected": -0.8237783312797546,
"logps/chosen": -1067.434326171875,
"logps/rejected": -1301.37109375,
"loss": 0.5865,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.015568578615784645,
"rewards/margins": 0.31723320484161377,
"rewards/rejected": -0.3328017592430115,
"step": 90
},
{
"epoch": 0.61,
"grad_norm": 5.346586894544636,
"learning_rate": 2.0252929432814287e-07,
"logits/chosen": -0.779016375541687,
"logits/rejected": -0.9120697975158691,
"logps/chosen": -1015.7185668945312,
"logps/rejected": -1394.1680908203125,
"loss": 0.5615,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.05275765806436539,
"rewards/margins": 0.5331910848617554,
"rewards/rejected": -0.5859487056732178,
"step": 100
},
{
"epoch": 0.61,
"eval_logits/chosen": -0.6675596237182617,
"eval_logits/rejected": -0.8939424753189087,
"eval_logps/chosen": -826.0933837890625,
"eval_logps/rejected": -1433.1563720703125,
"eval_loss": 0.6218963861465454,
"eval_rewards/accuracies": 0.7459239363670349,
"eval_rewards/chosen": -0.044515106827020645,
"eval_rewards/margins": 0.19978085160255432,
"eval_rewards/rejected": -0.24429598450660706,
"eval_runtime": 353.1381,
"eval_samples_per_second": 8.289,
"eval_steps_per_second": 0.261,
"step": 100
},
{
"epoch": 0.67,
"grad_norm": 5.41704752041782,
"learning_rate": 1.5188318011445906e-07,
"logits/chosen": -0.7974969744682312,
"logits/rejected": -0.8456804156303406,
"logps/chosen": -1040.8197021484375,
"logps/rejected": -1285.4654541015625,
"loss": 0.5406,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.08253589272499084,
"rewards/margins": 0.788347601890564,
"rewards/rejected": -0.870883584022522,
"step": 110
},
{
"epoch": 0.73,
"grad_norm": 6.860188078136068,
"learning_rate": 1.0564148305586295e-07,
"logits/chosen": -0.7957097291946411,
"logits/rejected": -0.8646506071090698,
"logps/chosen": -979.1201171875,
"logps/rejected": -1402.442138671875,
"loss": 0.5115,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.07362432777881622,
"rewards/margins": 0.8031437993049622,
"rewards/rejected": -0.8767681121826172,
"step": 120
},
{
"epoch": 0.79,
"grad_norm": 6.241835191835041,
"learning_rate": 6.587997083462196e-08,
"logits/chosen": -0.828117847442627,
"logits/rejected": -0.8768518567085266,
"logps/chosen": -1065.3460693359375,
"logps/rejected": -1385.936767578125,
"loss": 0.5146,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.0992397740483284,
"rewards/margins": 1.1248447895050049,
"rewards/rejected": -1.2240846157073975,
"step": 130
},
{
"epoch": 0.85,
"grad_norm": 6.81375264804862,
"learning_rate": 3.438351873250492e-08,
"logits/chosen": -0.8043051958084106,
"logits/rejected": -0.8954287767410278,
"logps/chosen": -1059.5267333984375,
"logps/rejected": -1414.9984130859375,
"loss": 0.5123,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.17837993800640106,
"rewards/margins": 0.9624137878417969,
"rewards/rejected": -1.1407936811447144,
"step": 140
},
{
"epoch": 0.91,
"grad_norm": 7.068585992863548,
"learning_rate": 1.256598743236703e-08,
"logits/chosen": -0.8356617093086243,
"logits/rejected": -0.8929821252822876,
"logps/chosen": -1066.185302734375,
"logps/rejected": -1415.41064453125,
"loss": 0.5079,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.16673186421394348,
"rewards/margins": 0.8981729745864868,
"rewards/rejected": -1.0649049282073975,
"step": 150
},
{
"epoch": 0.97,
"grad_norm": 6.546330593670502,
"learning_rate": 1.406755487774386e-09,
"logits/chosen": -0.816728949546814,
"logits/rejected": -0.8898676633834839,
"logps/chosen": -1021.1590576171875,
"logps/rejected": -1444.29296875,
"loss": 0.4992,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.196590393781662,
"rewards/margins": 0.9931901097297668,
"rewards/rejected": -1.189780592918396,
"step": 160
},
{
"epoch": 1.0,
"step": 165,
"total_flos": 0.0,
"train_loss": 0.20238888480446554,
"train_runtime": 919.6394,
"train_samples_per_second": 11.461,
"train_steps_per_second": 0.179
}
],
"logging_steps": 10,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}