|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 165, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.652278347218786, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": -0.8284896612167358, |
|
"logits/rejected": -0.9010236263275146, |
|
"logps/chosen": -1066.3585205078125, |
|
"logps/rejected": -1448.19970703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.746771519966634, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -0.8115520477294922, |
|
"logits/rejected": -0.8255029320716858, |
|
"logps/chosen": -1131.291259765625, |
|
"logps/rejected": -1369.7412109375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4652777910232544, |
|
"rewards/chosen": 0.0002041943371295929, |
|
"rewards/margins": -4.850090044783428e-05, |
|
"rewards/rejected": 0.0002526953467167914, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.451745733301935, |
|
"learning_rate": 4.994932636402031e-07, |
|
"logits/chosen": -0.7243806719779968, |
|
"logits/rejected": -0.8158847093582153, |
|
"logps/chosen": -1020.7599487304688, |
|
"logps/rejected": -1355.944091796875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.001858971663750708, |
|
"rewards/margins": 0.0021505323238670826, |
|
"rewards/rejected": -0.00029156063101254404, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.532594184835871, |
|
"learning_rate": 4.905416503522123e-07, |
|
"logits/chosen": -0.7353666424751282, |
|
"logits/rejected": -0.8100309371948242, |
|
"logps/chosen": -1033.032470703125, |
|
"logps/rejected": -1331.6929931640625, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.008143061771988869, |
|
"rewards/margins": 0.010795501992106438, |
|
"rewards/rejected": -0.002652441617101431, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.452955387273571, |
|
"learning_rate": 4.707922373336523e-07, |
|
"logits/chosen": -0.7547545433044434, |
|
"logits/rejected": -0.7800291776657104, |
|
"logps/chosen": -1057.7445068359375, |
|
"logps/rejected": -1296.575439453125, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.008273174986243248, |
|
"rewards/margins": 0.016675911843776703, |
|
"rewards/rejected": -0.008402736857533455, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.931805222376995, |
|
"learning_rate": 4.4113156629677313e-07, |
|
"logits/chosen": -0.7371411919593811, |
|
"logits/rejected": -0.6845098733901978, |
|
"logps/chosen": -1045.1011962890625, |
|
"logps/rejected": -1151.344970703125, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.017855554819107056, |
|
"rewards/margins": 0.04448147863149643, |
|
"rewards/rejected": -0.026625927537679672, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.689422676957636, |
|
"learning_rate": 4.0289109058972283e-07, |
|
"logits/chosen": -0.7692807912826538, |
|
"logits/rejected": -0.7662399411201477, |
|
"logps/chosen": -999.9730224609375, |
|
"logps/rejected": -1286.947509765625, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.027734506875276566, |
|
"rewards/margins": 0.07751207053661346, |
|
"rewards/rejected": -0.0497775673866272, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.956697872711078, |
|
"learning_rate": 3.577874068920446e-07, |
|
"logits/chosen": -0.7923519611358643, |
|
"logits/rejected": -0.8132171630859375, |
|
"logps/chosen": -1077.121337890625, |
|
"logps/rejected": -1317.41845703125, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.009537232108414173, |
|
"rewards/margins": 0.09025295078754425, |
|
"rewards/rejected": -0.08071572333574295, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 6.011926472486656, |
|
"learning_rate": 3.078451980100854e-07, |
|
"logits/chosen": -0.7588658928871155, |
|
"logits/rejected": -0.8289008140563965, |
|
"logps/chosen": -1011.5177612304688, |
|
"logps/rejected": -1298.1904296875, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.023491863161325455, |
|
"rewards/margins": 0.1584763377904892, |
|
"rewards/rejected": -0.13498449325561523, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.367792800931542, |
|
"learning_rate": 2.553063458334059e-07, |
|
"logits/chosen": -0.7922073006629944, |
|
"logits/rejected": -0.8237783312797546, |
|
"logps/chosen": -1067.434326171875, |
|
"logps/rejected": -1301.37109375, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.015568578615784645, |
|
"rewards/margins": 0.31723320484161377, |
|
"rewards/rejected": -0.3328017592430115, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 5.346586894544636, |
|
"learning_rate": 2.0252929432814287e-07, |
|
"logits/chosen": -0.779016375541687, |
|
"logits/rejected": -0.9120697975158691, |
|
"logps/chosen": -1015.7185668945312, |
|
"logps/rejected": -1394.1680908203125, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.05275765806436539, |
|
"rewards/margins": 0.5331910848617554, |
|
"rewards/rejected": -0.5859487056732178, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_logits/chosen": -0.6675596237182617, |
|
"eval_logits/rejected": -0.8939424753189087, |
|
"eval_logps/chosen": -826.0933837890625, |
|
"eval_logps/rejected": -1433.1563720703125, |
|
"eval_loss": 0.6218963861465454, |
|
"eval_rewards/accuracies": 0.7459239363670349, |
|
"eval_rewards/chosen": -0.044515106827020645, |
|
"eval_rewards/margins": 0.19978085160255432, |
|
"eval_rewards/rejected": -0.24429598450660706, |
|
"eval_runtime": 353.1381, |
|
"eval_samples_per_second": 8.289, |
|
"eval_steps_per_second": 0.261, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 5.41704752041782, |
|
"learning_rate": 1.5188318011445906e-07, |
|
"logits/chosen": -0.7974969744682312, |
|
"logits/rejected": -0.8456804156303406, |
|
"logps/chosen": -1040.8197021484375, |
|
"logps/rejected": -1285.4654541015625, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.08253589272499084, |
|
"rewards/margins": 0.788347601890564, |
|
"rewards/rejected": -0.870883584022522, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 6.860188078136068, |
|
"learning_rate": 1.0564148305586295e-07, |
|
"logits/chosen": -0.7957097291946411, |
|
"logits/rejected": -0.8646506071090698, |
|
"logps/chosen": -979.1201171875, |
|
"logps/rejected": -1402.442138671875, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.07362432777881622, |
|
"rewards/margins": 0.8031437993049622, |
|
"rewards/rejected": -0.8767681121826172, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 6.241835191835041, |
|
"learning_rate": 6.587997083462196e-08, |
|
"logits/chosen": -0.828117847442627, |
|
"logits/rejected": -0.8768518567085266, |
|
"logps/chosen": -1065.3460693359375, |
|
"logps/rejected": -1385.936767578125, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0992397740483284, |
|
"rewards/margins": 1.1248447895050049, |
|
"rewards/rejected": -1.2240846157073975, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.81375264804862, |
|
"learning_rate": 3.438351873250492e-08, |
|
"logits/chosen": -0.8043051958084106, |
|
"logits/rejected": -0.8954287767410278, |
|
"logps/chosen": -1059.5267333984375, |
|
"logps/rejected": -1414.9984130859375, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.17837993800640106, |
|
"rewards/margins": 0.9624137878417969, |
|
"rewards/rejected": -1.1407936811447144, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 7.068585992863548, |
|
"learning_rate": 1.256598743236703e-08, |
|
"logits/chosen": -0.8356617093086243, |
|
"logits/rejected": -0.8929821252822876, |
|
"logps/chosen": -1066.185302734375, |
|
"logps/rejected": -1415.41064453125, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.16673186421394348, |
|
"rewards/margins": 0.8981729745864868, |
|
"rewards/rejected": -1.0649049282073975, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 6.546330593670502, |
|
"learning_rate": 1.406755487774386e-09, |
|
"logits/chosen": -0.816728949546814, |
|
"logits/rejected": -0.8898676633834839, |
|
"logps/chosen": -1021.1590576171875, |
|
"logps/rejected": -1444.29296875, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.196590393781662, |
|
"rewards/margins": 0.9931901097297668, |
|
"rewards/rejected": -1.189780592918396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 165, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20238888480446554, |
|
"train_runtime": 919.6394, |
|
"train_samples_per_second": 11.461, |
|
"train_steps_per_second": 0.179 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 165, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|