|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 396, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0009370408370159566, |
|
"learning_rate": 1.95e-05, |
|
"logits/chosen": -22.68578338623047, |
|
"logits/rejected": -22.825130462646484, |
|
"logps/chosen": -78.43010711669922, |
|
"logps/rejected": -102.63253784179688, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.9230769276618958, |
|
"rewards/chosen": 2.8142247200012207, |
|
"rewards/margins": 4.811609268188477, |
|
"rewards/rejected": -1.997384786605835, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.192274041590281e-05, |
|
"learning_rate": 2.8988764044943823e-05, |
|
"logits/chosen": -23.15672492980957, |
|
"logits/rejected": -23.271657943725586, |
|
"logps/chosen": -43.96305465698242, |
|
"logps/rejected": -155.8493194580078, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.304109573364258, |
|
"rewards/margins": 13.57724380493164, |
|
"rewards/rejected": -7.273132801055908, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.914922904688865e-05, |
|
"learning_rate": 2.6797752808988762e-05, |
|
"logits/chosen": -23.230398178100586, |
|
"logits/rejected": -23.34272575378418, |
|
"logps/chosen": -42.47319030761719, |
|
"logps/rejected": -166.07025146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.426936149597168, |
|
"rewards/margins": 14.743646621704102, |
|
"rewards/rejected": -8.316710472106934, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.923706804518588e-05, |
|
"learning_rate": 2.4606741573033708e-05, |
|
"logits/chosen": -23.297359466552734, |
|
"logits/rejected": -23.406126022338867, |
|
"logps/chosen": -42.298927307128906, |
|
"logps/rejected": -167.82479858398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.4410505294799805, |
|
"rewards/margins": 14.930893898010254, |
|
"rewards/rejected": -8.48984432220459, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.00013795163249596953, |
|
"learning_rate": 2.2415730337078654e-05, |
|
"logits/chosen": -23.36400032043457, |
|
"logits/rejected": -23.47435760498047, |
|
"logps/chosen": -42.36582565307617, |
|
"logps/rejected": -167.81008911132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.42643404006958, |
|
"rewards/margins": 14.924281120300293, |
|
"rewards/rejected": -8.497847557067871, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.9170562154613435e-05, |
|
"learning_rate": 2.0224719101123596e-05, |
|
"logits/chosen": -23.275028228759766, |
|
"logits/rejected": -23.385255813598633, |
|
"logps/chosen": -42.21509552001953, |
|
"logps/rejected": -167.64584350585938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.436044692993164, |
|
"rewards/margins": 14.92562198638916, |
|
"rewards/rejected": -8.48957633972168, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.7988468243856914e-05, |
|
"learning_rate": 1.803370786516854e-05, |
|
"logits/chosen": -23.300710678100586, |
|
"logits/rejected": -23.410503387451172, |
|
"logps/chosen": -42.11836624145508, |
|
"logps/rejected": -169.15122985839844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.481032371520996, |
|
"rewards/margins": 15.097738265991211, |
|
"rewards/rejected": -8.616707801818848, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.1618798200506717e-05, |
|
"learning_rate": 1.5842696629213484e-05, |
|
"logits/chosen": -23.332603454589844, |
|
"logits/rejected": -23.443927764892578, |
|
"logps/chosen": -42.54871368408203, |
|
"logps/rejected": -167.39431762695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.385127067565918, |
|
"rewards/margins": 14.849527359008789, |
|
"rewards/rejected": -8.464401245117188, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.6424854038632475e-05, |
|
"learning_rate": 1.3651685393258428e-05, |
|
"logits/chosen": -23.301807403564453, |
|
"logits/rejected": -23.41258430480957, |
|
"logps/chosen": -42.13775634765625, |
|
"logps/rejected": -168.62362670898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.500583171844482, |
|
"rewards/margins": 15.051546096801758, |
|
"rewards/rejected": -8.550962448120117, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.6196921933442354e-05, |
|
"learning_rate": 1.146067415730337e-05, |
|
"logits/chosen": -23.335044860839844, |
|
"logits/rejected": -23.44767951965332, |
|
"logps/chosen": -42.26608657836914, |
|
"logps/rejected": -167.9331817626953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.42073917388916, |
|
"rewards/margins": 14.927839279174805, |
|
"rewards/rejected": -8.507099151611328, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.7393831512890756e-05, |
|
"learning_rate": 9.269662921348314e-06, |
|
"logits/chosen": -23.317138671875, |
|
"logits/rejected": -23.424781799316406, |
|
"logps/chosen": -41.94506072998047, |
|
"logps/rejected": -168.30055236816406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.4749298095703125, |
|
"rewards/margins": 15.034900665283203, |
|
"rewards/rejected": -8.55997085571289, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.7613503587199375e-05, |
|
"learning_rate": 7.078651685393258e-06, |
|
"logits/chosen": -23.292570114135742, |
|
"logits/rejected": -23.40188217163086, |
|
"logps/chosen": -42.209102630615234, |
|
"logps/rejected": -169.81747436523438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.467267990112305, |
|
"rewards/margins": 15.145313262939453, |
|
"rewards/rejected": -8.678045272827148, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.0001227569446200505, |
|
"learning_rate": 4.8876404494382024e-06, |
|
"logits/chosen": -23.324228286743164, |
|
"logits/rejected": -23.43165397644043, |
|
"logps/chosen": -41.959617614746094, |
|
"logps/rejected": -169.5789337158203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.512591361999512, |
|
"rewards/margins": 15.166027069091797, |
|
"rewards/rejected": -8.653436660766602, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.6931946447584778e-05, |
|
"learning_rate": 2.696629213483146e-06, |
|
"logits/chosen": -23.30262565612793, |
|
"logits/rejected": -23.414592742919922, |
|
"logps/chosen": -42.351627349853516, |
|
"logps/rejected": -168.41354370117188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.420273780822754, |
|
"rewards/margins": 14.962029457092285, |
|
"rewards/rejected": -8.541756629943848, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 1.576123213453684e-05, |
|
"learning_rate": 5.056179775280899e-07, |
|
"logits/chosen": -23.35226058959961, |
|
"logits/rejected": -23.466854095458984, |
|
"logps/chosen": -42.21808624267578, |
|
"logps/rejected": -168.39918518066406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.449341297149658, |
|
"rewards/margins": 15.006133079528809, |
|
"rewards/rejected": -8.556791305541992, |
|
"step": 390 |
|
} |
|
], |
|
"logging_steps": 26, |
|
"max_steps": 396, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|