{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.971563981042654,
  "eval_steps": 100,
  "global_step": 104,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018957345971563982,
      "grad_norm": 15.786988646394411,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": -13.905267715454102,
      "logits/rejected": -14.118387222290039,
      "logps/chosen": -350.8895263671875,
      "logps/rejected": -446.6286926269531,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 15.908099576913655,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": -14.040081024169922,
      "logits/rejected": -14.157392501831055,
      "logps/chosen": -416.2701416015625,
      "logps/rejected": -449.4697265625,
      "loss": 0.693,
      "rewards/accuracies": 0.5138888955116272,
      "rewards/chosen": 0.0006088384543545544,
      "rewards/margins": 0.008209776133298874,
      "rewards/rejected": -0.007600938435643911,
      "step": 10
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 14.229474825008781,
      "learning_rate": 4.885348141000122e-07,
      "logits/chosen": -13.39338207244873,
      "logits/rejected": -13.542058944702148,
      "logps/chosen": -392.9753723144531,
      "logps/rejected": -427.68096923828125,
      "loss": 0.6892,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.010071685537695885,
      "rewards/margins": 0.003802267834544182,
      "rewards/rejected": 0.006269416771829128,
      "step": 20
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 15.853985724357454,
      "learning_rate": 4.5025027361734613e-07,
      "logits/chosen": -14.269427299499512,
      "logits/rejected": -13.808093070983887,
      "logps/chosen": -412.9443359375,
      "logps/rejected": -428.38494873046875,
      "loss": 0.674,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.04771440848708153,
      "rewards/margins": 0.035354893654584885,
      "rewards/rejected": 0.012359511107206345,
      "step": 30
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 14.687978809678542,
      "learning_rate": 3.893311157806091e-07,
      "logits/chosen": -13.886492729187012,
      "logits/rejected": -13.28197956085205,
      "logps/chosen": -374.98211669921875,
      "logps/rejected": -366.5968322753906,
      "loss": 0.657,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.13442906737327576,
      "rewards/margins": 0.07902240008115768,
      "rewards/rejected": 0.05540664866566658,
      "step": 40
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 15.872142673244408,
      "learning_rate": 3.126631330646801e-07,
      "logits/chosen": -14.917936325073242,
      "logits/rejected": -14.90648078918457,
      "logps/chosen": -429.6836853027344,
      "logps/rejected": -480.3504943847656,
      "loss": 0.6344,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.24091288447380066,
      "rewards/margins": 0.1330389827489853,
      "rewards/rejected": 0.10787389427423477,
      "step": 50
    },
    {
      "epoch": 1.1374407582938388,
      "grad_norm": 14.061428605486398,
      "learning_rate": 2.2891223348923882e-07,
      "logits/chosen": -14.622962951660156,
      "logits/rejected": -14.403157234191895,
      "logps/chosen": -415.7464904785156,
      "logps/rejected": -441.731201171875,
      "loss": 0.6063,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.3395090103149414,
      "rewards/margins": 0.22218124568462372,
      "rewards/rejected": 0.11732780933380127,
      "step": 60
    },
    {
      "epoch": 1.3270142180094786,
      "grad_norm": 12.963152293888875,
      "learning_rate": 1.4754491880085317e-07,
      "logits/chosen": -14.022384643554688,
      "logits/rejected": -13.828951835632324,
      "logps/chosen": -382.23468017578125,
      "logps/rejected": -418.2818908691406,
      "loss": 0.6011,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.3396778702735901,
      "rewards/margins": 0.22157195210456848,
      "rewards/rejected": 0.118105947971344,
      "step": 70
    },
    {
      "epoch": 1.5165876777251186,
      "grad_norm": 12.394681314131397,
      "learning_rate": 7.775827023107834e-08,
      "logits/chosen": -13.705121040344238,
      "logits/rejected": -14.205709457397461,
      "logps/chosen": -367.263427734375,
      "logps/rejected": -423.30841064453125,
      "loss": 0.5788,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.36119210720062256,
      "rewards/margins": 0.3365553319454193,
      "rewards/rejected": 0.024636749178171158,
      "step": 80
    },
    {
      "epoch": 1.7061611374407581,
      "grad_norm": 14.456589635016153,
      "learning_rate": 2.7440387297912122e-08,
      "logits/chosen": -13.98394775390625,
      "logits/rejected": -14.161648750305176,
      "logps/chosen": -399.45458984375,
      "logps/rejected": -447.48828125,
      "loss": 0.5766,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.3995341658592224,
      "rewards/margins": 0.34082064032554626,
      "rewards/rejected": 0.05871356278657913,
      "step": 90
    },
    {
      "epoch": 1.8957345971563981,
      "grad_norm": 13.44211674398592,
      "learning_rate": 2.27878296044029e-09,
      "logits/chosen": -14.160197257995605,
      "logits/rejected": -14.141824722290039,
      "logps/chosen": -392.3072509765625,
      "logps/rejected": -421.604248046875,
      "loss": 0.5732,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.4029002785682678,
      "rewards/margins": 0.27652695775032043,
      "rewards/rejected": 0.1263733208179474,
      "step": 100
    },
    {
      "epoch": 1.8957345971563981,
      "eval_logits/chosen": -13.292621612548828,
      "eval_logits/rejected": -12.66539478302002,
      "eval_logps/chosen": -372.0066833496094,
      "eval_logps/rejected": -373.4093933105469,
      "eval_loss": 0.5971602201461792,
      "eval_rewards/accuracies": 0.6770833134651184,
      "eval_rewards/chosen": 0.3533553183078766,
      "eval_rewards/margins": 0.24372106790542603,
      "eval_rewards/rejected": 0.10963428020477295,
      "eval_runtime": 20.0916,
      "eval_samples_per_second": 37.329,
      "eval_steps_per_second": 1.195,
      "step": 100
    },
    {
      "epoch": 1.971563981042654,
      "step": 104,
      "total_flos": 0.0,
      "train_loss": 0.627926590350958,
      "train_runtime": 756.4701,
      "train_samples_per_second": 17.846,
      "train_steps_per_second": 0.137
    }
  ],
  "logging_steps": 10,
  "max_steps": 104,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}