|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9915966386554622, |
|
"eval_steps": 100, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.892402047143212, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -1.1214768886566162, |
|
"logits/rejected": -1.0666239261627197, |
|
"logps/chosen": -773.5914306640625, |
|
"logps/rejected": -765.6082763671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 6.623544286492592, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -1.280522346496582, |
|
"logits/rejected": -0.9248583912849426, |
|
"logps/chosen": -503.0466613769531, |
|
"logps/rejected": -899.9366455078125, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5347222089767456, |
|
"rewards/chosen": -0.0006784469587728381, |
|
"rewards/margins": 0.004367371555417776, |
|
"rewards/rejected": -0.005045818164944649, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 7.366007707770025, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -1.253061294555664, |
|
"logits/rejected": -0.9847872853279114, |
|
"logps/chosen": -543.08447265625, |
|
"logps/rejected": -933.7767333984375, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.020030761137604713, |
|
"rewards/margins": 0.08102954924106598, |
|
"rewards/rejected": -0.10106030851602554, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.97929873252708, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -1.188706398010254, |
|
"logits/rejected": -0.9695678949356079, |
|
"logps/chosen": -547.2942504882812, |
|
"logps/rejected": -972.0201416015625, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08773749321699142, |
|
"rewards/margins": 0.4440121054649353, |
|
"rewards/rejected": -0.5317496061325073, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 7.373164759260948, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -1.121267557144165, |
|
"logits/rejected": -0.9862432479858398, |
|
"logps/chosen": -544.4817504882812, |
|
"logps/rejected": -1049.937744140625, |
|
"loss": 0.4145, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19056306779384613, |
|
"rewards/margins": 0.9457426071166992, |
|
"rewards/rejected": -1.136305570602417, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 6.631179730087488, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -1.0745445489883423, |
|
"logits/rejected": -1.0211777687072754, |
|
"logps/chosen": -568.6431884765625, |
|
"logps/rejected": -1094.7789306640625, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.33096131682395935, |
|
"rewards/margins": 1.3629460334777832, |
|
"rewards/rejected": -1.6939074993133545, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5081947011462713, |
|
"train_runtime": 896.7003, |
|
"train_samples_per_second": 4.227, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|