{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9915966386554622, "eval_steps": 100, "global_step": 59, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 6.892402047143212, "learning_rate": 8.333333333333333e-08, "logits/chosen": -1.1214768886566162, "logits/rejected": -1.0666239261627197, "logps/chosen": -773.5914306640625, "logps/rejected": -765.6082763671875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.17, "grad_norm": 6.623544286492592, "learning_rate": 4.930057285201027e-07, "logits/chosen": -1.280522346496582, "logits/rejected": -0.9248583912849426, "logps/chosen": -503.0466613769531, "logps/rejected": -899.9366455078125, "loss": 0.6904, "rewards/accuracies": 0.5347222089767456, "rewards/chosen": -0.0006784469587728381, "rewards/margins": 0.004367371555417776, "rewards/rejected": -0.005045818164944649, "step": 10 }, { "epoch": 0.34, "grad_norm": 7.366007707770025, "learning_rate": 4.187457503795526e-07, "logits/chosen": -1.253061294555664, "logits/rejected": -0.9847872853279114, "logps/chosen": -543.08447265625, "logps/rejected": -933.7767333984375, "loss": 0.6527, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.020030761137604713, "rewards/margins": 0.08102954924106598, "rewards/rejected": -0.10106030851602554, "step": 20 }, { "epoch": 0.5, "grad_norm": 8.97929873252708, "learning_rate": 2.8691164100062034e-07, "logits/chosen": -1.188706398010254, "logits/rejected": -0.9695678949356079, "logps/chosen": -547.2942504882812, "logps/rejected": -972.0201416015625, "loss": 0.5278, "rewards/accuracies": 0.875, "rewards/chosen": -0.08773749321699142, "rewards/margins": 0.4440121054649353, "rewards/rejected": -0.5317496061325073, "step": 30 }, { "epoch": 0.67, "grad_norm": 7.373164759260948, "learning_rate": 1.4248369943086995e-07, "logits/chosen": -1.121267557144165, "logits/rejected": -0.9862432479858398, "logps/chosen": -544.4817504882812, "logps/rejected": -1049.937744140625, "loss": 0.4145, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19056306779384613, "rewards/margins": 0.9457426071166992, "rewards/rejected": -1.136305570602417, "step": 40 }, { "epoch": 0.84, "grad_norm": 6.631179730087488, "learning_rate": 3.473909705816111e-08, "logits/chosen": -1.0745445489883423, "logits/rejected": -1.0211777687072754, "logps/chosen": -568.6431884765625, "logps/rejected": -1094.7789306640625, "loss": 0.3982, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.33096131682395935, "rewards/margins": 1.3629460334777832, "rewards/rejected": -1.6939074993133545, "step": 50 }, { "epoch": 0.99, "step": 59, "total_flos": 0.0, "train_loss": 0.5081947011462713, "train_runtime": 896.7003, "train_samples_per_second": 4.227, "train_steps_per_second": 0.066 } ], "logging_steps": 10, "max_steps": 59, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }