{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 38, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 4.6681179725575745, "learning_rate": 1.25e-07, "logits/chosen": -0.3981982469558716, "logits/rejected": 0.007589429616928101, "logps/chosen": -253.3463134765625, "logps/rejected": -671.2213745117188, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.26, "grad_norm": 4.768585444261873, "learning_rate": 4.6255428393240354e-07, "logits/chosen": -0.5132235884666443, "logits/rejected": -0.01092798262834549, "logps/chosen": -188.30332946777344, "logps/rejected": -627.010986328125, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": 0.0005456734797917306, "rewards/margins": 0.004515086766332388, "rewards/rejected": -0.003969413228332996, "step": 10 }, { "epoch": 0.53, "grad_norm": 4.366842926411694, "learning_rate": 2.730670898658255e-07, "logits/chosen": -0.6587502360343933, "logits/rejected": -0.0813610702753067, "logps/chosen": -157.48902893066406, "logps/rejected": -626.0028686523438, "loss": 0.6775, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.002243603579699993, "rewards/margins": 0.03073681890964508, "rewards/rejected": -0.03298041969537735, "step": 20 }, { "epoch": 0.79, "grad_norm": 4.881380621263082, "learning_rate": 6.524777069483525e-08, "logits/chosen": -0.49691787362098694, "logits/rejected": -0.0660545602440834, "logps/chosen": -177.71261596679688, "logps/rejected": -657.294677734375, "loss": 0.6583, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.006053019780665636, "rewards/margins": 0.07823501527309418, "rewards/rejected": -0.08428805321455002, "step": 30 }, { "epoch": 1.0, "step": 38, "total_flos": 0.0, "train_loss": 0.6694252616480777, "train_runtime": 502.7814, "train_samples_per_second": 4.799, "train_steps_per_second": 0.076 } ], "logging_steps": 10, "max_steps": 38, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }