{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 4.271874539371896, "learning_rate": 5.555555555555555e-08, "logits/chosen": 0.23574040830135345, "logits/rejected": -0.9465489983558655, "logps/chosen": -461.99664306640625, "logps/rejected": -1076.3154296875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.12, "grad_norm": 4.053815107503334, "learning_rate": 4.997620553954645e-07, "logits/chosen": 0.3158790171146393, "logits/rejected": -0.6447792649269104, "logps/chosen": -491.0052185058594, "logps/rejected": -1057.68212890625, "loss": 0.693, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.0007979909423738718, "rewards/margins": 0.00010702761210268363, "rewards/rejected": 0.0006909631192684174, "step": 10 }, { "epoch": 0.25, "grad_norm": 3.8468513788685024, "learning_rate": 4.717527082945554e-07, "logits/chosen": 0.3795907199382782, "logits/rejected": -0.6666702032089233, "logps/chosen": -426.83050537109375, "logps/rejected": -1043.7567138671875, "loss": 0.6899, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.00618498120456934, "rewards/margins": 0.005285660736262798, "rewards/rejected": 0.0008993210503831506, "step": 20 }, { "epoch": 0.37, "grad_norm": 3.809120262255183, "learning_rate": 4.0219035725218013e-07, "logits/chosen": 0.4218289256095886, "logits/rejected": -0.7673497796058655, "logps/chosen": -476.8456115722656, "logps/rejected": -1008.1292114257812, "loss": 0.6821, "rewards/accuracies": 0.75, "rewards/chosen": 0.019507069140672684, "rewards/margins": 0.0191163569688797, "rewards/rejected": 0.00039071092032827437, "step": 30 }, { "epoch": 0.49, "grad_norm": 3.8969795803158, "learning_rate": 3.041099034845257e-07, "logits/chosen": 0.24538719654083252, "logits/rejected": -0.5925542712211609, "logps/chosen": -496.660400390625, "logps/rejected": -976.67041015625, "loss": 0.6779, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.027163496240973473, "rewards/margins": 0.030033668503165245, "rewards/rejected": -0.0028701708652079105, "step": 40 }, { "epoch": 0.62, "grad_norm": 3.9669240189360293, "learning_rate": 1.9589009651547428e-07, "logits/chosen": 0.4538944661617279, "logits/rejected": -0.61766117811203, "logps/chosen": -444.9410095214844, "logps/rejected": -980.8577270507812, "loss": 0.6697, "rewards/accuracies": 0.831250011920929, "rewards/chosen": 0.04584876075387001, "rewards/margins": 0.04866841062903404, "rewards/rejected": -0.002819649875164032, "step": 50 }, { "epoch": 0.74, "grad_norm": 3.761586308179101, "learning_rate": 9.780964274781983e-08, "logits/chosen": 0.3308876156806946, "logits/rejected": -0.6796020269393921, "logps/chosen": -471.8916931152344, "logps/rejected": -997.9669799804688, "loss": 0.6645, "rewards/accuracies": 0.893750011920929, "rewards/chosen": 0.05425606295466423, "rewards/margins": 0.06382595747709274, "rewards/rejected": -0.009569892659783363, "step": 60 }, { "epoch": 0.86, "grad_norm": 3.8973053098411667, "learning_rate": 2.824729170544457e-08, "logits/chosen": 0.3237437307834625, "logits/rejected": -0.6007438898086548, "logps/chosen": -452.1813049316406, "logps/rejected": -974.1201171875, "loss": 0.6626, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": 0.041471779346466064, "rewards/margins": 0.04470660537481308, "rewards/rejected": -0.003234823001548648, "step": 70 }, { "epoch": 0.99, "grad_norm": 3.6208468629029484, "learning_rate": 2.3794460453555044e-10, "logits/chosen": 0.3552563190460205, "logits/rejected": -0.7167419195175171, "logps/chosen": -452.73162841796875, "logps/rejected": -1027.007568359375, "loss": 0.665, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": 0.05553414672613144, "rewards/margins": 0.059998393058776855, "rewards/rejected": -0.00446424912661314, "step": 80 }, { "epoch": 1.0, "step": 81, "total_flos": 0.0, "train_loss": 0.6753969060050117, "train_runtime": 1121.4796, "train_samples_per_second": 4.608, "train_steps_per_second": 0.072 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }