{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 48, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1e-07, "logits/chosen": -2.714864492416382, "logits/rejected": -2.751317024230957, "logps/chosen": -236.86740112304688, "logps/pi_response": -114.0628662109375, "logps/ref_response": -114.0628662109375, "logps/rejected": -478.6082763671875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.21, "learning_rate": 4.83504027183137e-07, "logits/chosen": -2.657212257385254, "logits/rejected": -2.586080312728882, "logps/chosen": -296.349365234375, "logps/pi_response": -145.713623046875, "logps/ref_response": -136.1793212890625, "logps/rejected": -501.9734802246094, "loss": 0.643, "rewards/accuracies": 0.6006944179534912, "rewards/chosen": -0.26413747668266296, "rewards/margins": 0.26705610752105713, "rewards/rejected": -0.5311936140060425, "step": 10 }, { "epoch": 0.42, "learning_rate": 3.643105808261596e-07, "logits/chosen": -2.5398590564727783, "logits/rejected": -2.4759039878845215, "logps/chosen": -350.80548095703125, "logps/pi_response": -158.80862426757812, "logps/ref_response": -116.90655517578125, "logps/rejected": -726.4019775390625, "loss": 0.5769, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -1.0708297491073608, "rewards/margins": 1.4764198064804077, "rewards/rejected": -2.5472495555877686, "step": 20 }, { "epoch": 0.62, "learning_rate": 1.8676665440207977e-07, "logits/chosen": -2.436058282852173, "logits/rejected": -2.3596558570861816, "logps/chosen": -376.2472229003906, "logps/pi_response": -170.85630798339844, "logps/ref_response": -132.64312744140625, "logps/rejected": -646.6350708007812, "loss": 0.5035, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.9769057035446167, "rewards/margins": 0.9189132452011108, "rewards/rejected": -1.895819067955017, "step": 30 }, { "epoch": 0.83, "learning_rate": 4.1500545527530544e-08, "logits/chosen": -2.4626142978668213, "logits/rejected": -2.351714849472046, "logps/chosen": -363.10650634765625, "logps/pi_response": -163.067626953125, "logps/ref_response": -126.86649322509766, "logps/rejected": -648.2885131835938, "loss": 0.4795, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.845011830329895, "rewards/margins": 0.9437265396118164, "rewards/rejected": -1.788738489151001, "step": 40 }, { "epoch": 1.0, "step": 48, "total_flos": 0.0, "train_loss": 0.5470561385154724, "train_runtime": 2831.8023, "train_samples_per_second": 4.318, "train_steps_per_second": 0.017 } ], "logging_steps": 10, "max_steps": 48, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }