{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.010141490144761593, "eval_steps": 100, "global_step": 31, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 2.421875, "learning_rate": 1.25e-06, "logits/chosen": -2.3689165115356445, "logits/rejected": -2.3419089317321777, "logps/chosen": -304.96429443359375, "logps/rejected": -224.31954956054688, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "grad_norm": 1.9296875, "learning_rate": 4.415111107797445e-06, "logits/chosen": -2.3774471282958984, "logits/rejected": -2.358837127685547, "logps/chosen": -267.6408386230469, "logps/rejected": -221.9726104736328, "loss": 0.6921, "rewards/accuracies": 0.5166666507720947, "rewards/chosen": 0.008927525021135807, "rewards/margins": 0.002250629710033536, "rewards/rejected": 0.006676895078271627, "step": 10 }, { "epoch": 0.01, "grad_norm": 2.203125, "learning_rate": 1.7829919182222752e-06, "logits/chosen": -2.4560706615448, "logits/rejected": -2.402303695678711, "logps/chosen": -265.12762451171875, "logps/rejected": -272.61566162109375, "loss": 0.6889, "rewards/accuracies": 0.5900000333786011, "rewards/chosen": 0.028245043009519577, "rewards/margins": 0.009032377041876316, "rewards/rejected": 0.019212666898965836, "step": 20 }, { "epoch": 0.01, "grad_norm": 2.375, "learning_rate": 1.6904105645142443e-08, "logits/chosen": -2.3814165592193604, "logits/rejected": -2.3470723628997803, "logps/chosen": -304.08697509765625, "logps/rejected": -281.0203552246094, "loss": 0.6844, "rewards/accuracies": 0.6299999952316284, "rewards/chosen": 0.03472686558961868, "rewards/margins": 0.019056813791394234, "rewards/rejected": 0.015670055523514748, "step": 30 }, { "epoch": 0.01, "step": 31, "total_flos": 0.0, "train_loss": 0.6888245363389293, "train_runtime": 439.9957, "train_samples_per_second": 1.389, "train_steps_per_second": 0.07 } ], "logging_steps": 10, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 5, "trial_name": null, "trial_params": null }