{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 5.038770139725663, "learning_rate": 6.25e-07, "logits/chosen": -1.3596761226654053, "logits/rejected": -1.0023326873779297, "logps/chosen": -450.79583740234375, "logps/rejected": -781.127197265625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.13, "grad_norm": 5.222015988708302, "learning_rate": 4.989935734988098e-06, "logits/chosen": -1.0887341499328613, "logits/rejected": -1.0588740110397339, "logps/chosen": -564.32080078125, "logps/rejected": -855.5671997070312, "loss": 0.6684, "rewards/accuracies": 0.6944444179534912, "rewards/chosen": -0.007345028221607208, "rewards/margins": 0.07519946992397308, "rewards/rejected": -0.08254450559616089, "step": 10 }, { "epoch": 0.26, "grad_norm": 3.800980531623392, "learning_rate": 4.646121984004666e-06, "logits/chosen": -1.4986072778701782, "logits/rejected": -2.113752603530884, "logps/chosen": -606.9659423828125, "logps/rejected": -1018.4754638671875, "loss": 0.4124, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.3804299235343933, "rewards/margins": 1.465038537979126, "rewards/rejected": -1.845468521118164, "step": 20 }, { "epoch": 0.38, "grad_norm": 4.950457492388352, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -1.7479159832000732, "logits/rejected": -2.567039966583252, "logps/chosen": -614.0457153320312, "logps/rejected": -1108.7679443359375, "loss": 0.3199, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.10914883762598038, "rewards/margins": 2.6420645713806152, "rewards/rejected": -2.751213550567627, "step": 30 }, { "epoch": 0.51, "grad_norm": 4.011791311794506, "learning_rate": 2.835583164544139e-06, "logits/chosen": -1.7483808994293213, "logits/rejected": -2.6819939613342285, "logps/chosen": -644.5540161132812, "logps/rejected": -1273.226318359375, "loss": 0.2279, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.04285002499818802, "rewards/margins": 4.04502010345459, "rewards/rejected": -4.002170085906982, "step": 40 }, { "epoch": 0.64, "grad_norm": 3.2682180093043582, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.5594079494476318, "logits/rejected": -2.4641501903533936, "logps/chosen": -627.8310546875, "logps/rejected": -1211.9755859375, "loss": 0.2181, "rewards/accuracies": 0.9375, "rewards/chosen": 0.09700597822666168, "rewards/margins": 3.8286356925964355, "rewards/rejected": -3.7316298484802246, "step": 50 }, { "epoch": 0.77, "grad_norm": 2.445366662132522, "learning_rate": 7.723433775328385e-07, "logits/chosen": -1.6432807445526123, "logits/rejected": -2.286984920501709, "logps/chosen": -544.6373291015625, "logps/rejected": -1150.681396484375, "loss": 0.1556, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.27296125888824463, "rewards/margins": 3.291428327560425, "rewards/rejected": -3.0184669494628906, "step": 60 }, { "epoch": 0.9, "grad_norm": 4.67000331566183, "learning_rate": 1.59412823400657e-07, "logits/chosen": -1.5773608684539795, "logits/rejected": -2.4722862243652344, "logps/chosen": -636.8548583984375, "logps/rejected": -1234.2066650390625, "loss": 0.1503, "rewards/accuracies": 0.956250011920929, "rewards/chosen": 0.26559901237487793, "rewards/margins": 4.093817234039307, "rewards/rejected": -3.828218460083008, "step": 70 }, { "epoch": 1.0, "step": 78, "total_flos": 0.0, "train_loss": 0.2915988182410216, "train_runtime": 1026.489, "train_samples_per_second": 4.863, "train_steps_per_second": 0.076 } ], "logging_steps": 10, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }