{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 52, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 6.388934227309893, "learning_rate": 8.333333333333333e-08, "logits/chosen": -1.2247042655944824, "logits/rejected": -1.0684211254119873, "logps/chosen": -569.8499145507812, "logps/rejected": -1057.6484375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.19, "grad_norm": 6.575619838412587, "learning_rate": 4.907293218369498e-07, "logits/chosen": -1.1266288757324219, "logits/rejected": -0.9588133692741394, "logps/chosen": -644.7501831054688, "logps/rejected": -896.37548828125, "loss": 0.6907, "rewards/accuracies": 0.5347222089767456, "rewards/chosen": -0.0003651145671028644, "rewards/margins": 0.003978920169174671, "rewards/rejected": -0.004344034940004349, "step": 10 }, { "epoch": 0.38, "grad_norm": 6.33044990257594, "learning_rate": 3.941700805287168e-07, "logits/chosen": -1.1694377660751343, "logits/rejected": -0.9742730855941772, "logps/chosen": -553.2115478515625, "logps/rejected": -933.1784057617188, "loss": 0.66, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.0070250085555016994, "rewards/margins": 0.06466411054134369, "rewards/rejected": -0.07168911397457123, "step": 20 }, { "epoch": 0.58, "grad_norm": 7.898094155565296, "learning_rate": 2.3293939665883228e-07, "logits/chosen": -1.121983528137207, "logits/rejected": -0.9881827235221863, "logps/chosen": -594.9716186523438, "logps/rejected": -1039.31982421875, "loss": 0.5638, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.06966790556907654, "rewards/margins": 0.3061855733394623, "rewards/rejected": -0.3758534789085388, "step": 30 }, { "epoch": 0.77, "grad_norm": 9.30835250885683, "learning_rate": 7.936171419533652e-08, "logits/chosen": -1.0671305656433105, "logits/rejected": -0.9684460759162903, "logps/chosen": -680.4083251953125, "logps/rejected": -1006.2408447265625, "loss": 0.4643, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20575208961963654, "rewards/margins": 0.7841703295707703, "rewards/rejected": -0.9899223446846008, "step": 40 }, { "epoch": 0.96, "grad_norm": 8.502404185464833, "learning_rate": 2.328513490917311e-09, "logits/chosen": -1.0291626453399658, "logits/rejected": -0.9718970060348511, "logps/chosen": -671.4656372070312, "logps/rejected": -1046.7633056640625, "loss": 0.4299, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.2569717466831207, "rewards/margins": 0.8754018545150757, "rewards/rejected": -1.132373571395874, "step": 50 }, { "epoch": 1.0, "step": 52, "total_flos": 0.0, "train_loss": 0.5552032154340011, "train_runtime": 696.2336, "train_samples_per_second": 4.777, "train_steps_per_second": 0.075 } ], "logging_steps": 10, "max_steps": 52, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }