{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 100, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 7.120798164408008, "learning_rate": 7.142857142857142e-08, "logits/chosen": -1.457259178161621, "logits/rejected": -1.0621511936187744, "logps/chosen": -272.0050354003906, "logps/rejected": -816.85107421875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 6.902950407858967, "learning_rate": 4.963384589619232e-07, "logits/chosen": -1.310307502746582, "logits/rejected": -1.0846761465072632, "logps/chosen": -535.0698852539062, "logps/rejected": -898.6776733398438, "loss": 0.6917, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": -0.001652209903113544, "rewards/margins": 0.001886376878246665, "rewards/rejected": -0.003538586664944887, "step": 10 }, { "epoch": 0.32, "grad_norm": 6.3100220055883565, "learning_rate": 4.341852844691012e-07, "logits/chosen": -1.3299431800842285, "logits/rejected": -1.0885355472564697, "logps/chosen": -514.69140625, "logps/rejected": -881.6802978515625, "loss": 0.6634, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.011395025067031384, "rewards/margins": 0.07305588573217392, "rewards/rejected": -0.08445090055465698, "step": 20 }, { "epoch": 0.48, "grad_norm": 7.330007892766956, "learning_rate": 3.135545835483718e-07, "logits/chosen": -1.2920414209365845, "logits/rejected": -1.104931116104126, "logps/chosen": -543.4114990234375, "logps/rejected": -906.7586059570312, "loss": 0.593, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.043302834033966064, "rewards/margins": 0.253420889377594, "rewards/rejected": -0.29672369360923767, "step": 30 }, { "epoch": 0.64, "grad_norm": 7.8988027878682825, "learning_rate": 1.7274575140626315e-07, "logits/chosen": -1.2756078243255615, "logits/rejected": -1.09669029712677, "logps/chosen": -593.3638305664062, "logps/rejected": -903.4348754882812, "loss": 0.5274, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.08866459131240845, "rewards/margins": 0.4585743844509125, "rewards/rejected": -0.5472390651702881, "step": 40 }, { "epoch": 0.8, "grad_norm": 7.092952620264841, "learning_rate": 5.6464597340229375e-08, "logits/chosen": -1.1793540716171265, "logits/rejected": -1.0933005809783936, "logps/chosen": -541.88671875, "logps/rejected": -1034.5299072265625, "loss": 0.4638, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.13559284806251526, "rewards/margins": 0.9057224988937378, "rewards/rejected": -1.0413153171539307, "step": 50 }, { "epoch": 0.96, "grad_norm": 7.683839890605678, "learning_rate": 1.6295661628624447e-09, "logits/chosen": -1.1111315488815308, "logits/rejected": -1.098163366317749, "logps/chosen": -511.08526611328125, "logps/rejected": -985.9429931640625, "loss": 0.4288, "rewards/accuracies": 0.875, "rewards/chosen": -0.20404699444770813, "rewards/margins": 0.9808656573295593, "rewards/rejected": -1.1849125623703003, "step": 60 }, { "epoch": 0.99, "step": 62, "total_flos": 0.0, "train_loss": 0.5556549103029312, "train_runtime": 1091.242, "train_samples_per_second": 3.642, "train_steps_per_second": 0.057 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }