{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 45, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "grad_norm": 14.650781993147133, "learning_rate": 1e-07, "logits/chosen": -1.8544178009033203, "logits/rejected": -1.5131595134735107, "logps/chosen": -382.39324951171875, "logps/rejected": -74.34650421142578, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.67, "grad_norm": 13.355694493789834, "learning_rate": 4.809698831278217e-07, "logits/chosen": -1.7981030941009521, "logits/rejected": -1.8637254238128662, "logps/chosen": -297.70697021484375, "logps/rejected": -91.71724700927734, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": 0.0073351990431547165, "rewards/margins": 0.007683979347348213, "rewards/rejected": -0.000348779110936448, "step": 10 }, { "epoch": 1.33, "grad_norm": 10.957160162463524, "learning_rate": 3.4567085809127245e-07, "logits/chosen": -1.7605440616607666, "logits/rejected": -1.6980727910995483, "logps/chosen": -317.1847229003906, "logps/rejected": -89.17781066894531, "loss": 0.6424, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": 0.10211040079593658, "rewards/margins": 0.10564006865024567, "rewards/rejected": -0.003529661800712347, "step": 20 }, { "epoch": 2.0, "grad_norm": 9.631401952273302, "learning_rate": 1.5432914190872756e-07, "logits/chosen": -1.7424694299697876, "logits/rejected": -1.690342903137207, "logps/chosen": -304.099609375, "logps/rejected": -91.2957763671875, "loss": 0.5679, "rewards/accuracies": 1.0, "rewards/chosen": 0.263149231672287, "rewards/margins": 0.2803087830543518, "rewards/rejected": -0.017159538343548775, "step": 30 }, { "epoch": 2.67, "grad_norm": 9.39281581673066, "learning_rate": 1.9030116872178314e-08, "logits/chosen": -1.7320054769515991, "logits/rejected": -1.6545593738555908, "logps/chosen": -303.2212829589844, "logps/rejected": -99.20294189453125, "loss": 0.5282, "rewards/accuracies": 1.0, "rewards/chosen": 0.36085695028305054, "rewards/margins": 0.40523195266723633, "rewards/rejected": -0.04437502473592758, "step": 40 }, { "epoch": 3.0, "step": 45, "total_flos": 0.0, "train_loss": 0.5981406688690185, "train_runtime": 346.5059, "train_samples_per_second": 16.32, "train_steps_per_second": 0.13 } ], "logging_steps": 10, "max_steps": 45, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }