{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9952038369304557, "eval_steps": 100, "global_step": 83, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011990407673860911, "grad_norm": 11.655411235528401, "learning_rate": 5.555555555555555e-08, "logits/chosen": -1.875, "logits/rejected": -1.2421875, "logps/chosen": -620.0, "logps/rejected": -476.0, "loss": 0.6961, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": 0.00099945068359375, "rewards/margins": 0.0050048828125, "rewards/rejected": -0.003997802734375, "step": 1 }, { "epoch": 0.11990407673860912, "grad_norm": 9.7437445581163, "learning_rate": 4.997747415511704e-07, "logits/chosen": -1.765625, "logits/rejected": -1.2890625, "logps/chosen": -396.0, "logps/rejected": -402.0, "loss": 0.6931, "rewards/accuracies": 0.3222222328186035, "rewards/chosen": 0.0025177001953125, "rewards/margins": -0.0025634765625, "rewards/rejected": 0.005096435546875, "step": 10 }, { "epoch": 0.23980815347721823, "grad_norm": 7.4219523058604535, "learning_rate": 4.732314645373921e-07, "logits/chosen": -1.8203125, "logits/rejected": -1.21875, "logps/chosen": -418.0, "logps/rejected": -400.0, "loss": 0.6729, "rewards/accuracies": 0.5699999928474426, "rewards/chosen": 0.1279296875, "rewards/margins": 0.041015625, "rewards/rejected": 0.08642578125, "step": 20 }, { "epoch": 0.3597122302158273, "grad_norm": 6.559255309529087, "learning_rate": 4.070549993239106e-07, "logits/chosen": -1.859375, "logits/rejected": -1.2421875, "logps/chosen": -362.0, "logps/rejected": -394.0, "loss": 0.6572, "rewards/accuracies": 0.7099999785423279, "rewards/chosen": 0.212890625, "rewards/margins": 0.1142578125, "rewards/rejected": 0.09814453125, "step": 30 }, { "epoch": 0.47961630695443647, "grad_norm": 11.071096096929814, "learning_rate": 3.129945153462813e-07, "logits/chosen": -1.8515625, "logits/rejected": -1.2421875, "logps/chosen": -406.0, "logps/rejected": -398.0, "loss": 0.6418, "rewards/accuracies": 0.7799999713897705, "rewards/chosen": 0.24609375, "rewards/margins": 0.1376953125, "rewards/rejected": 0.10791015625, "step": 40 }, { "epoch": 0.5995203836930456, "grad_norm": 8.025564513560811, "learning_rate": 2.0774979491953776e-07, "logits/chosen": -1.7890625, "logits/rejected": -1.359375, "logps/chosen": -382.0, "logps/rejected": -378.0, "loss": 0.6333, "rewards/accuracies": 0.7699999809265137, "rewards/chosen": 0.2275390625, "rewards/margins": 0.15234375, "rewards/rejected": 0.07568359375, "step": 50 }, { "epoch": 0.7194244604316546, "grad_norm": 9.025744560928734, "learning_rate": 1.1000630346560116e-07, "logits/chosen": -1.8515625, "logits/rejected": -1.3125, "logps/chosen": -376.0, "logps/rejected": -388.0, "loss": 0.6318, "rewards/accuracies": 0.7300000190734863, "rewards/chosen": 0.1904296875, "rewards/margins": 0.13671875, "rewards/rejected": 0.05419921875, "step": 60 }, { "epoch": 0.8393285371702638, "grad_norm": 6.592935931022897, "learning_rate": 3.7117715566672176e-08, "logits/chosen": -1.71875, "logits/rejected": -1.2890625, "logps/chosen": -356.0, "logps/rejected": -370.0, "loss": 0.6223, "rewards/accuracies": 0.7599999904632568, "rewards/chosen": 0.11865234375, "rewards/margins": 0.0947265625, "rewards/rejected": 0.0242919921875, "step": 70 }, { "epoch": 0.9592326139088729, "grad_norm": 7.695842566054067, "learning_rate": 2.0248911852807917e-09, "logits/chosen": -1.90625, "logits/rejected": -1.3359375, "logps/chosen": -394.0, "logps/rejected": -390.0, "loss": 0.6231, "rewards/accuracies": 0.7599999904632568, "rewards/chosen": 0.2099609375, "rewards/margins": 0.189453125, "rewards/rejected": 0.020263671875, "step": 80 }, { "epoch": 0.9952038369304557, "step": 83, "total_flos": 0.0, "train_loss": 0.6455191000398383, "train_runtime": 1327.6883, "train_samples_per_second": 3.765, "train_steps_per_second": 0.063 } ], "logging_steps": 10, "max_steps": 83, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }