{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 53, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "debug/losses": 0.342240571975708, "debug/policy_weights": 0.4937487840652466, "debug/raw_losses": 0.6931471824645996, "epoch": 0.02, "learning_rate": 8.333333333333333e-08, "logits/chosen": -2.8462421894073486, "logits/rejected": -2.8283610343933105, "logps/chosen": -274.7393798828125, "logps/rejected": -204.42575073242188, "loss": 0.3624, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "debug/losses": 0.3810771703720093, "debug/policy_weights": 0.5504893660545349, "debug/raw_losses": 0.6920116543769836, "epoch": 0.19, "learning_rate": 4.911172937635942e-07, "logits/chosen": -2.852349281311035, "logits/rejected": -2.83735990524292, "logps/chosen": -306.01458740234375, "logps/rejected": -295.93804931640625, "loss": 0.3763, "rewards/accuracies": 0.4861111044883728, "rewards/chosen": 0.0012468346394598484, "rewards/margins": 0.0023373025469481945, "rewards/rejected": -0.0010904677910730243, "step": 10 }, { "debug/losses": 0.3667130768299103, "debug/policy_weights": 0.5492504835128784, "debug/raw_losses": 0.6667538285255432, "epoch": 0.38, "learning_rate": 3.982949361823388e-07, "logits/chosen": -2.8518126010894775, "logits/rejected": -2.872077226638794, "logps/chosen": -296.2869567871094, "logps/rejected": -332.9769592285156, "loss": 0.374, "rewards/accuracies": 0.6875, "rewards/chosen": 0.017367612570524216, "rewards/margins": 0.05825704336166382, "rewards/rejected": -0.04088941961526871, "step": 20 }, { "debug/losses": 0.35409680008888245, "debug/policy_weights": 0.5632873177528381, "debug/raw_losses": 0.6232098340988159, "epoch": 0.57, "learning_rate": 2.416462557480814e-07, "logits/chosen": -2.824850559234619, "logits/rejected": -2.8103976249694824, "logps/chosen": -312.3518981933594, "logps/rejected": -323.0265197753906, "loss": 0.3551, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.014037144370377064, "rewards/margins": 0.1827748715877533, "rewards/rejected": -0.1687377393245697, "step": 30 }, { "debug/losses": 0.34752941131591797, "debug/policy_weights": 0.550245463848114, "debug/raw_losses": 0.6313939690589905, "epoch": 0.75, "learning_rate": 8.859303711029939e-08, "logits/chosen": -2.79345965385437, "logits/rejected": -2.797208547592163, "logps/chosen": -275.5638122558594, "logps/rejected": -348.8089294433594, "loss": 0.3454, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.0044286223128438, "rewards/margins": 0.20287349820137024, "rewards/rejected": -0.20730212330818176, "step": 40 }, { "debug/losses": 0.3295789361000061, "debug/policy_weights": 0.5445331335067749, "debug/raw_losses": 0.5927887558937073, "epoch": 0.94, "learning_rate": 5.009573740853313e-09, "logits/chosen": -2.829876184463501, "logits/rejected": -2.8419814109802246, "logps/chosen": -307.4209899902344, "logps/rejected": -348.12298583984375, "loss": 0.3378, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0008583018789067864, "rewards/margins": 0.31627127528190613, "rewards/rejected": -0.3154129683971405, "step": 50 }, { "epoch": 1.0, "step": 53, "total_flos": 0.0, "train_loss": 0.3564033879424041, "train_runtime": 425.1442, "train_samples_per_second": 15.877, "train_steps_per_second": 0.125 } ], "logging_steps": 10, "max_steps": 53, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }