{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5e-08, "logits/chosen": -2.848330020904541, "logits/rejected": -2.834825038909912, "logps/chosen": -287.69622802734375, "logps/rejected": -255.33815002441406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1, "learning_rate": 5e-07, "logits/chosen": -2.8012733459472656, "logits/rejected": -2.753434658050537, "logps/chosen": -244.99864196777344, "logps/rejected": -200.799560546875, "loss": 0.6926, "rewards/accuracies": 0.5069444179534912, "rewards/chosen": 0.0004034777812194079, "rewards/margins": 0.0008078858954831958, "rewards/rejected": -0.00040440805605612695, "step": 10 }, { "epoch": 0.21, "learning_rate": 4.83504027183137e-07, "logits/chosen": -2.7793192863464355, "logits/rejected": -2.7794222831726074, "logps/chosen": -267.78729248046875, "logps/rejected": -250.0585174560547, "loss": 0.6825, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.021286360919475555, "rewards/margins": 0.027004068717360497, "rewards/rejected": -0.00571770966053009, "step": 20 }, { "epoch": 0.31, "learning_rate": 4.3619304568594546e-07, "logits/chosen": -2.7962687015533447, "logits/rejected": -2.7896554470062256, "logps/chosen": -278.6320495605469, "logps/rejected": -262.569580078125, "loss": 0.6661, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.0014000388327986002, "rewards/margins": 0.0615326389670372, "rewards/rejected": -0.060132600367069244, "step": 30 }, { "epoch": 0.42, "learning_rate": 3.643105808261596e-07, "logits/chosen": -2.8134655952453613, "logits/rejected": -2.801830768585205, "logps/chosen": -290.64794921875, "logps/rejected": -259.28704833984375, "loss": 0.6455, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.10460182279348373, "rewards/margins": 0.1050100103020668, "rewards/rejected": -0.20961184799671173, "step": 40 }, { "epoch": 0.52, "learning_rate": 2.7734280209446865e-07, "logits/chosen": -2.7795636653900146, "logits/rejected": -2.7554478645324707, "logps/chosen": -303.3760986328125, "logps/rejected": -283.14141845703125, "loss": 0.6247, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.15783920884132385, "rewards/margins": 0.1601353883743286, "rewards/rejected": -0.3179745674133301, "step": 50 }, { "epoch": 0.62, "learning_rate": 1.8676665440207977e-07, "logits/chosen": -2.773420810699463, "logits/rejected": -2.792323589324951, "logps/chosen": -319.9610900878906, "logps/rejected": -301.3622741699219, "loss": 0.6152, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.33203256130218506, "rewards/margins": 0.21394209563732147, "rewards/rejected": -0.5459746718406677, "step": 60 }, { "epoch": 0.73, "learning_rate": 1.0453527111051183e-07, "logits/chosen": -2.770045518875122, "logits/rejected": -2.764028549194336, "logps/chosen": -324.46160888671875, "logps/rejected": -320.1623840332031, "loss": 0.5972, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.2613491415977478, "rewards/margins": 0.3139723539352417, "rewards/rejected": -0.5753214955329895, "step": 70 }, { "epoch": 0.83, "learning_rate": 4.1500545527530544e-08, "logits/chosen": -2.8060431480407715, "logits/rejected": -2.773552417755127, "logps/chosen": -326.8184509277344, "logps/rejected": -345.7678527832031, "loss": 0.589, "rewards/accuracies": 0.71875, "rewards/chosen": -0.369906485080719, "rewards/margins": 0.32024139165878296, "rewards/rejected": -0.690147876739502, "step": 80 }, { "epoch": 0.94, "learning_rate": 5.981030611018234e-09, "logits/chosen": -2.753633499145508, "logits/rejected": -2.697878360748291, "logps/chosen": -304.6568603515625, "logps/rejected": -340.0152893066406, "loss": 0.6002, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.35058873891830444, "rewards/margins": 0.34887048602104187, "rewards/rejected": -0.6994592547416687, "step": 90 }, { "epoch": 1.0, "step": 96, "total_flos": 0.0, "train_loss": 0.6312613611419996, "train_runtime": 1566.6754, "train_samples_per_second": 7.804, "train_steps_per_second": 0.061 } ], "logging_steps": 10, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }