{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997382884061764, "eval_steps": 100, "global_step": 955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.980285167694092, "logits/rejected": -2.87275767326355, "logps/chosen": -313.4390563964844, "logps/rejected": -236.1754150390625, "loss": 0.6931, "pred_label": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1, "use_label": 0.0 }, { "epoch": 0.1, "learning_rate": 9.953434225844005e-06, "logits/chosen": -2.8180909156799316, "logits/rejected": -2.8273613452911377, "logps/chosen": -285.13623046875, "logps/rejected": -271.66839599609375, "loss": 0.6624, "pred_label": 0.0, "rewards/accuracies": 0.6041666865348816, "rewards/chosen": 0.014434419572353363, "rewards/margins": 0.08631344139575958, "rewards/rejected": -0.07187902927398682, "step": 100, "use_label": 0.0 }, { "epoch": 0.21, "learning_rate": 8.789289871944122e-06, "logits/chosen": -2.8230364322662354, "logits/rejected": -2.8086395263671875, "logps/chosen": -278.2524108886719, "logps/rejected": -263.9921569824219, "loss": 0.5868, "pred_label": 0.0, "rewards/accuracies": 0.6949999928474426, "rewards/chosen": 0.05295524746179581, "rewards/margins": 0.39829620718955994, "rewards/rejected": -0.3453409671783447, "step": 200, "use_label": 0.0 }, { "epoch": 0.31, "learning_rate": 7.625145518044238e-06, "logits/chosen": -2.803905725479126, "logits/rejected": -2.802032232284546, "logps/chosen": -284.01385498046875, "logps/rejected": -259.5546569824219, "loss": 0.562, "pred_label": 0.0, "rewards/accuracies": 0.7056249976158142, "rewards/chosen": 0.005377008114010096, "rewards/margins": 0.5735920667648315, "rewards/rejected": -0.5682151317596436, "step": 300, "use_label": 0.0 }, { "epoch": 0.42, "learning_rate": 6.461001164144355e-06, "logits/chosen": -2.8141045570373535, "logits/rejected": -2.7911813259124756, "logps/chosen": -284.3139953613281, "logps/rejected": -269.4837951660156, "loss": 0.5527, "pred_label": 0.0, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.011648621410131454, "rewards/margins": 0.6873368620872498, "rewards/rejected": -0.6756882667541504, "step": 400, "use_label": 0.0 }, { "epoch": 0.52, "learning_rate": 5.2968568102444705e-06, "logits/chosen": -2.7915823459625244, "logits/rejected": -2.776299476623535, "logps/chosen": -269.73016357421875, "logps/rejected": -257.2498474121094, "loss": 0.5556, "pred_label": 0.0, "rewards/accuracies": 0.6862499713897705, "rewards/chosen": 0.03227977454662323, "rewards/margins": 0.5588020086288452, "rewards/rejected": -0.5265222191810608, "step": 500, "use_label": 0.0 }, { "epoch": 0.63, "learning_rate": 4.132712456344587e-06, "logits/chosen": -2.8199498653411865, "logits/rejected": -2.8022332191467285, "logps/chosen": -284.0947265625, "logps/rejected": -271.3774108886719, "loss": 0.5422, "pred_label": 0.0, "rewards/accuracies": 0.7143750190734863, "rewards/chosen": 0.11361943930387497, "rewards/margins": 0.7251341938972473, "rewards/rejected": -0.6115147471427917, "step": 600, "use_label": 0.0 }, { "epoch": 0.73, "learning_rate": 2.9685681024447033e-06, "logits/chosen": -2.8110527992248535, "logits/rejected": -2.788975477218628, "logps/chosen": -280.3959045410156, "logps/rejected": -254.49673461914062, "loss": 0.5404, "pred_label": 0.0, "rewards/accuracies": 0.7212499976158142, "rewards/chosen": 0.11637673527002335, "rewards/margins": 0.6999369263648987, "rewards/rejected": -0.5835601687431335, "step": 700, "use_label": 0.0 }, { "epoch": 0.84, "learning_rate": 1.8044237485448196e-06, "logits/chosen": -2.8146722316741943, "logits/rejected": -2.812129020690918, "logps/chosen": -287.4331359863281, "logps/rejected": -267.59161376953125, "loss": 0.5343, "pred_label": 0.0, "rewards/accuracies": 0.721875011920929, "rewards/chosen": 0.13239255547523499, "rewards/margins": 0.7503484487533569, "rewards/rejected": -0.6179558634757996, "step": 800, "use_label": 0.0 }, { "epoch": 0.94, "learning_rate": 6.402793946449361e-07, "logits/chosen": -2.8043179512023926, "logits/rejected": -2.8079681396484375, "logps/chosen": -276.8100891113281, "logps/rejected": -262.690673828125, "loss": 0.545, "pred_label": 0.0, "rewards/accuracies": 0.7193750143051147, "rewards/chosen": 0.06981150805950165, "rewards/margins": 0.7052963972091675, "rewards/rejected": -0.6354848742485046, "step": 900, "use_label": 0.0 }, { "epoch": 1.0, "eval_logits/chosen": -2.827404737472534, "eval_logits/rejected": -2.818655014038086, "eval_logps/chosen": -283.37945556640625, "eval_logps/rejected": -265.9969787597656, "eval_loss": 0.5459502935409546, "eval_pred_label": 0.0, "eval_rewards/accuracies": 0.7139999866485596, "eval_rewards/chosen": 0.08778975158929825, "eval_rewards/margins": 0.7575166821479797, "eval_rewards/rejected": -0.6697269678115845, "eval_runtime": 479.5351, "eval_samples_per_second": 4.171, "eval_steps_per_second": 0.261, "eval_use_label": 0.0, "step": 955 }, { "epoch": 1.0, "step": 955, "total_flos": 0.0, "train_loss": 0.5628191218950361, "train_runtime": 25746.1298, "train_samples_per_second": 2.375, "train_steps_per_second": 0.037 } ], "logging_steps": 100, "max_steps": 955, "num_train_epochs": 1, "save_steps": 50, "total_flos": 0.0, "trial_name": null, "trial_params": null }