{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 100, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.846153846153847e-07, "logits/chosen": 0.09327474981546402, "logits/rejected": 0.0010143294930458069, "logps/chosen": -192.48846435546875, "logps/rejected": -100.94705200195312, "loss": 0.0102, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.16, "learning_rate": 3.846153846153847e-06, "logits/chosen": 0.1767091602087021, "logits/rejected": 0.07442302256822586, "logps/chosen": -181.4591522216797, "logps/rejected": -162.86940002441406, "loss": 0.0101, "rewards/accuracies": 0.3958333432674408, "rewards/chosen": -0.001088320161215961, "rewards/margins": 0.0011313353898003697, "rewards/rejected": -0.0022196555510163307, "step": 10 }, { "epoch": 0.32, "learning_rate": 4.951096619903317e-06, "logits/chosen": 0.22487369179725647, "logits/rejected": 0.2731098234653473, "logps/chosen": -178.03126525878906, "logps/rejected": -134.20669555664062, "loss": 0.0102, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.0006419686833396554, "rewards/margins": 0.000687784340698272, "rewards/rejected": -0.0013297529658302665, "step": 20 }, { "epoch": 0.48, "learning_rate": 4.716164218065246e-06, "logits/chosen": 0.2530214190483093, "logits/rejected": 0.23770615458488464, "logps/chosen": -171.042724609375, "logps/rejected": -149.58749389648438, "loss": 0.0104, "rewards/accuracies": 0.375, "rewards/chosen": -0.0013910250272601843, "rewards/margins": -0.00043516140431165695, "rewards/rejected": -0.0009558637393638492, "step": 30 }, { "epoch": 0.64, "learning_rate": 4.3048902348863116e-06, "logits/chosen": 0.2087957113981247, "logits/rejected": 0.2569302022457123, "logps/chosen": -174.41390991210938, "logps/rejected": -153.81405639648438, "loss": 0.0105, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": 2.615232915559318e-05, "rewards/margins": -0.0007305769249796867, "rewards/rejected": 0.000756729394197464, "step": 40 }, { "epoch": 0.8, "learning_rate": 3.7500000000000005e-06, "logits/chosen": 0.22499620914459229, "logits/rejected": 0.17940528690814972, "logps/chosen": -198.2622833251953, "logps/rejected": -162.13284301757812, "loss": 0.0105, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": 0.0005174060934223235, "rewards/margins": -0.0008635882404632866, "rewards/rejected": 0.0013809942174702883, "step": 50 }, { "epoch": 0.96, "learning_rate": 3.0956464785579125e-06, "logits/chosen": 0.2462855875492096, "logits/rejected": 0.2873149812221527, "logps/chosen": -191.82350158691406, "logps/rejected": -158.4237823486328, "loss": 0.0101, "rewards/accuracies": 0.40625, "rewards/chosen": -0.0009249815484508872, "rewards/margins": 0.0009990528924390674, "rewards/rejected": -0.0019240345573052764, "step": 60 }, { "epoch": 1.12, "learning_rate": 2.39389699200963e-06, "logits/chosen": 0.24232041835784912, "logits/rejected": 0.14627447724342346, "logps/chosen": -198.8780059814453, "logps/rejected": -164.05284118652344, "loss": 0.0101, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": 0.000793910410720855, "rewards/margins": 0.001237305928952992, "rewards/rejected": -0.0004433956928551197, "step": 70 }, { "epoch": 1.28, "learning_rate": 1.700590188571887e-06, "logits/chosen": 0.13903482258319855, "logits/rejected": 0.18899227678775787, "logps/chosen": -202.53575134277344, "logps/rejected": -162.55294799804688, "loss": 0.0101, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": 4.065161192556843e-05, "rewards/margins": 0.0009858234552666545, "rewards/rejected": -0.0009451720979996026, "step": 80 }, { "epoch": 1.44, "learning_rate": 1.0708929268538034e-06, "logits/chosen": 0.2650024890899658, "logits/rejected": 0.17201000452041626, "logps/chosen": -175.6930694580078, "logps/rejected": -153.9431915283203, "loss": 0.0101, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.00013711625069845468, "rewards/margins": 0.0010541726369410753, "rewards/rejected": -0.001191288698464632, "step": 90 }, { "epoch": 1.6, "learning_rate": 5.549106142039018e-07, "logits/chosen": 0.1846569925546646, "logits/rejected": 0.1680203378200531, "logps/chosen": -177.53756713867188, "logps/rejected": -143.4310302734375, "loss": 0.0103, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.0002936289238277823, "rewards/margins": 0.00016203436825890094, "rewards/rejected": -0.0004556631320156157, "step": 100 }, { "epoch": 1.6, "eval_logits/chosen": -0.010850394144654274, "eval_logits/rejected": 0.0866025984287262, "eval_logps/chosen": -306.3062438964844, "eval_logps/rejected": -278.57757568359375, "eval_loss": 0.010814609937369823, "eval_rewards/accuracies": 0.5005000233650208, "eval_rewards/chosen": 0.0006301876856014132, "eval_rewards/margins": -2.8262209525564685e-05, "eval_rewards/rejected": 0.00065844994969666, "eval_runtime": 840.0738, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.595, "step": 100 }, { "epoch": 1.76, "learning_rate": 1.937002879188285e-07, "logits/chosen": 0.2905462980270386, "logits/rejected": 0.20439067482948303, "logps/chosen": -172.76882934570312, "logps/rejected": -152.1476593017578, "loss": 0.0102, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0004940209328196943, "rewards/margins": 0.0009124716743826866, "rewards/rejected": -0.0014064926654100418, "step": 110 }, { "epoch": 1.92, "learning_rate": 1.6003680950742728e-08, "logits/chosen": 0.1698417365550995, "logits/rejected": 0.17967729270458221, "logps/chosen": -172.40513610839844, "logps/rejected": -144.2118377685547, "loss": 0.0103, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0004940934595651925, "rewards/margins": 0.0001839537435444072, "rewards/rejected": 0.00031013964326120913, "step": 120 }, { "epoch": 1.98, "step": 124, "total_flos": 0.0, "train_loss": 0.0102488252845022, "train_runtime": 3093.4739, "train_samples_per_second": 0.647, "train_steps_per_second": 0.04 } ], "logging_steps": 10, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }