{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.99581589958159, "eval_steps": 500, "global_step": 119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 8.333333333333333e-08, "logits/chosen": -2.6852197647094727, "logits/rejected": -2.6903719902038574, "logps/chosen": -263.7275390625, "logps/rejected": -230.14215087890625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 8.333333333333333e-07, "logits/chosen": -2.7360000610351562, "logits/rejected": -2.735159158706665, "logps/chosen": -277.9869079589844, "logps/rejected": -253.91012573242188, "loss": 0.6916, "rewards/accuracies": 0.4791666567325592, "rewards/chosen": 0.004290024284273386, "rewards/margins": 0.002072603441774845, "rewards/rejected": 0.0022174210753291845, "step": 10 }, { "epoch": 0.17, "learning_rate": 9.862705056474794e-07, "logits/chosen": -2.764007568359375, "logits/rejected": -2.7425270080566406, "logps/chosen": -286.0954284667969, "logps/rejected": -274.6139221191406, "loss": 0.6689, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.009724309667944908, "rewards/margins": 0.05151768773794174, "rewards/rejected": -0.041793376207351685, "step": 20 }, { "epoch": 0.25, "learning_rate": 9.317841607379106e-07, "logits/chosen": -2.739412784576416, "logits/rejected": -2.731487512588501, "logps/chosen": -286.3190002441406, "logps/rejected": -291.80010986328125, "loss": 0.6266, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.25924447178840637, "rewards/margins": 0.24311673641204834, "rewards/rejected": -0.5023611783981323, "step": 30 }, { "epoch": 0.33, "learning_rate": 8.403425107745314e-07, "logits/chosen": -2.7835636138916016, "logits/rejected": -2.7717299461364746, "logps/chosen": -322.2847900390625, "logps/rejected": -343.01287841796875, "loss": 0.6088, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4827890396118164, "rewards/margins": 0.33479487895965576, "rewards/rejected": -0.8175839185714722, "step": 40 }, { "epoch": 0.42, "learning_rate": 7.197718133561508e-07, "logits/chosen": -2.7819108963012695, "logits/rejected": -2.7709670066833496, "logps/chosen": -355.53387451171875, "logps/rejected": -347.73553466796875, "loss": 0.5815, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.5724222660064697, "rewards/margins": 0.429451048374176, "rewards/rejected": -1.001873254776001, "step": 50 }, { "epoch": 0.5, "learning_rate": 5.803914069597341e-07, "logits/chosen": -2.65649676322937, "logits/rejected": -2.6217668056488037, "logps/chosen": -364.63458251953125, "logps/rejected": -355.6944580078125, "loss": 0.5917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7049036026000977, "rewards/margins": 0.4729110598564148, "rewards/rejected": -1.1778147220611572, "step": 60 }, { "epoch": 0.59, "learning_rate": 4.3413050507959067e-07, "logits/chosen": -2.5201010704040527, "logits/rejected": -2.4627785682678223, "logps/chosen": -344.98284912109375, "logps/rejected": -325.82452392578125, "loss": 0.5671, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.628750741481781, "rewards/margins": 0.43215298652648926, "rewards/rejected": -1.060903787612915, "step": 70 }, { "epoch": 0.67, "learning_rate": 2.935072052698059e-07, "logits/chosen": -2.376207113265991, "logits/rejected": -2.3495357036590576, "logps/chosen": -290.0788269042969, "logps/rejected": -357.1901550292969, "loss": 0.5714, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.6383817791938782, "rewards/margins": 0.5417054891586304, "rewards/rejected": -1.1800873279571533, "step": 80 }, { "epoch": 0.75, "learning_rate": 1.7055709710194448e-07, "logits/chosen": -2.415030002593994, "logits/rejected": -2.389885187149048, "logps/chosen": -351.54156494140625, "logps/rejected": -358.383544921875, "loss": 0.5425, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.820050835609436, "rewards/margins": 0.46901077032089233, "rewards/rejected": -1.289061427116394, "step": 90 }, { "epoch": 0.84, "learning_rate": 7.580316675034254e-08, "logits/chosen": -2.3294150829315186, "logits/rejected": -2.289402484893799, "logps/chosen": -357.5911560058594, "logps/rejected": -436.22357177734375, "loss": 0.5717, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0840901136398315, "rewards/margins": 0.5104671120643616, "rewards/rejected": -1.5945571660995483, "step": 100 }, { "epoch": 0.92, "learning_rate": 1.7355161444279343e-08, "logits/chosen": -2.406877040863037, "logits/rejected": -2.3612313270568848, "logps/chosen": -392.439697265625, "logps/rejected": -412.04833984375, "loss": 0.5647, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9446671605110168, "rewards/margins": 0.5704779624938965, "rewards/rejected": -1.5151450634002686, "step": 110 }, { "epoch": 1.0, "step": 119, "total_flos": 0.0, "train_loss": 0.596198278314927, "train_runtime": 1970.3201, "train_samples_per_second": 7.757, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 119, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }