{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9968652037617555, "eval_steps": 500, "global_step": 159, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-08, "logits/chosen": -2.5757241249084473, "logits/rejected": -2.6334033012390137, "logps/chosen": -158.64126586914062, "logps/rejected": -129.17214965820312, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.689120292663574, "logits/rejected": -2.7100415229797363, "logps/chosen": -231.9307403564453, "logps/rejected": -230.61669921875, "loss": 0.6931, "rewards/accuracies": 0.3541666567325592, "rewards/chosen": -0.005180968437343836, "rewards/margins": -0.0007737001869827509, "rewards/rejected": -0.004407268483191729, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.990353313429303e-07, "logits/chosen": -2.7198684215545654, "logits/rejected": -2.677248477935791, "logps/chosen": -246.5954132080078, "logps/rejected": -250.72412109375, "loss": 0.6907, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.14306680858135223, "rewards/margins": 0.009777521714568138, "rewards/rejected": -0.15284433960914612, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.882681251368548e-07, "logits/chosen": -2.576653242111206, "logits/rejected": -2.5379045009613037, "logps/chosen": -247.7720489501953, "logps/rejected": -245.41921997070312, "loss": 0.6804, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.21431183815002441, "rewards/margins": 0.036602433770895004, "rewards/rejected": -0.2509142756462097, "step": 30 }, { "epoch": 0.25, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -2.4344098567962646, "logits/rejected": -2.4352052211761475, "logps/chosen": -253.05126953125, "logps/rejected": -293.6170654296875, "loss": 0.666, "rewards/accuracies": 0.59375, "rewards/chosen": -0.521602988243103, "rewards/margins": 0.07865401357412338, "rewards/rejected": -0.6002570390701294, "step": 40 }, { "epoch": 0.31, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -2.3450260162353516, "logits/rejected": -2.3353271484375, "logps/chosen": -307.8914794921875, "logps/rejected": -316.11505126953125, "loss": 0.6678, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8211005926132202, "rewards/margins": 0.08438173681497574, "rewards/rejected": -0.9054821729660034, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.920161866827889e-07, "logits/chosen": -2.2511789798736572, "logits/rejected": -2.2335622310638428, "logps/chosen": -282.9967956542969, "logps/rejected": -303.55426025390625, "loss": 0.6582, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.6935502886772156, "rewards/margins": 0.16655965149402618, "rewards/rejected": -0.8601099848747253, "step": 60 }, { "epoch": 0.44, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -2.2453956604003906, "logits/rejected": -2.226074457168579, "logps/chosen": -286.8125915527344, "logps/rejected": -300.02789306640625, "loss": 0.665, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6816620826721191, "rewards/margins": 0.20877805352210999, "rewards/rejected": -0.8904401659965515, "step": 70 }, { "epoch": 0.5, "learning_rate": 2.910060778827554e-07, "logits/chosen": -2.2295961380004883, "logits/rejected": -2.209394931793213, "logps/chosen": -282.4059753417969, "logps/rejected": -335.4136657714844, "loss": 0.6747, "rewards/accuracies": 0.625, "rewards/chosen": -0.8680456876754761, "rewards/margins": 0.1932743787765503, "rewards/rejected": -1.0613200664520264, "step": 80 }, { "epoch": 0.56, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -2.2832415103912354, "logits/rejected": -2.2923521995544434, "logps/chosen": -280.1626892089844, "logps/rejected": -269.5970153808594, "loss": 0.6566, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.7220357656478882, "rewards/margins": 0.12294892966747284, "rewards/rejected": -0.8449847102165222, "step": 90 }, { "epoch": 0.63, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -2.2097229957580566, "logits/rejected": -2.204942464828491, "logps/chosen": -277.9078674316406, "logps/rejected": -311.76177978515625, "loss": 0.6557, "rewards/accuracies": 0.625, "rewards/chosen": -0.695646345615387, "rewards/margins": 0.10921863466501236, "rewards/rejected": -0.8048648834228516, "step": 100 }, { "epoch": 0.69, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -2.2849056720733643, "logits/rejected": -2.301518201828003, "logps/chosen": -298.0511474609375, "logps/rejected": -290.65155029296875, "loss": 0.6557, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.8137847781181335, "rewards/margins": 0.09719870239496231, "rewards/rejected": -0.9109834432601929, "step": 110 }, { "epoch": 0.75, "learning_rate": 8.628481651367875e-08, "logits/chosen": -2.277188777923584, "logits/rejected": -2.2617883682250977, "logps/chosen": -308.5967102050781, "logps/rejected": -313.9825134277344, "loss": 0.6536, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.7052074670791626, "rewards/margins": 0.12203893810510635, "rewards/rejected": -0.827246367931366, "step": 120 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-08, "logits/chosen": -2.3243308067321777, "logits/rejected": -2.3333945274353027, "logps/chosen": -274.1197204589844, "logps/rejected": -313.65740966796875, "loss": 0.6559, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.615770161151886, "rewards/margins": 0.12384297698736191, "rewards/rejected": -0.7396131753921509, "step": 130 }, { "epoch": 0.88, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -2.321314573287964, "logits/rejected": -2.318671703338623, "logps/chosen": -298.8154602050781, "logps/rejected": -320.1452331542969, "loss": 0.6433, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7064296007156372, "rewards/margins": 0.19559960067272186, "rewards/rejected": -0.9020291566848755, "step": 140 }, { "epoch": 0.94, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -2.2854580879211426, "logits/rejected": -2.2632009983062744, "logps/chosen": -282.5638732910156, "logps/rejected": -272.4760437011719, "loss": 0.6327, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.6732932925224304, "rewards/margins": 0.1675054430961609, "rewards/rejected": -0.8407986760139465, "step": 150 }, { "epoch": 1.0, "step": 159, "total_flos": 0.0, "train_loss": 0.6621600247029239, "train_runtime": 2660.7412, "train_samples_per_second": 7.659, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 159, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }