|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.5757241249084473, |
|
"logits/rejected": -2.6334033012390137, |
|
"logps/chosen": -158.64126586914062, |
|
"logps/rejected": -129.17214965820312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.689120292663574, |
|
"logits/rejected": -2.7100415229797363, |
|
"logps/chosen": -231.9307403564453, |
|
"logps/rejected": -230.61669921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3541666567325592, |
|
"rewards/chosen": -0.005180968437343836, |
|
"rewards/margins": -0.0007737001869827509, |
|
"rewards/rejected": -0.004407268483191729, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.7198684215545654, |
|
"logits/rejected": -2.677248477935791, |
|
"logps/chosen": -246.5954132080078, |
|
"logps/rejected": -250.72412109375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.14306680858135223, |
|
"rewards/margins": 0.009777521714568138, |
|
"rewards/rejected": -0.15284433960914612, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.576653242111206, |
|
"logits/rejected": -2.5379045009613037, |
|
"logps/chosen": -247.7720489501953, |
|
"logps/rejected": -245.41921997070312, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.21431183815002441, |
|
"rewards/margins": 0.036602433770895004, |
|
"rewards/rejected": -0.2509142756462097, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.4344098567962646, |
|
"logits/rejected": -2.4352052211761475, |
|
"logps/chosen": -253.05126953125, |
|
"logps/rejected": -293.6170654296875, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.521602988243103, |
|
"rewards/margins": 0.07865401357412338, |
|
"rewards/rejected": -0.6002570390701294, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.3450260162353516, |
|
"logits/rejected": -2.3353271484375, |
|
"logps/chosen": -307.8914794921875, |
|
"logps/rejected": -316.11505126953125, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8211005926132202, |
|
"rewards/margins": 0.08438173681497574, |
|
"rewards/rejected": -0.9054821729660034, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -2.2511789798736572, |
|
"logits/rejected": -2.2335622310638428, |
|
"logps/chosen": -282.9967956542969, |
|
"logps/rejected": -303.55426025390625, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.6935502886772156, |
|
"rewards/margins": 0.16655965149402618, |
|
"rewards/rejected": -0.8601099848747253, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -2.2453956604003906, |
|
"logits/rejected": -2.226074457168579, |
|
"logps/chosen": -286.8125915527344, |
|
"logps/rejected": -300.02789306640625, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6816620826721191, |
|
"rewards/margins": 0.20877805352210999, |
|
"rewards/rejected": -0.8904401659965515, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -2.2295961380004883, |
|
"logits/rejected": -2.209394931793213, |
|
"logps/chosen": -282.4059753417969, |
|
"logps/rejected": -335.4136657714844, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8680456876754761, |
|
"rewards/margins": 0.1932743787765503, |
|
"rewards/rejected": -1.0613200664520264, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -2.2832415103912354, |
|
"logits/rejected": -2.2923521995544434, |
|
"logps/chosen": -280.1626892089844, |
|
"logps/rejected": -269.5970153808594, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7220357656478882, |
|
"rewards/margins": 0.12294892966747284, |
|
"rewards/rejected": -0.8449847102165222, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -2.2097229957580566, |
|
"logits/rejected": -2.204942464828491, |
|
"logps/chosen": -277.9078674316406, |
|
"logps/rejected": -311.76177978515625, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.695646345615387, |
|
"rewards/margins": 0.10921863466501236, |
|
"rewards/rejected": -0.8048648834228516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -2.2849056720733643, |
|
"logits/rejected": -2.301518201828003, |
|
"logps/chosen": -298.0511474609375, |
|
"logps/rejected": -290.65155029296875, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8137847781181335, |
|
"rewards/margins": 0.09719870239496231, |
|
"rewards/rejected": -0.9109834432601929, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -2.277188777923584, |
|
"logits/rejected": -2.2617883682250977, |
|
"logps/chosen": -308.5967102050781, |
|
"logps/rejected": -313.9825134277344, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.7052074670791626, |
|
"rewards/margins": 0.12203893810510635, |
|
"rewards/rejected": -0.827246367931366, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -2.3243308067321777, |
|
"logits/rejected": -2.3333945274353027, |
|
"logps/chosen": -274.1197204589844, |
|
"logps/rejected": -313.65740966796875, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.615770161151886, |
|
"rewards/margins": 0.12384297698736191, |
|
"rewards/rejected": -0.7396131753921509, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -2.321314573287964, |
|
"logits/rejected": -2.318671703338623, |
|
"logps/chosen": -298.8154602050781, |
|
"logps/rejected": -320.1452331542969, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7064296007156372, |
|
"rewards/margins": 0.19559960067272186, |
|
"rewards/rejected": -0.9020291566848755, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -2.2854580879211426, |
|
"logits/rejected": -2.2632009983062744, |
|
"logps/chosen": -282.5638732910156, |
|
"logps/rejected": -272.4760437011719, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.6732932925224304, |
|
"rewards/margins": 0.1675054430961609, |
|
"rewards/rejected": -0.8407986760139465, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6621600247029239, |
|
"train_runtime": 2660.7412, |
|
"train_samples_per_second": 7.659, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|