|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-09,
      "logits/chosen": -1.9591341018676758,
      "logits/rejected": -2.0234761238098145,
      "logps/chosen": -395.7680969238281,
      "logps/rejected": -380.58642578125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.063248872756958,
      "logits/rejected": -1.960920810699463,
      "logps/chosen": -262.19256591796875,
      "logps/rejected": -326.22943115234375,
      "loss": 0.693,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": -0.0009140498586930335,
      "rewards/margins": -0.0010445532388985157,
      "rewards/rejected": 0.00013050338020548224,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-08,
      "logits/chosen": -2.0558881759643555,
      "logits/rejected": -2.0031511783599854,
      "logps/chosen": -240.96337890625,
      "logps/rejected": -371.1289978027344,
      "loss": 0.6907,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.006654644850641489,
      "rewards/margins": 0.0035710707306861877,
      "rewards/rejected": -0.010225716046988964,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8826812513685484e-08,
      "logits/chosen": -2.042896270751953,
      "logits/rejected": -1.9954335689544678,
      "logps/chosen": -274.43927001953125,
      "logps/rejected": -347.6334228515625,
      "loss": 0.6812,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.02912921831011772,
      "rewards/margins": 0.024064257740974426,
      "rewards/rejected": -0.053193479776382446,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421204e-08,
      "logits/chosen": -2.079772472381592,
      "logits/rejected": -1.9894037246704102,
      "logps/chosen": -259.71014404296875,
      "logps/rejected": -420.01416015625,
      "loss": 0.6651,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.0546238012611866,
      "rewards/margins": 0.0798250287771225,
      "rewards/rejected": -0.1344488114118576,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595095e-08,
      "logits/chosen": -2.148991346359253,
      "logits/rejected": -2.0234665870666504,
      "logps/chosen": -257.0783996582031,
      "logps/rejected": -427.166259765625,
      "loss": 0.6505,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.052711982280015945,
      "rewards/margins": 0.15193995833396912,
      "rewards/rejected": -0.20465192198753357,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-08,
      "logits/chosen": -2.040611743927002,
      "logits/rejected": -1.9959065914154053,
      "logps/chosen": -268.3548278808594,
      "logps/rejected": -405.2843933105469,
      "loss": 0.6472,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.08892913907766342,
      "rewards/margins": 0.15090619027614594,
      "rewards/rejected": -0.23983530700206757,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.437648009023905e-08,
      "logits/chosen": -1.9940481185913086,
      "logits/rejected": -1.9221343994140625,
      "logps/chosen": -264.3708190917969,
      "logps/rejected": -402.1899108886719,
      "loss": 0.6339,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.10763730853796005,
      "rewards/margins": 0.19921264052391052,
      "rewards/rejected": -0.30684995651245117,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9100607788275543e-08,
      "logits/chosen": -2.079116106033325,
      "logits/rejected": -2.0244381427764893,
      "logps/chosen": -296.0696716308594,
      "logps/rejected": -376.6617431640625,
      "loss": 0.6396,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.13822747766971588,
      "rewards/margins": 0.16332195699214935,
      "rewards/rejected": -0.30154943466186523,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.362761650339181e-08,
      "logits/chosen": -2.067830801010132,
      "logits/rejected": -1.9739353656768799,
      "logps/chosen": -278.68927001953125,
      "logps/rejected": -415.96826171875,
      "loss": 0.6306,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.13140961527824402,
      "rewards/margins": 0.2424076348543167,
      "rewards/rejected": -0.3738172650337219,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089574e-08,
      "logits/chosen": -2.0384275913238525,
      "logits/rejected": -1.9769903421401978,
      "logps/chosen": -293.6415100097656,
      "logps/rejected": -422.6812438964844,
      "loss": 0.6461,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.14798042178153992,
      "rewards/margins": 0.2610620856285095,
      "rewards/rejected": -0.4090425372123718,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135997e-08,
      "logits/chosen": -2.034789562225342,
      "logits/rejected": -1.9853355884552002,
      "logps/chosen": -291.19439697265625,
      "logps/rejected": -387.21539306640625,
      "loss": 0.6413,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.12260253727436066,
      "rewards/margins": 0.20887689292430878,
      "rewards/rejected": -0.3314794600009918,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367876e-09,
      "logits/chosen": -2.036113977432251,
      "logits/rejected": -1.9962167739868164,
      "logps/chosen": -259.037353515625,
      "logps/rejected": -444.01019287109375,
      "loss": 0.6243,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.12251874059438705,
      "rewards/margins": 0.33078280091285706,
      "rewards/rejected": -0.45330148935317993,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-09,
      "logits/chosen": -2.035203218460083,
      "logits/rejected": -1.9533179998397827,
      "logps/chosen": -272.2509460449219,
      "logps/rejected": -424.2042541503906,
      "loss": 0.6395,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.09688977152109146,
      "rewards/margins": 0.31916412711143494,
      "rewards/rejected": -0.4160539209842682,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020853e-09,
      "logits/chosen": -2.0237715244293213,
      "logits/rejected": -1.960404396057129,
      "logps/chosen": -278.28302001953125,
      "logps/rejected": -391.74505615234375,
      "loss": 0.6212,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.13397958874702454,
      "rewards/margins": 0.21643836796283722,
      "rewards/rejected": -0.35041797161102295,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.870879364444108e-10,
      "logits/chosen": -2.026533603668213,
      "logits/rejected": -1.9909785985946655,
      "logps/chosen": -286.67529296875,
      "logps/rejected": -433.8912658691406,
      "loss": 0.6425,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.10987748950719833,
      "rewards/margins": 0.24108321964740753,
      "rewards/rejected": -0.35096070170402527,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.6457447525840135,
      "train_runtime": 2659.443,
      "train_samples_per_second": 7.663,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|