ShenaoZhang's picture
Model save
7686b64 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-09,
"logits/chosen": -1.9591341018676758,
"logits/rejected": -2.0234761238098145,
"logps/chosen": -395.7680969238281,
"logps/rejected": -380.58642578125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.125e-08,
"logits/chosen": -2.063248872756958,
"logits/rejected": -1.960920810699463,
"logps/chosen": -262.19256591796875,
"logps/rejected": -326.22943115234375,
"loss": 0.693,
"rewards/accuracies": 0.4166666567325592,
"rewards/chosen": -0.0009140498586930335,
"rewards/margins": -0.0010445532388985157,
"rewards/rejected": 0.00013050338020548224,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-08,
"logits/chosen": -2.0558881759643555,
"logits/rejected": -2.0031511783599854,
"logps/chosen": -240.96337890625,
"logps/rejected": -371.1289978027344,
"loss": 0.6907,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.006654644850641489,
"rewards/margins": 0.0035710707306861877,
"rewards/rejected": -0.010225716046988964,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.8826812513685484e-08,
"logits/chosen": -2.042896270751953,
"logits/rejected": -1.9954335689544678,
"logps/chosen": -274.43927001953125,
"logps/rejected": -347.6334228515625,
"loss": 0.6812,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.02912921831011772,
"rewards/margins": 0.024064257740974426,
"rewards/rejected": -0.053193479776382446,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421204e-08,
"logits/chosen": -2.079772472381592,
"logits/rejected": -1.9894037246704102,
"logps/chosen": -259.71014404296875,
"logps/rejected": -420.01416015625,
"loss": 0.6651,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.0546238012611866,
"rewards/margins": 0.0798250287771225,
"rewards/rejected": -0.1344488114118576,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595095e-08,
"logits/chosen": -2.148991346359253,
"logits/rejected": -2.0234665870666504,
"logps/chosen": -257.0783996582031,
"logps/rejected": -427.166259765625,
"loss": 0.6505,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.052711982280015945,
"rewards/margins": 0.15193995833396912,
"rewards/rejected": -0.20465192198753357,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-08,
"logits/chosen": -2.040611743927002,
"logits/rejected": -1.9959065914154053,
"logps/chosen": -268.3548278808594,
"logps/rejected": -405.2843933105469,
"loss": 0.6472,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.08892913907766342,
"rewards/margins": 0.15090619027614594,
"rewards/rejected": -0.23983530700206757,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.437648009023905e-08,
"logits/chosen": -1.9940481185913086,
"logits/rejected": -1.9221343994140625,
"logps/chosen": -264.3708190917969,
"logps/rejected": -402.1899108886719,
"loss": 0.6339,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.10763730853796005,
"rewards/margins": 0.19921264052391052,
"rewards/rejected": -0.30684995651245117,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.9100607788275543e-08,
"logits/chosen": -2.079116106033325,
"logits/rejected": -2.0244381427764893,
"logps/chosen": -296.0696716308594,
"logps/rejected": -376.6617431640625,
"loss": 0.6396,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.13822747766971588,
"rewards/margins": 0.16332195699214935,
"rewards/rejected": -0.30154943466186523,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.362761650339181e-08,
"logits/chosen": -2.067830801010132,
"logits/rejected": -1.9739353656768799,
"logps/chosen": -278.68927001953125,
"logps/rejected": -415.96826171875,
"loss": 0.6306,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.13140961527824402,
"rewards/margins": 0.2424076348543167,
"rewards/rejected": -0.3738172650337219,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089574e-08,
"logits/chosen": -2.0384275913238525,
"logits/rejected": -1.9769903421401978,
"logps/chosen": -293.6415100097656,
"logps/rejected": -422.6812438964844,
"loss": 0.6461,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.14798042178153992,
"rewards/margins": 0.2610620856285095,
"rewards/rejected": -0.4090425372123718,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135997e-08,
"logits/chosen": -2.034789562225342,
"logits/rejected": -1.9853355884552002,
"logps/chosen": -291.19439697265625,
"logps/rejected": -387.21539306640625,
"loss": 0.6413,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.12260253727436066,
"rewards/margins": 0.20887689292430878,
"rewards/rejected": -0.3314794600009918,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367876e-09,
"logits/chosen": -2.036113977432251,
"logits/rejected": -1.9962167739868164,
"logps/chosen": -259.037353515625,
"logps/rejected": -444.01019287109375,
"loss": 0.6243,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.12251874059438705,
"rewards/margins": 0.33078280091285706,
"rewards/rejected": -0.45330148935317993,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-09,
"logits/chosen": -2.035203218460083,
"logits/rejected": -1.9533179998397827,
"logps/chosen": -272.2509460449219,
"logps/rejected": -424.2042541503906,
"loss": 0.6395,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.09688977152109146,
"rewards/margins": 0.31916412711143494,
"rewards/rejected": -0.4160539209842682,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020853e-09,
"logits/chosen": -2.0237715244293213,
"logits/rejected": -1.960404396057129,
"logps/chosen": -278.28302001953125,
"logps/rejected": -391.74505615234375,
"loss": 0.6212,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.13397958874702454,
"rewards/margins": 0.21643836796283722,
"rewards/rejected": -0.35041797161102295,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.870879364444108e-10,
"logits/chosen": -2.026533603668213,
"logits/rejected": -1.9909785985946655,
"logps/chosen": -286.67529296875,
"logps/rejected": -433.8912658691406,
"loss": 0.6425,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.10987748950719833,
"rewards/margins": 0.24108321964740753,
"rewards/rejected": -0.35096070170402527,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.6457447525840135,
"train_runtime": 2659.443,
"train_samples_per_second": 7.663,
"train_steps_per_second": 0.06
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}