0.0001_idpo_same_3itersn_iter_3 / trainer_state.json
ShenaoZ's picture
Model save
375b2ad verified
raw
history blame contribute delete
No virus
9.81 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -2.072941541671753,
"logits/rejected": -2.0026817321777344,
"logps/chosen": -474.7008361816406,
"logps/pi_response": -295.3243408203125,
"logps/ref_response": -295.3243408203125,
"logps/rejected": -399.129638671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.0674960613250732,
"logits/rejected": -2.0091359615325928,
"logps/chosen": -293.94415283203125,
"logps/pi_response": -184.32736206054688,
"logps/ref_response": -184.39166259765625,
"logps/rejected": -331.1109924316406,
"loss": 0.6873,
"rewards/accuracies": 0.5069444179534912,
"rewards/chosen": -0.005304105579853058,
"rewards/margins": 0.014647711999714375,
"rewards/rejected": -0.019951816648244858,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -1.976543664932251,
"logits/rejected": -1.9456901550292969,
"logps/chosen": -287.80316162109375,
"logps/pi_response": -200.89390563964844,
"logps/ref_response": -194.6990509033203,
"logps/rejected": -415.5035095214844,
"loss": 0.6721,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.28011927008628845,
"rewards/margins": 0.24019071459770203,
"rewards/rejected": -0.5203099846839905,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -1.948282241821289,
"logits/rejected": -1.9048038721084595,
"logps/chosen": -322.1227111816406,
"logps/pi_response": -202.88558959960938,
"logps/ref_response": -198.0297088623047,
"logps/rejected": -393.98382568359375,
"loss": 0.6566,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.31841763854026794,
"rewards/margins": 0.2520221173763275,
"rewards/rejected": -0.5704396963119507,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -1.99917471408844,
"logits/rejected": -1.9424034357070923,
"logps/chosen": -306.33917236328125,
"logps/pi_response": -221.41934204101562,
"logps/ref_response": -196.04620361328125,
"logps/rejected": -473.10430908203125,
"loss": 0.6508,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.4164486825466156,
"rewards/margins": 0.34302154183387756,
"rewards/rejected": -0.7594702243804932,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.022153615951538,
"logits/rejected": -1.8780553340911865,
"logps/chosen": -300.8150634765625,
"logps/pi_response": -213.9830322265625,
"logps/ref_response": -212.6190185546875,
"logps/rejected": -455.1544494628906,
"loss": 0.6199,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.07085980474948883,
"rewards/margins": 0.4123227000236511,
"rewards/rejected": -0.48318248987197876,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -1.623453140258789,
"logits/rejected": -1.5108482837677002,
"logps/chosen": -327.77496337890625,
"logps/pi_response": -236.1122589111328,
"logps/ref_response": -207.8074493408203,
"logps/rejected": -478.13677978515625,
"loss": 0.6015,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.4635746479034424,
"rewards/margins": 0.47717157006263733,
"rewards/rejected": -0.9407461285591125,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -1.3422645330429077,
"logits/rejected": -1.035585641860962,
"logps/chosen": -311.18182373046875,
"logps/pi_response": -238.19351196289062,
"logps/ref_response": -195.33450317382812,
"logps/rejected": -481.33416748046875,
"loss": 0.5778,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.45635801553726196,
"rewards/margins": 0.5854658484458923,
"rewards/rejected": -1.0418239831924438,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -1.1403189897537231,
"logits/rejected": -0.7611511945724487,
"logps/chosen": -358.22308349609375,
"logps/pi_response": -249.77072143554688,
"logps/ref_response": -202.05001831054688,
"logps/rejected": -496.70916748046875,
"loss": 0.5643,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.564284086227417,
"rewards/margins": 0.598731517791748,
"rewards/rejected": -1.163015604019165,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.8958919644355774,
"logits/rejected": -0.4801406264305115,
"logps/chosen": -365.9770202636719,
"logps/pi_response": -254.03115844726562,
"logps/ref_response": -202.2326202392578,
"logps/rejected": -469.5523986816406,
"loss": 0.582,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.6682159900665283,
"rewards/margins": 0.44698458909988403,
"rewards/rejected": -1.1152006387710571,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8831208944320679,
"logits/rejected": -0.5018015503883362,
"logps/chosen": -358.91448974609375,
"logps/pi_response": -286.5412292480469,
"logps/ref_response": -215.90811157226562,
"logps/rejected": -526.8685302734375,
"loss": 0.5697,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.6926028728485107,
"rewards/margins": 0.6388980150222778,
"rewards/rejected": -1.331500768661499,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8882027864456177,
"logits/rejected": -0.3719862103462219,
"logps/chosen": -382.29156494140625,
"logps/pi_response": -279.39959716796875,
"logps/ref_response": -194.23623657226562,
"logps/rejected": -500.37554931640625,
"loss": 0.5822,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.8814951181411743,
"rewards/margins": 0.4973115026950836,
"rewards/rejected": -1.3788065910339355,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.7373358011245728,
"logits/rejected": -0.3627234399318695,
"logps/chosen": -384.5245666503906,
"logps/pi_response": -285.951171875,
"logps/ref_response": -195.06085205078125,
"logps/rejected": -552.2709350585938,
"loss": 0.5429,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.024389624595642,
"rewards/margins": 0.5978730916976929,
"rewards/rejected": -1.6222625970840454,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8074722290039062,
"logits/rejected": -0.4555937349796295,
"logps/chosen": -363.5116271972656,
"logps/pi_response": -276.3110046386719,
"logps/ref_response": -198.39134216308594,
"logps/rejected": -539.1277465820312,
"loss": 0.5798,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.7972265481948853,
"rewards/margins": 0.6627088785171509,
"rewards/rejected": -1.4599354267120361,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.7802283763885498,
"logits/rejected": -0.35213881731033325,
"logps/chosen": -357.5544128417969,
"logps/pi_response": -271.56719970703125,
"logps/ref_response": -197.29266357421875,
"logps/rejected": -542.0772094726562,
"loss": 0.5593,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.7836874723434448,
"rewards/margins": 0.6558796167373657,
"rewards/rejected": -1.4395670890808105,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.9855061769485474,
"logits/rejected": -0.5764757990837097,
"logps/chosen": -366.1041564941406,
"logps/pi_response": -283.294921875,
"logps/ref_response": -215.0994873046875,
"logps/rejected": -516.2839965820312,
"loss": 0.5699,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.7308257818222046,
"rewards/margins": 0.46794968843460083,
"rewards/rejected": -1.1987755298614502,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5937920936248587,
"train_runtime": 4171.3796,
"train_samples_per_second": 4.885,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}