llava-lora-dpo-1227lrvtail2000_sft-self-sampled-beta-0.5-lr-5e-6-avg-False-epoch-2/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8666666666666667,
  "global_step": 14,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "logps_train/chosen": -100.49485778808594,
      "logps_train/ref_chosen": -100.5,
      "logps_train/ref_rejected": -105.0,
      "logps_train/rejected": -104.80752563476562,
      "rewards_train/accuracies": 0.46875,
      "rewards_train/chosen": -0.020498279482126236,
      "rewards_train/margins": -0.021032828837633133,
      "rewards_train/rejected": 0.000534549355506897,
      "step": 0
    },
    {
      "epoch": 0,
      "logps_train/chosen": -89.90950012207031,
      "logps_train/ref_chosen": -90.0,
      "logps_train/ref_rejected": -101.0,
      "logps_train/rejected": -100.86872863769531,
      "rewards_train/accuracies": 0.5,
      "rewards_train/chosen": 0.0191262885928154,
      "rewards_train/margins": 0.0020734891295433044,
      "rewards_train/rejected": 0.017052799463272095,
      "step": 0
    },
    {
      "epoch": 0.13,
      "learning_rate": 5e-06,
      "loss": 0.7043,
      "step": 1
    },
    {
      "epoch": 0.13,
      "logps_train/chosen": -104.38166809082031,
      "logps_train/ref_chosen": -104.5,
      "logps_train/ref_rejected": -98.0,
      "logps_train/rejected": -97.89907836914062,
      "rewards_train/accuracies": 0.515625,
      "rewards_train/chosen": 0.03059956058859825,
      "rewards_train/margins": 0.004916800186038017,
      "rewards_train/rejected": 0.025682760402560234,
      "step": 1
    },
    {
      "epoch": 0.13,
      "logps_train/chosen": -94.88069152832031,
      "logps_train/ref_chosen": -95.0,
      "logps_train/ref_rejected": -95.0,
      "logps_train/rejected": -95.29293823242188,
      "rewards_train/accuracies": 0.546875,
      "rewards_train/chosen": -0.009438544511795044,
      "rewards_train/margins": -0.020688317716121674,
      "rewards_train/rejected": 0.01124977320432663,
      "step": 1
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.927354543565131e-06,
      "loss": 0.7061,
      "step": 2
    },
    {
      "epoch": 0.27,
      "logps_train/chosen": -82.45372009277344,
      "logps_train/ref_chosen": -82.5,
      "logps_train/ref_rejected": -89.0,
      "logps_train/rejected": -89.00653839111328,
      "rewards_train/accuracies": 0.5625,
      "rewards_train/chosen": 0.0465165413916111,
      "rewards_train/margins": 0.0453295623883605,
      "rewards_train/rejected": 0.001186979003250599,
      "step": 2
    },
    {
      "epoch": 0.27,
      "logps_train/chosen": -100.2186508178711,
      "logps_train/ref_chosen": -100.0,
      "logps_train/ref_rejected": -106.0,
      "logps_train/rejected": -106.00666809082031,
      "rewards_train/accuracies": 0.59375,
      "rewards_train/chosen": -0.029247179627418518,
      "rewards_train/margins": -0.0325072705745697,
      "rewards_train/rejected": 0.003260090947151184,
      "step": 2
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.7136400641330245e-06,
      "loss": 0.6969,
      "step": 3
    },
    {
      "epoch": 0.4,
      "logps_train/chosen": -85.843994140625,
      "logps_train/ref_chosen": -86.0,
      "logps_train/ref_rejected": -90.0,
      "logps_train/rejected": -90.01055908203125,
      "rewards_train/accuracies": 0.5625,
      "rewards_train/chosen": 0.0008565783500671387,
      "rewards_train/margins": -0.012054374441504478,
      "rewards_train/rejected": 0.012910952791571617,
      "step": 3
    },
    {
      "epoch": 0.4,
      "logps_train/chosen": -92.26002502441406,
      "logps_train/ref_chosen": -92.0,
      "logps_train/ref_rejected": -77.5,
      "logps_train/rejected": -77.61531829833984,
      "rewards_train/accuracies": 0.546875,
      "rewards_train/chosen": 0.024773243814706802,
      "rewards_train/margins": 0.04251495748758316,
      "rewards_train/rejected": -0.017741713672876358,
      "step": 3
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.3712768704277535e-06,
      "loss": 0.6936,
      "step": 4
    },
    {
      "epoch": 0.53,
      "logps_train/chosen": -88.51109313964844,
      "logps_train/ref_chosen": -88.5,
      "logps_train/ref_rejected": -91.5,
      "logps_train/rejected": -91.55830383300781,
      "rewards_train/accuracies": 0.53125,
      "rewards_train/chosen": 0.044210705906152725,
      "rewards_train/margins": 0.0598596166819334,
      "rewards_train/rejected": -0.015648910775780678,
      "step": 4
    },
    {
      "epoch": 0.53,
      "logps_train/chosen": -104.84616088867188,
      "logps_train/ref_chosen": -105.0,
      "logps_train/ref_rejected": -104.5,
      "logps_train/rejected": -104.39971923828125,
      "rewards_train/accuracies": 0.515625,
      "rewards_train/chosen": 0.04029738903045654,
      "rewards_train/margins": 0.04008368402719498,
      "rewards_train/rejected": 0.00021370500326156616,
      "step": 4
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.92016186682789e-06,
      "loss": 0.6761,
      "step": 5
    },
    {
      "epoch": 0.67,
      "logps_train/chosen": -109.26580047607422,
      "logps_train/ref_chosen": -109.0,
      "logps_train/ref_rejected": -99.5,
      "logps_train/rejected": -99.34159851074219,
      "rewards_train/accuracies": 0.40625,
      "rewards_train/chosen": 0.017489098012447357,
      "rewards_train/margins": -0.0021405071020126343,
      "rewards_train/rejected": 0.01962960511445999,
      "step": 5
    },
    {
      "epoch": 0.67,
      "logps_train/chosen": -82.48857116699219,
      "logps_train/ref_chosen": -82.5,
      "logps_train/ref_rejected": -80.0,
      "logps_train/rejected": -79.8382568359375,
      "rewards_train/accuracies": 0.46875,
      "rewards_train/chosen": 0.05613037571310997,
      "rewards_train/margins": 0.01352725550532341,
      "rewards_train/rejected": 0.04260312020778656,
      "step": 5
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.386512217606339e-06,
      "loss": 0.6952,
      "step": 6
    },
    {
      "epoch": 0.8,
      "logps_train/chosen": -92.07105255126953,
      "logps_train/ref_chosen": -92.0,
      "logps_train/ref_rejected": -89.5,
      "logps_train/rejected": -89.48802185058594,
      "rewards_train/accuracies": 0.53125,
      "rewards_train/chosen": 0.07213925570249557,
      "rewards_train/margins": -0.003675796091556549,
      "rewards_train/rejected": 0.07581505179405212,
      "step": 6
    },
    {
      "epoch": 0.8,
      "logps_train/chosen": -88.25656127929688,
      "logps_train/ref_chosen": -88.5,
      "logps_train/ref_rejected": -94.0,
      "logps_train/rejected": -93.6957778930664,
      "rewards_train/accuracies": 0.46875,
      "rewards_train/chosen": 0.025041330605745316,
      "rewards_train/margins": -0.009836137294769287,
      "rewards_train/rejected": 0.0348774679005146,
      "step": 6
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.8013417006383078e-06,
      "loss": 0.7033,
      "step": 7
    },
    {
      "epoch": 0.93,
      "logps_train/chosen": -106.97582244873047,
      "logps_train/ref_chosen": -107.0,
      "logps_train/ref_rejected": -104.0,
      "logps_train/rejected": -104.00101470947266,
      "rewards_train/accuracies": 0.78125,
      "rewards_train/chosen": 0.1645529866218567,
      "rewards_train/margins": 0.235374353826046,
      "rewards_train/rejected": -0.0708213672041893,
      "step": 7
    },
    {
      "epoch": 0.93,
      "logps_train/chosen": -95.53459167480469,
      "logps_train/ref_chosen": -96.0,
      "logps_train/ref_rejected": -90.5,
      "logps_train/rejected": -90.36405944824219,
      "rewards_train/accuracies": 0.734375,
      "rewards_train/chosen": 0.18082404136657715,
      "rewards_train/margins": 0.21460064873099327,
      "rewards_train/rejected": -0.03377660736441612,
      "step": 7
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.1986582993616926e-06,
      "loss": 0.5967,
      "step": 8
    },
    {
      "epoch": 1.07,
      "logps_train/chosen": -103.63533020019531,
      "logps_train/ref_chosen": -104.0,
      "logps_train/ref_rejected": -91.0,
      "logps_train/rejected": -91.11985778808594,
      "rewards_train/accuracies": 0.6875,
      "rewards_train/chosen": 0.211873859167099,
      "rewards_train/margins": 0.20722994953393936,
      "rewards_train/rejected": 0.0046439096331596375,
      "step": 8
    },
    {
      "epoch": 1.07,
      "logps_train/chosen": -89.88359069824219,
      "logps_train/ref_chosen": -90.0,
      "logps_train/ref_rejected": -91.0,
      "logps_train/rejected": -91.22651672363281,
      "rewards_train/accuracies": 0.71875,
      "rewards_train/chosen": 0.1387726217508316,
      "rewards_train/margins": 0.168781116604805,
      "rewards_train/rejected": -0.03000849485397339,
      "step": 8
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.613487782393661e-06,
      "loss": 0.6165,
      "step": 9
    },
    {
      "epoch": 1.2,
      "logps_train/chosen": -99.79483795166016,
      "logps_train/ref_chosen": -100.0,
      "logps_train/ref_rejected": -91.5,
      "logps_train/rejected": -91.58244323730469,
      "rewards_train/accuracies": 0.78125,
      "rewards_train/chosen": 0.2085389792919159,
      "rewards_train/margins": 0.2784496992826462,
      "rewards_train/rejected": -0.06991071999073029,
      "step": 9
    },
    {
      "epoch": 1.2,
      "logps_train/chosen": -97.40536499023438,
      "logps_train/ref_chosen": -98.0,
      "logps_train/ref_rejected": -95.5,
      "logps_train/rejected": -95.38594055175781,
      "rewards_train/accuracies": 0.765625,
      "rewards_train/chosen": 0.16743528842926025,
      "rewards_train/margins": 0.1775425188243389,
      "rewards_train/rejected": -0.010107230395078659,
      "step": 9
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.079838133172111e-06,
      "loss": 0.5951,
      "step": 10
    },
    {
      "epoch": 1.33,
      "logps_train/chosen": -99.17311096191406,
      "logps_train/ref_chosen": -99.5,
      "logps_train/ref_rejected": -97.0,
      "logps_train/rejected": -96.72930908203125,
      "rewards_train/accuracies": 0.703125,
      "rewards_train/chosen": 0.21544356644153595,
      "rewards_train/margins": 0.18458961695432663,
      "rewards_train/rejected": 0.03085394948720932,
      "step": 10
    },
    {
      "epoch": 1.33,
      "logps_train/chosen": -90.94091796875,
      "logps_train/ref_chosen": -91.0,
      "logps_train/ref_rejected": -97.5,
      "logps_train/rejected": -97.65658569335938,
      "rewards_train/accuracies": 0.8125,
      "rewards_train/chosen": 0.21173310279846191,
      "rewards_train/margins": 0.18907413445413113,
      "rewards_train/rejected": 0.022658968344330788,
      "step": 10
    },
    {
      "epoch": 1.47,
      "learning_rate": 6.28723129572247e-07,
      "loss": 0.6138,
      "step": 11
    },
    {
      "epoch": 1.47,
      "logps_train/chosen": -96.39669799804688,
      "logps_train/ref_chosen": -97.0,
      "logps_train/ref_rejected": -99.0,
      "logps_train/rejected": -98.8724136352539,
      "rewards_train/accuracies": 0.8125,
      "rewards_train/chosen": 0.16395366191864014,
      "rewards_train/margins": 0.25006232410669327,
      "rewards_train/rejected": -0.08610866218805313,
      "step": 11
    },
    {
      "epoch": 1.47,
      "logps_train/chosen": -80.8447036743164,
      "logps_train/ref_chosen": -81.0,
      "logps_train/ref_rejected": -90.0,
      "logps_train/rejected": -90.13240814208984,
      "rewards_train/accuracies": 0.734375,
      "rewards_train/chosen": 0.13546136021614075,
      "rewards_train/margins": 0.16997329890727997,
      "rewards_train/rejected": -0.03451193869113922,
      "step": 11
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.8635993586697555e-07,
      "loss": 0.6012,
      "step": 12
    },
    {
      "epoch": 1.6,
      "logps_train/chosen": -89.55223846435547,
      "logps_train/ref_chosen": -90.0,
      "logps_train/ref_rejected": -96.0,
      "logps_train/rejected": -95.89089965820312,
      "rewards_train/accuracies": 0.84375,
      "rewards_train/chosen": 0.20703516900539398,
      "rewards_train/margins": 0.23602662980556488,
      "rewards_train/rejected": -0.0289914608001709,
      "step": 12
    },
    {
      "epoch": 1.6,
      "logps_train/chosen": -82.7729263305664,
      "logps_train/ref_chosen": -83.0,
      "logps_train/ref_rejected": -85.0,
      "logps_train/rejected": -85.27727508544922,
      "rewards_train/accuracies": 0.796875,
      "rewards_train/chosen": 0.17896729707717896,
      "rewards_train/margins": 0.2392353191971779,
      "rewards_train/rejected": -0.06026802211999893,
      "step": 12
    },
    {
      "epoch": 1.73,
      "learning_rate": 7.264545643486997e-08,
      "loss": 0.5902,
      "step": 13
    },
    {
      "epoch": 1.73,
      "logps_train/chosen": -104.69845581054688,
      "logps_train/ref_chosen": -105.0,
      "logps_train/ref_rejected": -101.0,
      "logps_train/rejected": -100.98780822753906,
      "rewards_train/accuracies": 0.796875,
      "rewards_train/chosen": 0.19716176390647888,
      "rewards_train/margins": 0.24550895392894745,
      "rewards_train/rejected": -0.04834719002246857,
      "step": 13
    },
    {
      "epoch": 1.73,
      "logps_train/chosen": -90.20387268066406,
      "logps_train/ref_chosen": -91.0,
      "logps_train/ref_rejected": -85.0,
      "logps_train/rejected": -85.12068939208984,
      "rewards_train/accuracies": 0.65625,
      "rewards_train/chosen": 0.1614937037229538,
      "rewards_train/margins": 0.1774044744670391,
      "rewards_train/rejected": -0.015910770744085312,
      "step": 13
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.0,
      "loss": 0.6056,
      "step": 14
    },
    {
      "epoch": 1.87,
      "step": 14,
      "total_flos": 0.0,
      "train_loss": 0.6496244881834302,
      "train_runtime": 164.1376,
      "train_samples_per_second": 11.076,
      "train_steps_per_second": 0.085
    }
  ],
  "max_steps": 14,
  "num_train_epochs": 2,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}
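
A minimal sketch of how one might read this file: assuming the cleaned JSON above is saved locally as "trainer_state.json" (the filename is taken from the path header; everything else here is illustrative, not part of the original file), the snippet below pulls the logged loss and DPO reward margins out of log_history. Note that each optimizer step contributes one entry with "loss"/"learning_rate" and two entries with the rewards_train/* and logps_train/* metrics, so both metric points per step are kept.

import json

# Load the trainer state dump (assumed local path).
with open("trainer_state.json") as f:
    state = json.load(f)

loss_points = []    # (step, loss) from the optimizer-step entries
margin_points = []  # (step, rewards_train/margins) from the metric entries

for entry in state["log_history"]:
    if "loss" in entry:
        loss_points.append((entry["step"], entry["loss"]))
    if "rewards_train/margins" in entry:
        margin_points.append((entry["step"], entry["rewards_train/margins"]))

# The final log entry carries the run summary (train_loss, runtime, throughput).
print("final train_loss:", state["log_history"][-1].get("train_loss"))
print("loss by step:", loss_points)
print("reward margins by step:", margin_points)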