zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
7762505 verified
raw
history blame
No virus
8.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9983361064891847,
"eval_steps": 500,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 1151.6129333090275,
"learning_rate": 3.333333333333333e-10,
"logits/chosen": -4.106247425079346,
"logits/rejected": -4.200438499450684,
"logps/chosen": -382.81439208984375,
"logps/rejected": -357.65960693359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"grad_norm": 1175.0279383615512,
"learning_rate": 3.3333333333333334e-09,
"logits/chosen": -4.217203617095947,
"logits/rejected": -4.32081413269043,
"logps/chosen": -334.6714172363281,
"logps/rejected": -313.4322509765625,
"loss": 0.7317,
"rewards/accuracies": 0.3923611044883728,
"rewards/chosen": -0.044815655797719955,
"rewards/margins": -0.03612741455435753,
"rewards/rejected": -0.008688241243362427,
"step": 10
},
{
"epoch": 0.13,
"grad_norm": 1253.9538230101048,
"learning_rate": 4.983095894354858e-09,
"logits/chosen": -4.266427040100098,
"logits/rejected": -4.4187798500061035,
"logps/chosen": -313.9143371582031,
"logps/rejected": -288.782470703125,
"loss": 0.732,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -0.000932177877984941,
"rewards/margins": -0.012739461846649647,
"rewards/rejected": 0.011807283386588097,
"step": 20
},
{
"epoch": 0.2,
"grad_norm": 1147.308654200848,
"learning_rate": 4.849231551964771e-09,
"logits/chosen": -4.301184177398682,
"logits/rejected": -4.36545991897583,
"logps/chosen": -308.3140563964844,
"logps/rejected": -285.6829833984375,
"loss": 0.7352,
"rewards/accuracies": 0.46562498807907104,
"rewards/chosen": -0.018803134560585022,
"rewards/margins": 0.008298242464661598,
"rewards/rejected": -0.02710137702524662,
"step": 30
},
{
"epoch": 0.27,
"grad_norm": 1152.01480268075,
"learning_rate": 4.588719528532341e-09,
"logits/chosen": -4.186487674713135,
"logits/rejected": -4.270204544067383,
"logps/chosen": -332.467041015625,
"logps/rejected": -307.530517578125,
"loss": 0.722,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": 0.012754167430102825,
"rewards/margins": 0.04999501258134842,
"rewards/rejected": -0.03724084421992302,
"step": 40
},
{
"epoch": 0.33,
"grad_norm": 1161.340877574017,
"learning_rate": 4.215604094671834e-09,
"logits/chosen": -4.197475910186768,
"logits/rejected": -4.366654396057129,
"logps/chosen": -333.69940185546875,
"logps/rejected": -309.05511474609375,
"loss": 0.7263,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.022939234972000122,
"rewards/margins": -0.01653190515935421,
"rewards/rejected": 0.03947114199399948,
"step": 50
},
{
"epoch": 0.4,
"grad_norm": 1135.5326964714952,
"learning_rate": 3.7500000000000005e-09,
"logits/chosen": -4.186155796051025,
"logits/rejected": -4.269167900085449,
"logps/chosen": -323.9843444824219,
"logps/rejected": -308.364990234375,
"loss": 0.7133,
"rewards/accuracies": 0.484375,
"rewards/chosen": 0.009113344363868237,
"rewards/margins": -0.005971288774162531,
"rewards/rejected": 0.015084633603692055,
"step": 60
},
{
"epoch": 0.47,
"grad_norm": 1170.598014061488,
"learning_rate": 3.2170080817777257e-09,
"logits/chosen": -4.14601469039917,
"logits/rejected": -4.301178932189941,
"logps/chosen": -344.8753967285156,
"logps/rejected": -314.6096496582031,
"loss": 0.7182,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.08767497539520264,
"rewards/margins": 0.08200599253177643,
"rewards/rejected": 0.005668987520039082,
"step": 70
},
{
"epoch": 0.53,
"grad_norm": 1088.822875661876,
"learning_rate": 2.6453620722761897e-09,
"logits/chosen": -4.220850944519043,
"logits/rejected": -4.3867106437683105,
"logps/chosen": -334.7141418457031,
"logps/rejected": -305.6958923339844,
"loss": 0.7175,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.06255482137203217,
"rewards/margins": 0.04721928387880325,
"rewards/rejected": 0.015335534699261189,
"step": 80
},
{
"epoch": 0.6,
"grad_norm": 1120.1472438016,
"learning_rate": 2.0658795558326744e-09,
"logits/chosen": -4.226521968841553,
"logits/rejected": -4.347161293029785,
"logps/chosen": -327.5687561035156,
"logps/rejected": -308.19744873046875,
"loss": 0.7128,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.03935312479734421,
"rewards/margins": 0.012340927496552467,
"rewards/rejected": 0.02701219543814659,
"step": 90
},
{
"epoch": 0.67,
"grad_norm": 1147.49194388717,
"learning_rate": 1.5098005849021078e-09,
"logits/chosen": -4.208071708679199,
"logits/rejected": -4.417771339416504,
"logps/chosen": -320.9470520019531,
"logps/rejected": -290.018310546875,
"loss": 0.712,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": 0.060337893664836884,
"rewards/margins": 0.03545590117573738,
"rewards/rejected": 0.0248819962143898,
"step": 100
},
{
"epoch": 0.73,
"grad_norm": 1131.801124941311,
"learning_rate": 1.0071035207430352e-09,
"logits/chosen": -4.222798824310303,
"logits/rejected": -4.275721549987793,
"logps/chosen": -315.00030517578125,
"logps/rejected": -303.7385559082031,
"loss": 0.7022,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.08098876476287842,
"rewards/margins": 0.037886131554841995,
"rewards/rejected": 0.04310264065861702,
"step": 110
},
{
"epoch": 0.8,
"grad_norm": 1201.8232728734756,
"learning_rate": 5.848888922025553e-10,
"logits/chosen": -4.147335052490234,
"logits/rejected": -4.3030242919921875,
"logps/chosen": -339.2781677246094,
"logps/rejected": -313.5749206542969,
"loss": 0.7129,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.08869560062885284,
"rewards/margins": 0.048744406551122665,
"rewards/rejected": 0.03995119035243988,
"step": 120
},
{
"epoch": 0.87,
"grad_norm": 1182.4206440855232,
"learning_rate": 2.659183991914696e-10,
"logits/chosen": -4.173482418060303,
"logits/rejected": -4.367284297943115,
"logps/chosen": -324.4493713378906,
"logps/rejected": -305.2012634277344,
"loss": 0.7,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.08546491712331772,
"rewards/margins": 0.01908993348479271,
"rewards/rejected": 0.06637498736381531,
"step": 130
},
{
"epoch": 0.93,
"grad_norm": 1158.9214495930962,
"learning_rate": 6.738782355044049e-11,
"logits/chosen": -4.290203094482422,
"logits/rejected": -4.35637092590332,
"logps/chosen": -308.0101318359375,
"logps/rejected": -297.37701416015625,
"loss": 0.7082,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.12010886520147324,
"rewards/margins": 0.05074785277247429,
"rewards/rejected": 0.06936100870370865,
"step": 140
},
{
"epoch": 1.0,
"grad_norm": 1208.2615961890754,
"learning_rate": 0.0,
"logits/chosen": -4.257304668426514,
"logits/rejected": -4.3310017585754395,
"logps/chosen": -312.7613830566406,
"logps/rejected": -300.3546447753906,
"loss": 0.7025,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.10821112245321274,
"rewards/margins": 0.047078292816877365,
"rewards/rejected": 0.06113281846046448,
"step": 150
},
{
"epoch": 1.0,
"step": 150,
"total_flos": 0.0,
"train_loss": 0.7162073644002279,
"train_runtime": 4512.4874,
"train_samples_per_second": 8.52,
"train_steps_per_second": 0.033
}
],
"logging_steps": 10,
"max_steps": 150,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}