llama2-7b-dpo-full-wo-live_qa-ep3 / trainer_state.json
Minbyul's picture
Model save
b820d93 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9969788519637462,
"eval_steps": 100,
"global_step": 165,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 3.774768176591989,
"learning_rate": 2.941176470588235e-08,
"logits/chosen": 0.48741579055786133,
"logits/rejected": -0.8717803955078125,
"logps/chosen": -311.44610595703125,
"logps/rejected": -1042.2933349609375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"grad_norm": 4.065933729000048,
"learning_rate": 2.941176470588235e-07,
"logits/chosen": 0.3187962770462036,
"logits/rejected": -0.46175992488861084,
"logps/chosen": -526.5966796875,
"logps/rejected": -899.632568359375,
"loss": 0.6931,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.0003381037386134267,
"rewards/margins": 0.00014021807874087244,
"rewards/rejected": 0.00019788570352829993,
"step": 10
},
{
"epoch": 0.12,
"grad_norm": 4.133159908424447,
"learning_rate": 4.994932636402031e-07,
"logits/chosen": 0.22923466563224792,
"logits/rejected": -0.6458711624145508,
"logps/chosen": -566.1712646484375,
"logps/rejected": -926.1541137695312,
"loss": 0.6919,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.0022346877958625555,
"rewards/margins": 0.0030761375091969967,
"rewards/rejected": -0.0008414499461650848,
"step": 20
},
{
"epoch": 0.18,
"grad_norm": 3.759041431537677,
"learning_rate": 4.905416503522123e-07,
"logits/chosen": 0.2407102882862091,
"logits/rejected": -0.7926596999168396,
"logps/chosen": -523.1210327148438,
"logps/rejected": -1028.3199462890625,
"loss": 0.6855,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.010289192199707031,
"rewards/margins": 0.013627497479319572,
"rewards/rejected": -0.003338304813951254,
"step": 30
},
{
"epoch": 0.24,
"grad_norm": 3.918700608724971,
"learning_rate": 4.707922373336523e-07,
"logits/chosen": 0.14743538200855255,
"logits/rejected": -0.7249930500984192,
"logps/chosen": -524.011474609375,
"logps/rejected": -989.4501953125,
"loss": 0.675,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 0.016446446999907494,
"rewards/margins": 0.051999401301145554,
"rewards/rejected": -0.03555295616388321,
"step": 40
},
{
"epoch": 0.3,
"grad_norm": 3.6017852179026626,
"learning_rate": 4.4113156629677313e-07,
"logits/chosen": 0.23459818959236145,
"logits/rejected": -0.6225197911262512,
"logps/chosen": -481.66455078125,
"logps/rejected": -867.3211059570312,
"loss": 0.6639,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.03439263254404068,
"rewards/margins": 0.06260526925325394,
"rewards/rejected": -0.02821262739598751,
"step": 50
},
{
"epoch": 0.36,
"grad_norm": 3.679527248386035,
"learning_rate": 4.0289109058972283e-07,
"logits/chosen": 0.26775047183036804,
"logits/rejected": -0.49902766942977905,
"logps/chosen": -516.3983154296875,
"logps/rejected": -819.7734375,
"loss": 0.6398,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.03639604151248932,
"rewards/margins": 0.1593528836965561,
"rewards/rejected": -0.12295685708522797,
"step": 60
},
{
"epoch": 0.42,
"grad_norm": 3.945875727521845,
"learning_rate": 3.577874068920446e-07,
"logits/chosen": 0.26115402579307556,
"logits/rejected": -0.6307616233825684,
"logps/chosen": -534.6641845703125,
"logps/rejected": -911.5435791015625,
"loss": 0.6322,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 0.048932626843452454,
"rewards/margins": 0.24001319706439972,
"rewards/rejected": -0.19108060002326965,
"step": 70
},
{
"epoch": 0.48,
"grad_norm": 3.7126637404674536,
"learning_rate": 3.078451980100854e-07,
"logits/chosen": 0.20563717186450958,
"logits/rejected": -0.688762903213501,
"logps/chosen": -493.32684326171875,
"logps/rejected": -957.6318359375,
"loss": 0.6237,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.060481660068035126,
"rewards/margins": 0.21118538081645966,
"rewards/rejected": -0.15070374310016632,
"step": 80
},
{
"epoch": 0.54,
"grad_norm": 4.119949298182235,
"learning_rate": 2.553063458334059e-07,
"logits/chosen": 0.3919462263584137,
"logits/rejected": -0.5500736832618713,
"logps/chosen": -510.05712890625,
"logps/rejected": -912.9411010742188,
"loss": 0.6164,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.05354578420519829,
"rewards/margins": 0.2815362215042114,
"rewards/rejected": -0.22799046337604523,
"step": 90
},
{
"epoch": 0.6,
"grad_norm": 4.539444195047728,
"learning_rate": 2.0252929432814287e-07,
"logits/chosen": 0.23407666385173798,
"logits/rejected": -0.6277016401290894,
"logps/chosen": -514.2950439453125,
"logps/rejected": -985.7261962890625,
"loss": 0.6065,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 0.046576742082834244,
"rewards/margins": 0.26194584369659424,
"rewards/rejected": -0.2153691053390503,
"step": 100
},
{
"epoch": 0.6,
"eval_logits/chosen": -0.1363597810268402,
"eval_logits/rejected": -0.3805391788482666,
"eval_logps/chosen": -523.0221557617188,
"eval_logps/rejected": -812.6375732421875,
"eval_loss": 0.6296960115432739,
"eval_rewards/accuracies": 0.7678571343421936,
"eval_rewards/chosen": 0.07959667593240738,
"eval_rewards/margins": 0.17506957054138184,
"eval_rewards/rejected": -0.09547291696071625,
"eval_runtime": 22.7695,
"eval_samples_per_second": 9.135,
"eval_steps_per_second": 0.307,
"step": 100
},
{
"epoch": 0.66,
"grad_norm": 4.166464578639834,
"learning_rate": 1.5188318011445906e-07,
"logits/chosen": 0.09842907637357712,
"logits/rejected": -0.7154465913772583,
"logps/chosen": -633.3096923828125,
"logps/rejected": -972.07861328125,
"loss": 0.5933,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.04117094725370407,
"rewards/margins": 0.2970955967903137,
"rewards/rejected": -0.25592464208602905,
"step": 110
},
{
"epoch": 0.73,
"grad_norm": 4.767777281679362,
"learning_rate": 1.0564148305586295e-07,
"logits/chosen": 0.2290249764919281,
"logits/rejected": -0.5675751566886902,
"logps/chosen": -553.788330078125,
"logps/rejected": -963.9578247070312,
"loss": 0.5795,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 0.04246982932090759,
"rewards/margins": 0.5371382832527161,
"rewards/rejected": -0.4946684241294861,
"step": 120
},
{
"epoch": 0.79,
"grad_norm": 4.959401739670467,
"learning_rate": 6.587997083462196e-08,
"logits/chosen": 0.1415528953075409,
"logits/rejected": -0.6273466348648071,
"logps/chosen": -579.4324951171875,
"logps/rejected": -927.8792114257812,
"loss": 0.5587,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.02018633857369423,
"rewards/margins": 0.7045117616653442,
"rewards/rejected": -0.6843255162239075,
"step": 130
},
{
"epoch": 0.85,
"grad_norm": 4.589724744119317,
"learning_rate": 3.438351873250492e-08,
"logits/chosen": 0.2175011932849884,
"logits/rejected": -0.5643750429153442,
"logps/chosen": -543.2364501953125,
"logps/rejected": -1040.180908203125,
"loss": 0.5653,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.03240719065070152,
"rewards/margins": 0.5543904900550842,
"rewards/rejected": -0.52198326587677,
"step": 140
},
{
"epoch": 0.91,
"grad_norm": 5.293978243611277,
"learning_rate": 1.256598743236703e-08,
"logits/chosen": 0.2741110026836395,
"logits/rejected": -0.6036696434020996,
"logps/chosen": -437.42901611328125,
"logps/rejected": -982.3721923828125,
"loss": 0.5555,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.033161625266075134,
"rewards/margins": 0.5011934638023376,
"rewards/rejected": -0.46803179383277893,
"step": 150
},
{
"epoch": 0.97,
"grad_norm": 4.856259961602652,
"learning_rate": 1.406755487774386e-09,
"logits/chosen": 0.14368140697479248,
"logits/rejected": -0.6074076294898987,
"logps/chosen": -525.0721435546875,
"logps/rejected": -952.7180786132812,
"loss": 0.5519,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.04387623816728592,
"rewards/margins": 0.42462554574012756,
"rewards/rejected": -0.38074928522109985,
"step": 160
},
{
"epoch": 1.0,
"step": 165,
"total_flos": 0.0,
"train_loss": 0.6189163742643414,
"train_runtime": 2381.9724,
"train_samples_per_second": 4.446,
"train_steps_per_second": 0.069
}
],
"logging_steps": 10,
"max_steps": 165,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}