{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9905213270142181,
"eval_steps": 100,
"global_step": 210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009478672985781991,
"grad_norm": 31.999091462105085,
"learning_rate": 2.3809523809523807e-08,
"logits/chosen": -1.3901093006134033,
"logits/rejected": -1.3982200622558594,
"logps/chosen": -439.7777099609375,
"logps/rejected": -517.9480590820312,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.0947867298578199,
"grad_norm": 31.526233424514775,
"learning_rate": 2.3809523809523806e-07,
"logits/chosen": -1.1840474605560303,
"logits/rejected": -1.2023670673370361,
"logps/chosen": -318.02642822265625,
"logps/rejected": -345.5296325683594,
"loss": 0.6944,
"rewards/accuracies": 0.4444444477558136,
"rewards/chosen": 0.0005787869449704885,
"rewards/margins": 0.002175838453695178,
"rewards/rejected": -0.0015970510430634022,
"step": 10
},
{
"epoch": 0.1895734597156398,
"grad_norm": 29.460026918462425,
"learning_rate": 4.761904761904761e-07,
"logits/chosen": -1.2405064105987549,
"logits/rejected": -1.2777436971664429,
"logps/chosen": -325.66754150390625,
"logps/rejected": -444.10162353515625,
"loss": 0.6877,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0043294974602758884,
"rewards/margins": 0.01897953823208809,
"rewards/rejected": -0.014650041237473488,
"step": 20
},
{
"epoch": 0.2843601895734597,
"grad_norm": 29.858845825573614,
"learning_rate": 4.972077065562821e-07,
"logits/chosen": -1.2667722702026367,
"logits/rejected": -1.2541126012802124,
"logps/chosen": -379.63861083984375,
"logps/rejected": -386.56842041015625,
"loss": 0.6743,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.015792345628142357,
"rewards/margins": 0.05691219121217728,
"rewards/rejected": -0.04111984372138977,
"step": 30
},
{
"epoch": 0.3791469194312796,
"grad_norm": 28.04723285384618,
"learning_rate": 4.876353872369572e-07,
"logits/chosen": -1.2606487274169922,
"logits/rejected": -1.2776422500610352,
"logps/chosen": -330.6627197265625,
"logps/rejected": -432.9537658691406,
"loss": 0.6487,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.060253970324993134,
"rewards/margins": 0.0897565707564354,
"rewards/rejected": -0.029502594843506813,
"step": 40
},
{
"epoch": 0.47393364928909953,
"grad_norm": 29.04625829417335,
"learning_rate": 4.715123776075336e-07,
"logits/chosen": -1.2534068822860718,
"logits/rejected": -1.1865966320037842,
"logps/chosen": -307.3436279296875,
"logps/rejected": -284.7574462890625,
"loss": 0.6288,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.020858120173215866,
"rewards/margins": 0.09515110403299332,
"rewards/rejected": -0.07429297268390656,
"step": 50
},
{
"epoch": 0.5687203791469194,
"grad_norm": 28.336384421687487,
"learning_rate": 4.492831268057306e-07,
"logits/chosen": -1.2906352281570435,
"logits/rejected": -1.3070456981658936,
"logps/chosen": -324.20489501953125,
"logps/rejected": -374.00274658203125,
"loss": 0.5761,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.08636633306741714,
"rewards/margins": 0.28788790106773376,
"rewards/rejected": -0.20152156054973602,
"step": 60
},
{
"epoch": 0.6635071090047393,
"grad_norm": 24.885818315067937,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": -1.330127477645874,
"logits/rejected": -1.276735782623291,
"logps/chosen": -344.3301086425781,
"logps/rejected": -326.61151123046875,
"loss": 0.5866,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.132747620344162,
"rewards/margins": 0.28044360876083374,
"rewards/rejected": -0.14769601821899414,
"step": 70
},
{
"epoch": 0.7582938388625592,
"grad_norm": 25.726269521197576,
"learning_rate": 3.891084338941603e-07,
"logits/chosen": -1.2656139135360718,
"logits/rejected": -1.2805755138397217,
"logps/chosen": -338.24822998046875,
"logps/rejected": -349.0716857910156,
"loss": 0.5703,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.06792763620615005,
"rewards/margins": 0.5581067204475403,
"rewards/rejected": -0.4901791214942932,
"step": 80
},
{
"epoch": 0.8530805687203792,
"grad_norm": 23.99960856337552,
"learning_rate": 3.528217757826529e-07,
"logits/chosen": -1.303812026977539,
"logits/rejected": -1.2814157009124756,
"logps/chosen": -288.6119689941406,
"logps/rejected": -334.3503723144531,
"loss": 0.5343,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.0316891223192215,
"rewards/margins": 0.408654123544693,
"rewards/rejected": -0.37696507573127747,
"step": 90
},
{
"epoch": 0.9478672985781991,
"grad_norm": 24.512409938030444,
"learning_rate": 3.137007182236637e-07,
"logits/chosen": -1.3553069829940796,
"logits/rejected": -1.3930418491363525,
"logps/chosen": -394.2057800292969,
"logps/rejected": -524.2303466796875,
"loss": 0.528,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.024631375446915627,
"rewards/margins": 0.491068035364151,
"rewards/rejected": -0.5156994462013245,
"step": 100
},
{
"epoch": 0.9478672985781991,
"eval_logits/chosen": -1.3159226179122925,
"eval_logits/rejected": -1.3169375658035278,
"eval_logps/chosen": -320.6200866699219,
"eval_logps/rejected": -330.54302978515625,
"eval_loss": 0.5266835689544678,
"eval_rewards/accuracies": 0.7604166865348816,
"eval_rewards/chosen": -0.025471201166510582,
"eval_rewards/margins": 0.5800454020500183,
"eval_rewards/rejected": -0.6055166125297546,
"eval_runtime": 37.7626,
"eval_samples_per_second": 19.861,
"eval_steps_per_second": 0.636,
"step": 100
},
{
"epoch": 1.042654028436019,
"grad_norm": 18.302035950206864,
"learning_rate": 2.728236777596621e-07,
"logits/chosen": -1.3288469314575195,
"logits/rejected": -1.3007264137268066,
"logps/chosen": -330.83392333984375,
"logps/rejected": -358.9930419921875,
"loss": 0.4808,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.03551309555768967,
"rewards/margins": 0.604155421257019,
"rewards/rejected": -0.5686423778533936,
"step": 110
},
{
"epoch": 1.1374407582938388,
"grad_norm": 21.060796057076118,
"learning_rate": 2.3131747660339394e-07,
"logits/chosen": -1.3152925968170166,
"logits/rejected": -1.2728514671325684,
"logps/chosen": -354.1836242675781,
"logps/rejected": -403.5113525390625,
"loss": 0.4193,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.02837497368454933,
"rewards/margins": 0.9122093915939331,
"rewards/rejected": -0.8838345408439636,
"step": 120
},
{
"epoch": 1.2322274881516588,
"grad_norm": 20.045154065240116,
"learning_rate": 1.9032628049921556e-07,
"logits/chosen": -1.2807761430740356,
"logits/rejected": -1.3415155410766602,
"logps/chosen": -319.76373291015625,
"logps/rejected": -381.88702392578125,
"loss": 0.4052,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.052242428064346313,
"rewards/margins": 1.2852985858917236,
"rewards/rejected": -1.2330560684204102,
"step": 130
},
{
"epoch": 1.3270142180094786,
"grad_norm": 20.731255319297293,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": -1.3120365142822266,
"logits/rejected": -1.2953948974609375,
"logps/chosen": -295.6787109375,
"logps/rejected": -355.78753662109375,
"loss": 0.4188,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.07798402011394501,
"rewards/margins": 0.8329852819442749,
"rewards/rejected": -0.9109692573547363,
"step": 140
},
{
"epoch": 1.4218009478672986,
"grad_norm": 22.334784640837082,
"learning_rate": 1.1436343403356016e-07,
"logits/chosen": -1.3462207317352295,
"logits/rejected": -1.3286292552947998,
"logps/chosen": -368.55035400390625,
"logps/rejected": -399.04498291015625,
"loss": 0.3699,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.05135868862271309,
"rewards/margins": 1.201449990272522,
"rewards/rejected": -1.2528085708618164,
"step": 150
},
{
"epoch": 1.5165876777251186,
"grad_norm": 20.701829718895063,
"learning_rate": 8.148578611867113e-08,
"logits/chosen": -1.3034837245941162,
"logits/rejected": -1.3216516971588135,
"logps/chosen": -353.6603088378906,
"logps/rejected": -452.823486328125,
"loss": 0.3858,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.15693092346191406,
"rewards/margins": 1.356492519378662,
"rewards/rejected": -1.5134233236312866,
"step": 160
},
{
"epoch": 1.6113744075829384,
"grad_norm": 19.451153050692426,
"learning_rate": 5.325342458482779e-08,
"logits/chosen": -1.2586638927459717,
"logits/rejected": -1.292633295059204,
"logps/chosen": -292.8143310546875,
"logps/rejected": -365.19879150390625,
"loss": 0.3721,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.14380544424057007,
"rewards/margins": 1.1733076572418213,
"rewards/rejected": -1.317112922668457,
"step": 170
},
{
"epoch": 1.7061611374407581,
"grad_norm": 21.916474338608154,
"learning_rate": 3.044460665744283e-08,
"logits/chosen": -1.3701001405715942,
"logits/rejected": -1.3554545640945435,
"logps/chosen": -404.88909912109375,
"logps/rejected": -457.29754638671875,
"loss": 0.3622,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.04532099887728691,
"rewards/margins": 1.295754075050354,
"rewards/rejected": -1.3410749435424805,
"step": 180
},
{
"epoch": 1.8009478672985781,
"grad_norm": 18.57991978572858,
"learning_rate": 1.368808340056879e-08,
"logits/chosen": -1.3218441009521484,
"logits/rejected": -1.3573790788650513,
"logps/chosen": -320.82342529296875,
"logps/rejected": -399.82952880859375,
"loss": 0.3755,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.12398891150951385,
"rewards/margins": 1.0439832210540771,
"rewards/rejected": -1.1679723262786865,
"step": 190
},
{
"epoch": 1.8957345971563981,
"grad_norm": 19.884917777757575,
"learning_rate": 3.4457674771554422e-09,
"logits/chosen": -1.2637640237808228,
"logits/rejected": -1.3061563968658447,
"logps/chosen": -331.66986083984375,
"logps/rejected": -383.78948974609375,
"loss": 0.3731,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10066553205251694,
"rewards/margins": 1.0765092372894287,
"rewards/rejected": -1.177174687385559,
"step": 200
},
{
"epoch": 1.8957345971563981,
"eval_logits/chosen": -1.3081656694412231,
"eval_logits/rejected": -1.3098862171173096,
"eval_logps/chosen": -325.0733337402344,
"eval_logps/rejected": -340.2323303222656,
"eval_loss": 0.4820757508277893,
"eval_rewards/accuracies": 0.7604166865348816,
"eval_rewards/chosen": -0.2481323480606079,
"eval_rewards/margins": 0.8418500423431396,
"eval_rewards/rejected": -1.0899823904037476,
"eval_runtime": 37.8271,
"eval_samples_per_second": 19.827,
"eval_steps_per_second": 0.634,
"step": 200
},
{
"epoch": 1.9905213270142181,
"grad_norm": 21.350222142767173,
"learning_rate": 0.0,
"logits/chosen": -1.3175251483917236,
"logits/rejected": -1.3090837001800537,
"logps/chosen": -369.90289306640625,
"logps/rejected": -375.2828674316406,
"loss": 0.4175,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.085756316781044,
"rewards/margins": 0.9539656639099121,
"rewards/rejected": -1.039721965789795,
"step": 210
},
{
"epoch": 1.9905213270142181,
"step": 210,
"total_flos": 0.0,
"train_loss": 0.5004417010716029,
"train_runtime": 1390.6464,
"train_samples_per_second": 9.708,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 210,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}