llama2-7b-dpo-lora-20231129-32 / trainer_state.json
xz-huggingface-0's picture
Model save
34b508f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9992254066615027,
"eval_steps": 100,
"global_step": 363,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.3513513513513514e-08,
"logits/chosen": -0.3176477253437042,
"logits/rejected": -0.44033315777778625,
"logps/chosen": -323.77838134765625,
"logps/rejected": -252.17037963867188,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 1.3513513513513515e-07,
"logits/chosen": -0.30215251445770264,
"logits/rejected": -0.5123822093009949,
"logps/chosen": -364.54791259765625,
"logps/rejected": -273.88922119140625,
"loss": 0.6929,
"rewards/accuracies": 0.4739583432674408,
"rewards/chosen": 0.0046452307142317295,
"rewards/margins": 0.006923990789800882,
"rewards/rejected": -0.00227876054123044,
"step": 10
},
{
"epoch": 0.17,
"learning_rate": 2.702702702702703e-07,
"logits/chosen": -0.3361722528934479,
"logits/rejected": -0.5208011865615845,
"logps/chosen": -336.6085205078125,
"logps/rejected": -258.9732666015625,
"loss": 0.6931,
"rewards/accuracies": 0.53125,
"rewards/chosen": -1.764330045261886e-05,
"rewards/margins": 0.0017105641309171915,
"rewards/rejected": -0.0017282068729400635,
"step": 20
},
{
"epoch": 0.25,
"learning_rate": 4.054054054054054e-07,
"logits/chosen": -0.30692434310913086,
"logits/rejected": -0.48451298475265503,
"logps/chosen": -340.2192077636719,
"logps/rejected": -277.30206298828125,
"loss": 0.6942,
"rewards/accuracies": 0.510937511920929,
"rewards/chosen": -0.0009899890283122659,
"rewards/margins": -0.0009086016798391938,
"rewards/rejected": -8.13871156424284e-05,
"step": 30
},
{
"epoch": 0.33,
"learning_rate": 4.95398773006135e-07,
"logits/chosen": -0.35014405846595764,
"logits/rejected": -0.48199597001075745,
"logps/chosen": -325.4999084472656,
"logps/rejected": -281.00787353515625,
"loss": 0.6933,
"rewards/accuracies": 0.47187501192092896,
"rewards/chosen": -0.0008938731625676155,
"rewards/margins": -0.00308123673312366,
"rewards/rejected": 0.0021873635705560446,
"step": 40
},
{
"epoch": 0.41,
"learning_rate": 4.800613496932515e-07,
"logits/chosen": -0.29833748936653137,
"logits/rejected": -0.4896470904350281,
"logps/chosen": -335.3420715332031,
"logps/rejected": -276.52862548828125,
"loss": 0.6938,
"rewards/accuracies": 0.47187501192092896,
"rewards/chosen": -0.0022904174402356148,
"rewards/margins": -0.005074300337582827,
"rewards/rejected": 0.002783883363008499,
"step": 50
},
{
"epoch": 0.5,
"learning_rate": 4.647239263803681e-07,
"logits/chosen": -0.3576056659221649,
"logits/rejected": -0.5076812505722046,
"logps/chosen": -317.60040283203125,
"logps/rejected": -272.95111083984375,
"loss": 0.6924,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.0018081676680594683,
"rewards/margins": 0.004018681589514017,
"rewards/rejected": -0.002210513921454549,
"step": 60
},
{
"epoch": 0.58,
"learning_rate": 4.4938650306748465e-07,
"logits/chosen": -0.3065566420555115,
"logits/rejected": -0.47715896368026733,
"logps/chosen": -332.10443115234375,
"logps/rejected": -271.3089904785156,
"loss": 0.6938,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.0021473777014762163,
"rewards/margins": 0.0010715832468122244,
"rewards/rejected": 0.00107579433824867,
"step": 70
},
{
"epoch": 0.66,
"learning_rate": 4.340490797546012e-07,
"logits/chosen": -0.3575075566768646,
"logits/rejected": -0.46308469772338867,
"logps/chosen": -353.43768310546875,
"logps/rejected": -296.3087463378906,
"loss": 0.6927,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.00587086845189333,
"rewards/margins": 0.002153487876057625,
"rewards/rejected": 0.0037173808086663485,
"step": 80
},
{
"epoch": 0.74,
"learning_rate": 4.187116564417178e-07,
"logits/chosen": -0.3637954294681549,
"logits/rejected": -0.4861740469932556,
"logps/chosen": -337.003662109375,
"logps/rejected": -293.85443115234375,
"loss": 0.6929,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.0004090176953468472,
"rewards/margins": -0.003162259701639414,
"rewards/rejected": 0.003571277018636465,
"step": 90
},
{
"epoch": 0.83,
"learning_rate": 4.0337423312883434e-07,
"logits/chosen": -0.36683008074760437,
"logits/rejected": -0.478945255279541,
"logps/chosen": -318.0301208496094,
"logps/rejected": -269.9043273925781,
"loss": 0.6922,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.0035112896002829075,
"rewards/margins": 0.0036252178251743317,
"rewards/rejected": -0.00011392822489142418,
"step": 100
},
{
"epoch": 0.91,
"learning_rate": 3.880368098159509e-07,
"logits/chosen": -0.33621570467948914,
"logits/rejected": -0.4734673500061035,
"logps/chosen": -353.40789794921875,
"logps/rejected": -288.8216552734375,
"loss": 0.6916,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.011151823215186596,
"rewards/margins": 0.006945834495127201,
"rewards/rejected": 0.0042059896513819695,
"step": 110
},
{
"epoch": 0.99,
"learning_rate": 3.7269938650306747e-07,
"logits/chosen": -0.32742369174957275,
"logits/rejected": -0.46600741147994995,
"logps/chosen": -333.48138427734375,
"logps/rejected": -268.22503662109375,
"loss": 0.6918,
"rewards/accuracies": 0.5546875,
"rewards/chosen": 0.011352723464369774,
"rewards/margins": 0.00880088098347187,
"rewards/rejected": 0.002551841316744685,
"step": 120
},
{
"epoch": 1.0,
"eval_logits/chosen": -0.17398348450660706,
"eval_logits/rejected": -0.3204001486301422,
"eval_logps/chosen": -360.14556884765625,
"eval_logps/rejected": -288.76519775390625,
"eval_loss": 0.69110107421875,
"eval_rewards/accuracies": 0.5158730149269104,
"eval_rewards/chosen": 0.011802121065557003,
"eval_rewards/margins": 0.0090893330052495,
"eval_rewards/rejected": 0.0027127889916300774,
"eval_runtime": 155.9602,
"eval_samples_per_second": 12.824,
"eval_steps_per_second": 0.404,
"step": 121
},
{
"epoch": 1.07,
"learning_rate": 3.5736196319018404e-07,
"logits/chosen": -0.3686336576938629,
"logits/rejected": -0.4647085666656494,
"logps/chosen": -308.15850830078125,
"logps/rejected": -267.60150146484375,
"loss": 0.6913,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.007929937914013863,
"rewards/margins": 0.004114674404263496,
"rewards/rejected": 0.003815263509750366,
"step": 130
},
{
"epoch": 1.16,
"learning_rate": 3.420245398773006e-07,
"logits/chosen": -0.3048830032348633,
"logits/rejected": -0.46840834617614746,
"logps/chosen": -331.2347717285156,
"logps/rejected": -280.2388916015625,
"loss": 0.6905,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.011773429811000824,
"rewards/margins": 0.00874190591275692,
"rewards/rejected": 0.0030315222684293985,
"step": 140
},
{
"epoch": 1.24,
"learning_rate": 3.2668711656441716e-07,
"logits/chosen": -0.33495235443115234,
"logits/rejected": -0.49104562401771545,
"logps/chosen": -340.7163391113281,
"logps/rejected": -282.6747741699219,
"loss": 0.6897,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.017182378098368645,
"rewards/margins": 0.010856041684746742,
"rewards/rejected": 0.006326337344944477,
"step": 150
},
{
"epoch": 1.32,
"learning_rate": 3.1134969325153373e-07,
"logits/chosen": -0.3351030945777893,
"logits/rejected": -0.4738260805606842,
"logps/chosen": -333.8556823730469,
"logps/rejected": -291.29840087890625,
"loss": 0.69,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.013335606083273888,
"rewards/margins": 0.008105851709842682,
"rewards/rejected": 0.005229754839092493,
"step": 160
},
{
"epoch": 1.4,
"learning_rate": 2.960122699386503e-07,
"logits/chosen": -0.39908671379089355,
"logits/rejected": -0.5024284720420837,
"logps/chosen": -315.42840576171875,
"logps/rejected": -271.08343505859375,
"loss": 0.6902,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.012735432013869286,
"rewards/margins": 0.008142312988638878,
"rewards/rejected": 0.004593119956552982,
"step": 170
},
{
"epoch": 1.49,
"learning_rate": 2.8067484662576686e-07,
"logits/chosen": -0.33173808455467224,
"logits/rejected": -0.5215679407119751,
"logps/chosen": -342.9329528808594,
"logps/rejected": -273.66485595703125,
"loss": 0.6903,
"rewards/accuracies": 0.5328124761581421,
"rewards/chosen": 0.010897275060415268,
"rewards/margins": 0.004992068745195866,
"rewards/rejected": 0.005905206315219402,
"step": 180
},
{
"epoch": 1.57,
"learning_rate": 2.653374233128834e-07,
"logits/chosen": -0.360731840133667,
"logits/rejected": -0.4705289900302887,
"logps/chosen": -338.09039306640625,
"logps/rejected": -281.22674560546875,
"loss": 0.6893,
"rewards/accuracies": 0.542187511920929,
"rewards/chosen": 0.013345611281692982,
"rewards/margins": 0.008156336843967438,
"rewards/rejected": 0.005189274903386831,
"step": 190
},
{
"epoch": 1.65,
"learning_rate": 2.5e-07,
"logits/chosen": -0.271279901266098,
"logits/rejected": -0.5268155932426453,
"logps/chosen": -361.9761657714844,
"logps/rejected": -279.6358642578125,
"loss": 0.689,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.015021780505776405,
"rewards/margins": 0.010343039408326149,
"rewards/rejected": 0.004678742028772831,
"step": 200
},
{
"epoch": 1.74,
"learning_rate": 2.3466257668711655e-07,
"logits/chosen": -0.33011576533317566,
"logits/rejected": -0.4637225270271301,
"logps/chosen": -347.186767578125,
"logps/rejected": -278.4908142089844,
"loss": 0.6892,
"rewards/accuracies": 0.5609375238418579,
"rewards/chosen": 0.017149794846773148,
"rewards/margins": 0.00933277327567339,
"rewards/rejected": 0.007817023433744907,
"step": 210
},
{
"epoch": 1.82,
"learning_rate": 2.1932515337423312e-07,
"logits/chosen": -0.320119172334671,
"logits/rejected": -0.4756544232368469,
"logps/chosen": -337.0110778808594,
"logps/rejected": -275.7445373535156,
"loss": 0.6889,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": 0.014787524938583374,
"rewards/margins": 0.008823606185615063,
"rewards/rejected": 0.005963918752968311,
"step": 220
},
{
"epoch": 1.9,
"learning_rate": 2.0398773006134968e-07,
"logits/chosen": -0.341126024723053,
"logits/rejected": -0.5036292672157288,
"logps/chosen": -342.09808349609375,
"logps/rejected": -284.2479553222656,
"loss": 0.688,
"rewards/accuracies": 0.551562488079071,
"rewards/chosen": 0.018701931461691856,
"rewards/margins": 0.013875585980713367,
"rewards/rejected": 0.004826345480978489,
"step": 230
},
{
"epoch": 1.98,
"learning_rate": 1.8865030674846625e-07,
"logits/chosen": -0.3302518129348755,
"logits/rejected": -0.5235751867294312,
"logps/chosen": -336.75885009765625,
"logps/rejected": -276.89056396484375,
"loss": 0.6887,
"rewards/accuracies": 0.5484374761581421,
"rewards/chosen": 0.014634380117058754,
"rewards/margins": 0.010870048776268959,
"rewards/rejected": 0.003764331340789795,
"step": 240
},
{
"epoch": 2.0,
"eval_logits/chosen": -0.1741456687450409,
"eval_logits/rejected": -0.32032549381256104,
"eval_logps/chosen": -360.0811767578125,
"eval_logps/rejected": -288.72760009765625,
"eval_loss": 0.6885652542114258,
"eval_rewards/accuracies": 0.5436508059501648,
"eval_rewards/chosen": 0.018233804032206535,
"eval_rewards/margins": 0.011760968714952469,
"eval_rewards/rejected": 0.006472836248576641,
"eval_runtime": 155.5913,
"eval_samples_per_second": 12.854,
"eval_steps_per_second": 0.405,
"step": 242
},
{
"epoch": 2.07,
"learning_rate": 1.733128834355828e-07,
"logits/chosen": -0.338714063167572,
"logits/rejected": -0.4940338730812073,
"logps/chosen": -329.88330078125,
"logps/rejected": -284.783447265625,
"loss": 0.6882,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.018329061567783356,
"rewards/margins": 0.010221143253147602,
"rewards/rejected": 0.008107921108603477,
"step": 250
},
{
"epoch": 2.15,
"learning_rate": 1.5797546012269938e-07,
"logits/chosen": -0.336908757686615,
"logits/rejected": -0.508610725402832,
"logps/chosen": -355.3616027832031,
"logps/rejected": -275.9775085449219,
"loss": 0.6872,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": 0.016039682552218437,
"rewards/margins": 0.017992937937378883,
"rewards/rejected": -0.001953254686668515,
"step": 260
},
{
"epoch": 2.23,
"learning_rate": 1.4263803680981594e-07,
"logits/chosen": -0.3601071536540985,
"logits/rejected": -0.53021240234375,
"logps/chosen": -337.66424560546875,
"logps/rejected": -281.8548889160156,
"loss": 0.6877,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.021220825612545013,
"rewards/margins": 0.01665753871202469,
"rewards/rejected": 0.004563285503536463,
"step": 270
},
{
"epoch": 2.31,
"learning_rate": 1.273006134969325e-07,
"logits/chosen": -0.2682925760746002,
"logits/rejected": -0.43069085478782654,
"logps/chosen": -344.897705078125,
"logps/rejected": -284.98138427734375,
"loss": 0.6876,
"rewards/accuracies": 0.5640624761581421,
"rewards/chosen": 0.01918674074113369,
"rewards/margins": 0.009623361751437187,
"rewards/rejected": 0.009563378989696503,
"step": 280
},
{
"epoch": 2.4,
"learning_rate": 1.1196319018404908e-07,
"logits/chosen": -0.3256112337112427,
"logits/rejected": -0.4699035584926605,
"logps/chosen": -340.5793151855469,
"logps/rejected": -278.5997314453125,
"loss": 0.688,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.019764618948101997,
"rewards/margins": 0.012027645483613014,
"rewards/rejected": 0.007736973464488983,
"step": 290
},
{
"epoch": 2.48,
"learning_rate": 9.662576687116564e-08,
"logits/chosen": -0.3254753351211548,
"logits/rejected": -0.49168673157691956,
"logps/chosen": -323.43658447265625,
"logps/rejected": -268.68194580078125,
"loss": 0.6872,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.024882303550839424,
"rewards/margins": 0.01763671264052391,
"rewards/rejected": 0.0072455937042832375,
"step": 300
},
{
"epoch": 2.56,
"learning_rate": 8.12883435582822e-08,
"logits/chosen": -0.2992364764213562,
"logits/rejected": -0.5038896799087524,
"logps/chosen": -336.90447998046875,
"logps/rejected": -273.3828430175781,
"loss": 0.6875,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.019791873171925545,
"rewards/margins": 0.015618661418557167,
"rewards/rejected": 0.00417321128770709,
"step": 310
},
{
"epoch": 2.64,
"learning_rate": 6.595092024539877e-08,
"logits/chosen": -0.3277471959590912,
"logits/rejected": -0.48526397347450256,
"logps/chosen": -316.490966796875,
"logps/rejected": -280.82403564453125,
"loss": 0.6884,
"rewards/accuracies": 0.573437511920929,
"rewards/chosen": 0.020627859979867935,
"rewards/margins": 0.014810837805271149,
"rewards/rejected": 0.005817021708935499,
"step": 320
},
{
"epoch": 2.73,
"learning_rate": 5.061349693251534e-08,
"logits/chosen": -0.29638558626174927,
"logits/rejected": -0.4925597310066223,
"logps/chosen": -344.7980651855469,
"logps/rejected": -279.2032775878906,
"loss": 0.6873,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.024601539596915245,
"rewards/margins": 0.014131123200058937,
"rewards/rejected": 0.010470417328178883,
"step": 330
},
{
"epoch": 2.81,
"learning_rate": 3.5276073619631896e-08,
"logits/chosen": -0.3097667098045349,
"logits/rejected": -0.4607706069946289,
"logps/chosen": -333.1075134277344,
"logps/rejected": -285.7237548828125,
"loss": 0.687,
"rewards/accuracies": 0.5703125,
"rewards/chosen": 0.02195788361132145,
"rewards/margins": 0.015133949927985668,
"rewards/rejected": 0.006823929958045483,
"step": 340
},
{
"epoch": 2.89,
"learning_rate": 1.9938650306748464e-08,
"logits/chosen": -0.3493804633617401,
"logits/rejected": -0.45914044976234436,
"logps/chosen": -327.6866455078125,
"logps/rejected": -290.8825378417969,
"loss": 0.6873,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.018699336796998978,
"rewards/margins": 0.008175373077392578,
"rewards/rejected": 0.010523964650928974,
"step": 350
},
{
"epoch": 2.97,
"learning_rate": 4.601226993865031e-09,
"logits/chosen": -0.3387224078178406,
"logits/rejected": -0.477064847946167,
"logps/chosen": -340.5426940917969,
"logps/rejected": -276.86090087890625,
"loss": 0.6865,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.023312732577323914,
"rewards/margins": 0.014612337574362755,
"rewards/rejected": 0.008700395002961159,
"step": 360
},
{
"epoch": 3.0,
"eval_logits/chosen": -0.17340299487113953,
"eval_logits/rejected": -0.31969568133354187,
"eval_logps/chosen": -359.973876953125,
"eval_logps/rejected": -288.69586181640625,
"eval_loss": 0.6869306564331055,
"eval_rewards/accuracies": 0.5833333134651184,
"eval_rewards/chosen": 0.028966180980205536,
"eval_rewards/margins": 0.01931903511285782,
"eval_rewards/rejected": 0.009647144004702568,
"eval_runtime": 155.81,
"eval_samples_per_second": 12.836,
"eval_steps_per_second": 0.404,
"step": 363
},
{
"epoch": 3.0,
"step": 363,
"total_flos": 0.0,
"train_loss": 0.689942762707219,
"train_runtime": 25419.9306,
"train_samples_per_second": 7.313,
"train_steps_per_second": 0.014
}
],
"logging_steps": 10,
"max_steps": 363,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}