zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
04ffc18 verified
raw
history blame
No virus
22.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 391,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 422.289956912934,
"learning_rate": 1.25e-09,
"logits/chosen": -4.623842239379883,
"logits/rejected": -4.85917854309082,
"logps/chosen": -239.31422424316406,
"logps/rejected": -207.56365966796875,
"loss": 0.6959,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 397.335507450448,
"learning_rate": 1.25e-08,
"logits/chosen": -4.334493160247803,
"logits/rejected": -4.643917560577393,
"logps/chosen": -265.1265563964844,
"logps/rejected": -215.76223754882812,
"loss": 0.6932,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0019649432506412268,
"rewards/margins": 0.0034448718652129173,
"rewards/rejected": -0.005409814417362213,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 411.8624893441942,
"learning_rate": 2.5e-08,
"logits/chosen": -4.506826400756836,
"logits/rejected": -4.740732192993164,
"logps/chosen": -267.86932373046875,
"logps/rejected": -216.64578247070312,
"loss": 0.6921,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.007061410695314407,
"rewards/margins": 0.010701502673327923,
"rewards/rejected": -0.003640091512352228,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 426.4548766919431,
"learning_rate": 3.75e-08,
"logits/chosen": -4.585576057434082,
"logits/rejected": -4.762608528137207,
"logps/chosen": -258.25,
"logps/rejected": -214.71231079101562,
"loss": 0.6756,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.028281379491090775,
"rewards/margins": 0.04623327776789665,
"rewards/rejected": -0.017951902002096176,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 368.66232685986097,
"learning_rate": 5e-08,
"logits/chosen": -4.62213659286499,
"logits/rejected": -4.706842422485352,
"logps/chosen": -252.6122283935547,
"logps/rejected": -220.41427612304688,
"loss": 0.6177,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.0986957773566246,
"rewards/margins": 0.1730131059885025,
"rewards/rejected": -0.07431730628013611,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 294.81806277707193,
"learning_rate": 4.989992961303737e-08,
"logits/chosen": -4.516692161560059,
"logits/rejected": -4.714283466339111,
"logps/chosen": -269.63470458984375,
"logps/rejected": -227.5962371826172,
"loss": 0.5368,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.2334650307893753,
"rewards/margins": 0.4059367775917053,
"rewards/rejected": -0.17247170209884644,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 263.2732793061953,
"learning_rate": 4.960051957873725e-08,
"logits/chosen": -4.610293388366699,
"logits/rejected": -4.734058856964111,
"logps/chosen": -237.3783721923828,
"logps/rejected": -218.6456298828125,
"loss": 0.4381,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.3744103014469147,
"rewards/margins": 0.7368327975273132,
"rewards/rejected": -0.3624224364757538,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 243.25478495437125,
"learning_rate": 4.910416686333906e-08,
"logits/chosen": -4.496267795562744,
"logits/rejected": -4.724743843078613,
"logps/chosen": -248.19540405273438,
"logps/rejected": -222.69140625,
"loss": 0.4014,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.49465441703796387,
"rewards/margins": 0.9875491261482239,
"rewards/rejected": -0.49289458990097046,
"step": 70
},
{
"epoch": 0.2,
"grad_norm": 203.40516001004536,
"learning_rate": 4.841484508350678e-08,
"logits/chosen": -4.488529682159424,
"logits/rejected": -4.711686611175537,
"logps/chosen": -254.6974639892578,
"logps/rejected": -235.7099151611328,
"loss": 0.3429,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.652252197265625,
"rewards/margins": 1.1913691759109497,
"rewards/rejected": -0.5391170978546143,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 237.79736326938396,
"learning_rate": 4.7538072695020406e-08,
"logits/chosen": -4.58192777633667,
"logits/rejected": -4.797459602355957,
"logps/chosen": -243.06143188476562,
"logps/rejected": -210.63308715820312,
"loss": 0.3085,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.8151354789733887,
"rewards/margins": 1.595442533493042,
"rewards/rejected": -0.7803069353103638,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 222.07031689896021,
"learning_rate": 4.6480868814055416e-08,
"logits/chosen": -4.459914207458496,
"logits/rejected": -4.747165679931641,
"logps/chosen": -247.2704620361328,
"logps/rejected": -236.6487274169922,
"loss": 0.2984,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.9447771906852722,
"rewards/margins": 1.8987438678741455,
"rewards/rejected": -0.953966498374939,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -4.55070686340332,
"eval_logits/rejected": -4.762002944946289,
"eval_logps/chosen": -390.7516174316406,
"eval_logps/rejected": -515.337158203125,
"eval_loss": 0.9402573704719543,
"eval_rewards/accuracies": 0.40625,
"eval_rewards/chosen": -0.027714576572179794,
"eval_rewards/margins": -0.3327641487121582,
"eval_rewards/rejected": 0.3050495684146881,
"eval_runtime": 97.8238,
"eval_samples_per_second": 20.445,
"eval_steps_per_second": 0.327,
"step": 100
},
{
"epoch": 0.28,
"grad_norm": 179.38114597248955,
"learning_rate": 4.525169702472916e-08,
"logits/chosen": -4.51773738861084,
"logits/rejected": -4.777291297912598,
"logps/chosen": -238.6410369873047,
"logps/rejected": -227.3874053955078,
"loss": 0.2711,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.0544707775115967,
"rewards/margins": 2.1246237754821777,
"rewards/rejected": -1.070152997970581,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 163.17108435846185,
"learning_rate": 4.386039762276975e-08,
"logits/chosen": -4.462746620178223,
"logits/rejected": -4.7056145668029785,
"logps/chosen": -262.86920166015625,
"logps/rejected": -238.99801635742188,
"loss": 0.267,
"rewards/accuracies": 0.96875,
"rewards/chosen": 1.3120397329330444,
"rewards/margins": 2.3509771823883057,
"rewards/rejected": -1.0389372110366821,
"step": 120
},
{
"epoch": 0.33,
"grad_norm": 229.70994586547334,
"learning_rate": 4.231810883773999e-08,
"logits/chosen": -4.532160758972168,
"logits/rejected": -4.803128719329834,
"logps/chosen": -246.3385009765625,
"logps/rejected": -227.83792114257812,
"loss": 0.2577,
"rewards/accuracies": 0.90625,
"rewards/chosen": 1.2105365991592407,
"rewards/margins": 2.461458921432495,
"rewards/rejected": -1.2509223222732544,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 152.87001146329087,
"learning_rate": 4.063717766448194e-08,
"logits/chosen": -4.559675216674805,
"logits/rejected": -4.842075824737549,
"logps/chosen": -272.3346252441406,
"logps/rejected": -246.54464721679688,
"loss": 0.2453,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.1860424280166626,
"rewards/margins": 2.6231608390808105,
"rewards/rejected": -1.4371181726455688,
"step": 140
},
{
"epoch": 0.38,
"grad_norm": 176.7311824941399,
"learning_rate": 3.8831061017632845e-08,
"logits/chosen": -4.557957172393799,
"logits/rejected": -4.812293529510498,
"logps/chosen": -239.32144165039062,
"logps/rejected": -232.82479858398438,
"loss": 0.2425,
"rewards/accuracies": 0.90625,
"rewards/chosen": 1.3096827268600464,
"rewards/margins": 2.680234432220459,
"rewards/rejected": -1.3705517053604126,
"step": 150
},
{
"epoch": 0.41,
"grad_norm": 179.5862213559593,
"learning_rate": 3.691421800053269e-08,
"logits/chosen": -4.614952564239502,
"logits/rejected": -4.799678802490234,
"logps/chosen": -238.4506378173828,
"logps/rejected": -229.4785614013672,
"loss": 0.2216,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.300586462020874,
"rewards/margins": 2.687537908554077,
"rewards/rejected": -1.3869514465332031,
"step": 160
},
{
"epoch": 0.43,
"grad_norm": 160.70849517962517,
"learning_rate": 3.490199415097892e-08,
"logits/chosen": -4.499081611633301,
"logits/rejected": -4.779529571533203,
"logps/chosen": -244.9915771484375,
"logps/rejected": -235.46743774414062,
"loss": 0.2108,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.6082652807235718,
"rewards/margins": 3.174710988998413,
"rewards/rejected": -1.5664453506469727,
"step": 170
},
{
"epoch": 0.46,
"grad_norm": 149.11333079529007,
"learning_rate": 3.2810498590513937e-08,
"logits/chosen": -4.59390926361084,
"logits/rejected": -4.832152366638184,
"logps/chosen": -222.95986938476562,
"logps/rejected": -211.53585815429688,
"loss": 0.2274,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.3903782367706299,
"rewards/margins": 2.9419426918029785,
"rewards/rejected": -1.5515644550323486,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 171.77003361632143,
"learning_rate": 3.065647506074306e-08,
"logits/chosen": -4.56182861328125,
"logits/rejected": -4.7075724601745605,
"logps/chosen": -245.95556640625,
"logps/rejected": -247.3394012451172,
"loss": 0.2299,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 1.4937294721603394,
"rewards/margins": 2.711061477661133,
"rewards/rejected": -1.2173320055007935,
"step": 190
},
{
"epoch": 0.51,
"grad_norm": 152.3542212939215,
"learning_rate": 2.8457167879118325e-08,
"logits/chosen": -4.556639194488525,
"logits/rejected": -4.735670566558838,
"logps/chosen": -237.8849639892578,
"logps/rejected": -229.5240020751953,
"loss": 0.2338,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.4254279136657715,
"rewards/margins": 2.843172073364258,
"rewards/rejected": -1.4177442789077759,
"step": 200
},
{
"epoch": 0.51,
"eval_logits/chosen": -4.538640975952148,
"eval_logits/rejected": -4.758352279663086,
"eval_logps/chosen": -391.07916259765625,
"eval_logps/rejected": -514.2457885742188,
"eval_loss": 1.0996507406234741,
"eval_rewards/accuracies": 0.38671875,
"eval_rewards/chosen": -0.06046929210424423,
"eval_rewards/margins": -0.4746614694595337,
"eval_rewards/rejected": 0.41419219970703125,
"eval_runtime": 98.0841,
"eval_samples_per_second": 20.391,
"eval_steps_per_second": 0.326,
"step": 200
},
{
"epoch": 0.54,
"grad_norm": 139.65082338502864,
"learning_rate": 2.6230183887296952e-08,
"logits/chosen": -4.619709014892578,
"logits/rejected": -4.859663963317871,
"logps/chosen": -249.32113647460938,
"logps/rejected": -246.33468627929688,
"loss": 0.1967,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.6563961505889893,
"rewards/margins": 3.425267457962036,
"rewards/rejected": -1.768871545791626,
"step": 210
},
{
"epoch": 0.56,
"grad_norm": 147.42119588032455,
"learning_rate": 2.3993351497264626e-08,
"logits/chosen": -4.466108798980713,
"logits/rejected": -4.793113708496094,
"logps/chosen": -247.28756713867188,
"logps/rejected": -244.1797637939453,
"loss": 0.2159,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.5523773431777954,
"rewards/margins": 3.5030410289764404,
"rewards/rejected": -1.9506635665893555,
"step": 220
},
{
"epoch": 0.59,
"grad_norm": 158.88183877851495,
"learning_rate": 2.1764577963648613e-08,
"logits/chosen": -4.541924476623535,
"logits/rejected": -4.858447074890137,
"logps/chosen": -245.6726531982422,
"logps/rejected": -243.28677368164062,
"loss": 0.2197,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.5296446084976196,
"rewards/margins": 3.487210750579834,
"rewards/rejected": -1.957566499710083,
"step": 230
},
{
"epoch": 0.61,
"grad_norm": 157.203527489415,
"learning_rate": 1.9561706024845818e-08,
"logits/chosen": -4.5143561363220215,
"logits/rejected": -4.772491455078125,
"logps/chosen": -256.7393798828125,
"logps/rejected": -240.91226196289062,
"loss": 0.2123,
"rewards/accuracies": 0.9375,
"rewards/chosen": 1.6083800792694092,
"rewards/margins": 3.60624623298645,
"rewards/rejected": -1.9978656768798828,
"step": 240
},
{
"epoch": 0.64,
"grad_norm": 159.9556615524972,
"learning_rate": 1.740237106064383e-08,
"logits/chosen": -4.641883850097656,
"logits/rejected": -4.893360614776611,
"logps/chosen": -234.55264282226562,
"logps/rejected": -224.25631713867188,
"loss": 0.2024,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.414884090423584,
"rewards/margins": 3.079598903656006,
"rewards/rejected": -1.6647151708602905,
"step": 250
},
{
"epoch": 0.66,
"grad_norm": 198.60636587673295,
"learning_rate": 1.530385990987863e-08,
"logits/chosen": -4.517378330230713,
"logits/rejected": -4.799233913421631,
"logps/chosen": -238.4044189453125,
"logps/rejected": -242.4611358642578,
"loss": 0.2025,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 1.5644124746322632,
"rewards/margins": 3.8356785774230957,
"rewards/rejected": -2.271266460418701,
"step": 260
},
{
"epoch": 0.69,
"grad_norm": 186.10967020286805,
"learning_rate": 1.3282972478382409e-08,
"logits/chosen": -4.603947639465332,
"logits/rejected": -4.826247215270996,
"logps/chosen": -245.811279296875,
"logps/rejected": -236.3955841064453,
"loss": 0.2127,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.5221991539001465,
"rewards/margins": 3.6592516899108887,
"rewards/rejected": -2.137052536010742,
"step": 270
},
{
"epoch": 0.72,
"grad_norm": 144.59147008918274,
"learning_rate": 1.1355887245137383e-08,
"logits/chosen": -4.557550430297852,
"logits/rejected": -4.780216217041016,
"logps/chosen": -248.1402587890625,
"logps/rejected": -262.3576354980469,
"loss": 0.1884,
"rewards/accuracies": 0.96875,
"rewards/chosen": 1.636661171913147,
"rewards/margins": 3.578484296798706,
"rewards/rejected": -1.9418232440948486,
"step": 280
},
{
"epoch": 0.74,
"grad_norm": 331.8489693457681,
"learning_rate": 9.538031743343628e-09,
"logits/chosen": -4.426544666290283,
"logits/rejected": -4.761611461639404,
"logps/chosen": -255.4153289794922,
"logps/rejected": -234.84487915039062,
"loss": 0.1966,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.7193052768707275,
"rewards/margins": 3.6408512592315674,
"rewards/rejected": -1.9215457439422607,
"step": 290
},
{
"epoch": 0.77,
"grad_norm": 177.15049850318,
"learning_rate": 7.843959053281662e-09,
"logits/chosen": -4.541173934936523,
"logits/rejected": -4.741909503936768,
"logps/chosen": -236.41854858398438,
"logps/rejected": -236.483642578125,
"loss": 0.2158,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 1.6989631652832031,
"rewards/margins": 3.7507786750793457,
"rewards/rejected": -2.0518155097961426,
"step": 300
},
{
"epoch": 0.77,
"eval_logits/chosen": -4.539734840393066,
"eval_logits/rejected": -4.759631633758545,
"eval_logps/chosen": -392.2895812988281,
"eval_logps/rejected": -515.406494140625,
"eval_loss": 1.1359957456588745,
"eval_rewards/accuracies": 0.41015625,
"eval_rewards/chosen": -0.18151262402534485,
"eval_rewards/margins": -0.47963014245033264,
"eval_rewards/rejected": 0.2981175184249878,
"eval_runtime": 97.905,
"eval_samples_per_second": 20.428,
"eval_steps_per_second": 0.327,
"step": 300
},
{
"epoch": 0.79,
"grad_norm": 159.8961908197972,
"learning_rate": 6.28723129572247e-09,
"logits/chosen": -4.461672782897949,
"logits/rejected": -4.776505470275879,
"logps/chosen": -244.0063934326172,
"logps/rejected": -244.4510955810547,
"loss": 0.2028,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 1.7133222818374634,
"rewards/margins": 3.9386374950408936,
"rewards/rejected": -2.2253153324127197,
"step": 310
},
{
"epoch": 0.82,
"grad_norm": 185.55678167306448,
"learning_rate": 4.880311058593617e-09,
"logits/chosen": -4.521292209625244,
"logits/rejected": -4.848372936248779,
"logps/chosen": -230.04397583007812,
"logps/rejected": -226.9331817626953,
"loss": 0.2095,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.5120347738265991,
"rewards/margins": 3.560204267501831,
"rewards/rejected": -2.0481698513031006,
"step": 320
},
{
"epoch": 0.84,
"grad_norm": 155.26912676521727,
"learning_rate": 3.6344616260994942e-09,
"logits/chosen": -4.473151683807373,
"logits/rejected": -4.766911029815674,
"logps/chosen": -251.7646484375,
"logps/rejected": -254.7379913330078,
"loss": 0.1928,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 1.638421654701233,
"rewards/margins": 3.8422539234161377,
"rewards/rejected": -2.2038321495056152,
"step": 330
},
{
"epoch": 0.87,
"grad_norm": 151.70242269299357,
"learning_rate": 2.5596568090246547e-09,
"logits/chosen": -4.4894232749938965,
"logits/rejected": -4.816695213317871,
"logps/chosen": -255.334716796875,
"logps/rejected": -228.4678192138672,
"loss": 0.1963,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 1.8811309337615967,
"rewards/margins": 4.049709320068359,
"rewards/rejected": -2.1685783863067627,
"step": 340
},
{
"epoch": 0.9,
"grad_norm": 150.47860807724385,
"learning_rate": 1.6645010980854079e-09,
"logits/chosen": -4.505433559417725,
"logits/rejected": -4.675290584564209,
"logps/chosen": -240.54714965820312,
"logps/rejected": -246.03665161132812,
"loss": 0.2168,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.4924026727676392,
"rewards/margins": 3.3190674781799316,
"rewards/rejected": -1.8266645669937134,
"step": 350
},
{
"epoch": 0.92,
"grad_norm": 165.46679700251414,
"learning_rate": 9.561607795526006e-10,
"logits/chosen": -4.49678373336792,
"logits/rejected": -4.711674690246582,
"logps/chosen": -246.95388793945312,
"logps/rejected": -247.2928009033203,
"loss": 0.2121,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 1.468100905418396,
"rewards/margins": 3.451129913330078,
"rewards/rejected": -1.9830286502838135,
"step": 360
},
{
"epoch": 0.95,
"grad_norm": 149.25359071163066,
"learning_rate": 4.403065646083809e-10,
"logits/chosen": -4.518364906311035,
"logits/rejected": -4.680220603942871,
"logps/chosen": -241.2300262451172,
"logps/rejected": -255.6038818359375,
"loss": 0.2103,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.5591168403625488,
"rewards/margins": 3.434800386428833,
"rewards/rejected": -1.8756835460662842,
"step": 370
},
{
"epoch": 0.97,
"grad_norm": 160.57572024314433,
"learning_rate": 1.2106819172520434e-10,
"logits/chosen": -4.618407726287842,
"logits/rejected": -4.8883843421936035,
"logps/chosen": -246.15853881835938,
"logps/rejected": -243.1090087890625,
"loss": 0.2034,
"rewards/accuracies": 0.90625,
"rewards/chosen": 1.5924150943756104,
"rewards/margins": 3.8508572578430176,
"rewards/rejected": -2.2584421634674072,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 235.40189038757265,
"learning_rate": 1.0013655036916758e-12,
"logits/chosen": -4.611303329467773,
"logits/rejected": -4.857443809509277,
"logps/chosen": -242.1800994873047,
"logps/rejected": -241.1270294189453,
"loss": 0.199,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 1.7492481470108032,
"rewards/margins": 3.8104407787323,
"rewards/rejected": -2.0611929893493652,
"step": 390
},
{
"epoch": 1.0,
"step": 391,
"total_flos": 0.0,
"train_loss": 0.29024992444935965,
"train_runtime": 6148.7126,
"train_samples_per_second": 8.132,
"train_steps_per_second": 0.064
}
],
"logging_steps": 10,
"max_steps": 391,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}