zephyr-7b-dpo-full / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9974424552429667,
"eval_steps": 500,
"global_step": 195,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 1310.024749740419,
"learning_rate": 2.5e-08,
"logits/chosen": -5.0504608154296875,
"logits/rejected": -5.35328483581543,
"logps/chosen": -242.7239990234375,
"logps/rejected": -185.90835571289062,
"loss": 0.6893,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"grad_norm": 1343.8700325036616,
"learning_rate": 2.5e-07,
"logits/chosen": -4.959235191345215,
"logits/rejected": -5.051504135131836,
"logps/chosen": -226.43630981445312,
"logps/rejected": -216.47547912597656,
"loss": 0.7205,
"rewards/accuracies": 0.4479166567325592,
"rewards/chosen": 0.07974544167518616,
"rewards/margins": 0.013408761471509933,
"rewards/rejected": 0.06633666902780533,
"step": 10
},
{
"epoch": 0.1,
"grad_norm": 1443.7667771719773,
"learning_rate": 5e-07,
"logits/chosen": -4.906929969787598,
"logits/rejected": -5.0118937492370605,
"logps/chosen": -240.65188598632812,
"logps/rejected": -220.84378051757812,
"loss": 0.6926,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.7429171204566956,
"rewards/margins": 1.1278517246246338,
"rewards/rejected": -0.38493460416793823,
"step": 20
},
{
"epoch": 0.15,
"grad_norm": 1641.6770420153719,
"learning_rate": 4.959823971496574e-07,
"logits/chosen": -4.913812637329102,
"logits/rejected": -5.012935638427734,
"logps/chosen": -238.8269805908203,
"logps/rejected": -228.05404663085938,
"loss": 0.8116,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": 1.8061437606811523,
"rewards/margins": 4.523256301879883,
"rewards/rejected": -2.7171127796173096,
"step": 30
},
{
"epoch": 0.2,
"grad_norm": 1382.4291689510926,
"learning_rate": 4.840587176599343e-07,
"logits/chosen": -4.964416980743408,
"logits/rejected": -5.0027852058410645,
"logps/chosen": -249.1742706298828,
"logps/rejected": -235.87576293945312,
"loss": 0.9983,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": 1.3685696125030518,
"rewards/margins": 4.053561210632324,
"rewards/rejected": -2.6849913597106934,
"step": 40
},
{
"epoch": 0.26,
"grad_norm": 1428.1508779981239,
"learning_rate": 4.646121984004665e-07,
"logits/chosen": -4.990395545959473,
"logits/rejected": -5.134562015533447,
"logps/chosen": -251.7528076171875,
"logps/rejected": -226.17306518554688,
"loss": 0.9987,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": 2.2698659896850586,
"rewards/margins": 5.616934299468994,
"rewards/rejected": -3.3470687866210938,
"step": 50
},
{
"epoch": 0.31,
"grad_norm": 1429.7364912941882,
"learning_rate": 4.3826786650090273e-07,
"logits/chosen": -5.023388385772705,
"logits/rejected": -5.144254684448242,
"logps/chosen": -250.6563720703125,
"logps/rejected": -241.12484741210938,
"loss": 0.993,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 1.217611312866211,
"rewards/margins": 6.1895647048950195,
"rewards/rejected": -4.97195291519165,
"step": 60
},
{
"epoch": 0.36,
"grad_norm": 1385.9054301583744,
"learning_rate": 4.058724504646834e-07,
"logits/chosen": -4.992190361022949,
"logits/rejected": -5.075345039367676,
"logps/chosen": -256.97406005859375,
"logps/rejected": -242.94003295898438,
"loss": 1.1539,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 2.1734097003936768,
"rewards/margins": 5.453003883361816,
"rewards/rejected": -3.2795944213867188,
"step": 70
},
{
"epoch": 0.41,
"grad_norm": 1267.3737422156325,
"learning_rate": 3.6846716561824967e-07,
"logits/chosen": -5.066686630249023,
"logits/rejected": -5.165375709533691,
"logps/chosen": -246.781982421875,
"logps/rejected": -232.3020477294922,
"loss": 1.1127,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 2.182149887084961,
"rewards/margins": 6.110042095184326,
"rewards/rejected": -3.927891492843628,
"step": 80
},
{
"epoch": 0.46,
"grad_norm": 1414.9882610729042,
"learning_rate": 3.272542485937368e-07,
"logits/chosen": -5.056512355804443,
"logits/rejected": -5.19997501373291,
"logps/chosen": -236.23886108398438,
"logps/rejected": -219.4969940185547,
"loss": 1.1651,
"rewards/accuracies": 0.59375,
"rewards/chosen": 2.3071811199188232,
"rewards/margins": 4.593169212341309,
"rewards/rejected": -2.2859878540039062,
"step": 90
},
{
"epoch": 0.51,
"grad_norm": 1730.7459110414102,
"learning_rate": 2.8355831645441387e-07,
"logits/chosen": -5.051321506500244,
"logits/rejected": -5.197503089904785,
"logps/chosen": -245.94680786132812,
"logps/rejected": -224.7979278564453,
"loss": 1.1049,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 2.0447471141815186,
"rewards/margins": 3.989384412765503,
"rewards/rejected": -1.9446370601654053,
"step": 100
},
{
"epoch": 0.56,
"grad_norm": 1376.721155787266,
"learning_rate": 2.3878379241237134e-07,
"logits/chosen": -5.05279541015625,
"logits/rejected": -5.2380499839782715,
"logps/chosen": -231.46408081054688,
"logps/rejected": -221.2686309814453,
"loss": 1.0653,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 2.9433412551879883,
"rewards/margins": 7.433489799499512,
"rewards/rejected": -4.490148544311523,
"step": 110
},
{
"epoch": 0.61,
"grad_norm": 1298.5481767381427,
"learning_rate": 1.9436976651092142e-07,
"logits/chosen": -4.989577293395996,
"logits/rejected": -5.143449306488037,
"logps/chosen": -250.3534698486328,
"logps/rejected": -237.04074096679688,
"loss": 1.0694,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": 2.3243861198425293,
"rewards/margins": 8.470600128173828,
"rewards/rejected": -6.146214485168457,
"step": 120
},
{
"epoch": 0.66,
"grad_norm": 1456.9702892975145,
"learning_rate": 1.517437420865191e-07,
"logits/chosen": -5.036610126495361,
"logits/rejected": -5.181552886962891,
"logps/chosen": -234.2519073486328,
"logps/rejected": -226.05050659179688,
"loss": 1.1374,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 2.612969160079956,
"rewards/margins": 6.129396915435791,
"rewards/rejected": -3.516427516937256,
"step": 130
},
{
"epoch": 0.72,
"grad_norm": 1414.11944634508,
"learning_rate": 1.1227575463697439e-07,
"logits/chosen": -5.011117458343506,
"logits/rejected": -5.0677995681762695,
"logps/chosen": -246.2405242919922,
"logps/rejected": -240.97647094726562,
"loss": 1.0012,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 2.1312901973724365,
"rewards/margins": 6.49268102645874,
"rewards/rejected": -4.361390590667725,
"step": 140
},
{
"epoch": 0.77,
"grad_norm": 1391.6252979817953,
"learning_rate": 7.723433775328384e-08,
"logits/chosen": -5.031737327575684,
"logits/rejected": -5.141982078552246,
"logps/chosen": -247.31640625,
"logps/rejected": -245.01284790039062,
"loss": 1.0468,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 3.413778781890869,
"rewards/margins": 8.60617446899414,
"rewards/rejected": -5.19239616394043,
"step": 150
},
{
"epoch": 0.82,
"grad_norm": 1305.4800329449993,
"learning_rate": 4.774575140626316e-08,
"logits/chosen": -4.959289073944092,
"logits/rejected": -5.040767192840576,
"logps/chosen": -253.7027587890625,
"logps/rejected": -250.91659545898438,
"loss": 0.9992,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": 3.046278476715088,
"rewards/margins": 8.344175338745117,
"rewards/rejected": -5.297896862030029,
"step": 160
},
{
"epoch": 0.87,
"grad_norm": 1228.1104796269808,
"learning_rate": 2.475778302439524e-08,
"logits/chosen": -5.096159934997559,
"logits/rejected": -5.178959369659424,
"logps/chosen": -251.2628631591797,
"logps/rejected": -233.06857299804688,
"loss": 1.0057,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 2.824694871902466,
"rewards/margins": 6.200740814208984,
"rewards/rejected": -3.3760459423065186,
"step": 170
},
{
"epoch": 0.92,
"grad_norm": 1348.827014256151,
"learning_rate": 9.009284826036689e-09,
"logits/chosen": -4.995651721954346,
"logits/rejected": -5.102165222167969,
"logps/chosen": -237.61990356445312,
"logps/rejected": -232.7886962890625,
"loss": 0.9321,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": 2.423119068145752,
"rewards/margins": 4.8792009353637695,
"rewards/rejected": -2.4560813903808594,
"step": 180
},
{
"epoch": 0.97,
"grad_norm": 1117.1672982866971,
"learning_rate": 1.0064265011902328e-09,
"logits/chosen": -5.071808815002441,
"logits/rejected": -5.110179901123047,
"logps/chosen": -236.14224243164062,
"logps/rejected": -233.5693359375,
"loss": 0.9891,
"rewards/accuracies": 0.640625,
"rewards/chosen": 1.8652112483978271,
"rewards/margins": 5.820201873779297,
"rewards/rejected": -3.9549899101257324,
"step": 190
},
{
"epoch": 1.0,
"step": 195,
"total_flos": 0.0,
"train_loss": 0.9975380127246564,
"train_runtime": 5482.1546,
"train_samples_per_second": 9.12,
"train_steps_per_second": 0.036
}
],
"logging_steps": 10,
"max_steps": 195,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
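
For context, the log_history array above is what the Hugging Face Trainer writes at each logging step (every 10 optimizer steps here, per logging_steps), with a final end-of-training summary entry that carries train_loss and runtime instead of per-step metrics. A minimal sketch of how this file can be read back for inspection, assuming a local copy saved as trainer_state.json:

    import json

    # Load the saved trainer state (the path is an assumption; point it at
    # wherever this file lives locally).
    with open("trainer_state.json") as f:
        state = json.load(f)

    # Each dict in log_history is one logging step; the last entry is the
    # end-of-training summary and lacks the per-step DPO reward keys, so we
    # skip it by checking for "rewards/margins".
    for entry in state["log_history"]:
        if "rewards/margins" in entry:
            print(entry["step"], entry["loss"],
                  entry["rewards/accuracies"], entry["rewards/margins"])

In TRL-style DPO logs, rewards/margins is the mean gap between the chosen and rejected rewards, and it is the usual quantity to watch; in the run above it grows from roughly zero at step 1 to about 5-8 by the end of the single epoch, while rewards/accuracies settles around 0.6-0.68.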