optune_iter0 / iter0 /trainer_state.json
Lichang-Chen
update
528119a
raw
history blame contribute delete
No virus
18.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 375,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.3157894736842104e-08,
"logits/generated": -3.0232396125793457,
"logits/real": -2.996844530105591,
"logps/generated": -291.56793212890625,
"logps/real": -340.7873840332031,
"loss": 0.3645,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.3157894736842104e-07,
"logits/generated": -2.977639675140381,
"logits/real": -2.9781062602996826,
"logps/generated": -338.7113037109375,
"logps/real": -360.56146240234375,
"loss": 0.3584,
"rewards/accuracies": 0.5555555820465088,
"rewards/generated": 0.10237760096788406,
"rewards/margins": 0.04294492304325104,
"rewards/real": 0.1453225314617157,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.631578947368421e-07,
"logits/generated": -2.9867026805877686,
"logits/real": -2.990659236907959,
"logps/generated": -371.62164306640625,
"logps/real": -372.09954833984375,
"loss": 0.3391,
"rewards/accuracies": 0.59375,
"rewards/generated": 0.5660532712936401,
"rewards/margins": 0.15894225239753723,
"rewards/real": 0.7249955534934998,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.9473684210526315e-07,
"logits/generated": -2.940207004547119,
"logits/real": -2.945539951324463,
"logps/generated": -323.21282958984375,
"logps/real": -323.20733642578125,
"loss": 0.3061,
"rewards/accuracies": 0.668749988079071,
"rewards/generated": 0.2973577380180359,
"rewards/margins": 0.29796674847602844,
"rewards/real": 0.5953244566917419,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 4.970326409495548e-07,
"logits/generated": -2.849879026412964,
"logits/real": -2.868879556655884,
"logps/generated": -339.9267578125,
"logps/real": -348.660400390625,
"loss": 0.3043,
"rewards/accuracies": 0.668749988079071,
"rewards/generated": -0.18336713314056396,
"rewards/margins": 0.4493914246559143,
"rewards/real": 0.26602429151535034,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.821958456973294e-07,
"logits/generated": -2.8244385719299316,
"logits/real": -2.819532871246338,
"logps/generated": -345.12353515625,
"logps/real": -345.24334716796875,
"loss": 0.2707,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -1.0958898067474365,
"rewards/margins": 0.7357537150382996,
"rewards/real": -0.36013612151145935,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.673590504451038e-07,
"logits/generated": -2.7510242462158203,
"logits/real": -2.744049549102783,
"logps/generated": -343.3367614746094,
"logps/real": -353.568115234375,
"loss": 0.2658,
"rewards/accuracies": 0.706250011920929,
"rewards/generated": -1.349844217300415,
"rewards/margins": 0.7489473819732666,
"rewards/real": -0.6008970141410828,
"step": 60
},
{
"epoch": 0.19,
"learning_rate": 4.5252225519287835e-07,
"logits/generated": -2.787135362625122,
"logits/real": -2.7906911373138428,
"logps/generated": -380.27276611328125,
"logps/real": -390.9748840332031,
"loss": 0.2682,
"rewards/accuracies": 0.731249988079071,
"rewards/generated": -1.3749873638153076,
"rewards/margins": 0.8838955760002136,
"rewards/real": -0.49109163880348206,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.376854599406528e-07,
"logits/generated": -2.7812376022338867,
"logits/real": -2.79952073097229,
"logps/generated": -352.7367858886719,
"logps/real": -343.9632873535156,
"loss": 0.2784,
"rewards/accuracies": 0.78125,
"rewards/generated": -1.779193639755249,
"rewards/margins": 1.1407415866851807,
"rewards/real": -0.6384519934654236,
"step": 80
},
{
"epoch": 0.24,
"learning_rate": 4.228486646884273e-07,
"logits/generated": -2.80656099319458,
"logits/real": -2.7876017093658447,
"logps/generated": -369.83990478515625,
"logps/real": -381.7880859375,
"loss": 0.2742,
"rewards/accuracies": 0.7250000238418579,
"rewards/generated": -1.4693442583084106,
"rewards/margins": 0.8362933993339539,
"rewards/real": -0.6330507397651672,
"step": 90
},
{
"epoch": 0.27,
"learning_rate": 4.0801186943620176e-07,
"logits/generated": -2.7452383041381836,
"logits/real": -2.7657182216644287,
"logps/generated": -354.4010314941406,
"logps/real": -359.81219482421875,
"loss": 0.2657,
"rewards/accuracies": 0.6937500238418579,
"rewards/generated": -1.3197325468063354,
"rewards/margins": 0.8461551666259766,
"rewards/real": -0.4735774099826813,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 3.931750741839762e-07,
"logits/generated": -2.8132920265197754,
"logits/real": -2.8043882846832275,
"logps/generated": -357.61383056640625,
"logps/real": -354.3050537109375,
"loss": 0.2716,
"rewards/accuracies": 0.7250000238418579,
"rewards/generated": -1.6764816045761108,
"rewards/margins": 0.9828389883041382,
"rewards/real": -0.6936424374580383,
"step": 110
},
{
"epoch": 0.32,
"learning_rate": 3.7833827893175073e-07,
"logits/generated": -2.825605869293213,
"logits/real": -2.8103888034820557,
"logps/generated": -365.675537109375,
"logps/real": -368.09197998046875,
"loss": 0.2543,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -1.922782301902771,
"rewards/margins": 1.292311191558838,
"rewards/real": -0.6304711103439331,
"step": 120
},
{
"epoch": 0.35,
"learning_rate": 3.635014836795252e-07,
"logits/generated": -2.7962846755981445,
"logits/real": -2.795644521713257,
"logps/generated": -340.1669006347656,
"logps/real": -348.66583251953125,
"loss": 0.2602,
"rewards/accuracies": 0.6312500238418579,
"rewards/generated": -1.8341821432113647,
"rewards/margins": 0.9289523959159851,
"rewards/real": -0.9052297472953796,
"step": 130
},
{
"epoch": 0.37,
"learning_rate": 3.486646884272997e-07,
"logits/generated": -2.7868337631225586,
"logits/real": -2.7795639038085938,
"logps/generated": -358.3647766113281,
"logps/real": -362.6192321777344,
"loss": 0.2584,
"rewards/accuracies": 0.7437499761581421,
"rewards/generated": -1.9304630756378174,
"rewards/margins": 1.0024542808532715,
"rewards/real": -0.9280086755752563,
"step": 140
},
{
"epoch": 0.4,
"learning_rate": 3.3382789317507414e-07,
"logits/generated": -2.7806317806243896,
"logits/real": -2.773284435272217,
"logps/generated": -392.99273681640625,
"logps/real": -388.6888732910156,
"loss": 0.2429,
"rewards/accuracies": 0.668749988079071,
"rewards/generated": -2.132871389389038,
"rewards/margins": 0.8601642847061157,
"rewards/real": -1.272707223892212,
"step": 150
},
{
"epoch": 0.43,
"learning_rate": 3.189910979228487e-07,
"logits/generated": -2.7479450702667236,
"logits/real": -2.7415106296539307,
"logps/generated": -384.2051086425781,
"logps/real": -382.9107360839844,
"loss": 0.2518,
"rewards/accuracies": 0.7562500238418579,
"rewards/generated": -2.3066015243530273,
"rewards/margins": 1.2394059896469116,
"rewards/real": -1.0671956539154053,
"step": 160
},
{
"epoch": 0.45,
"learning_rate": 3.0415430267062316e-07,
"logits/generated": -2.7207372188568115,
"logits/real": -2.6968023777008057,
"logps/generated": -351.6153259277344,
"logps/real": -358.0864562988281,
"loss": 0.247,
"rewards/accuracies": 0.8062499761581421,
"rewards/generated": -2.2227485179901123,
"rewards/margins": 1.4021742343902588,
"rewards/real": -0.820574164390564,
"step": 170
},
{
"epoch": 0.48,
"learning_rate": 2.893175074183976e-07,
"logits/generated": -2.687243700027466,
"logits/real": -2.6896092891693115,
"logps/generated": -340.67498779296875,
"logps/real": -325.22259521484375,
"loss": 0.2683,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -2.342029094696045,
"rewards/margins": 1.0233131647109985,
"rewards/real": -1.3187161684036255,
"step": 180
},
{
"epoch": 0.51,
"learning_rate": 2.744807121661721e-07,
"logits/generated": -2.709791421890259,
"logits/real": -2.73317289352417,
"logps/generated": -396.40606689453125,
"logps/real": -388.1844482421875,
"loss": 0.2442,
"rewards/accuracies": 0.78125,
"rewards/generated": -2.465156078338623,
"rewards/margins": 1.4016426801681519,
"rewards/real": -1.063513159751892,
"step": 190
},
{
"epoch": 0.53,
"learning_rate": 2.596439169139466e-07,
"logits/generated": -2.7428107261657715,
"logits/real": -2.7355589866638184,
"logps/generated": -368.4299011230469,
"logps/real": -373.0939025878906,
"loss": 0.2451,
"rewards/accuracies": 0.75,
"rewards/generated": -2.1397910118103027,
"rewards/margins": 1.308272123336792,
"rewards/real": -0.8315190076828003,
"step": 200
},
{
"epoch": 0.56,
"learning_rate": 2.4480712166172106e-07,
"logits/generated": -2.703258752822876,
"logits/real": -2.693305015563965,
"logps/generated": -339.4871826171875,
"logps/real": -326.2037658691406,
"loss": 0.2395,
"rewards/accuracies": 0.78125,
"rewards/generated": -2.3122100830078125,
"rewards/margins": 1.2954694032669067,
"rewards/real": -1.0167406797409058,
"step": 210
},
{
"epoch": 0.59,
"learning_rate": 2.2997032640949554e-07,
"logits/generated": -2.7212119102478027,
"logits/real": -2.716545581817627,
"logps/generated": -339.74267578125,
"logps/real": -346.297607421875,
"loss": 0.2458,
"rewards/accuracies": 0.7437499761581421,
"rewards/generated": -1.9660396575927734,
"rewards/margins": 1.078840970993042,
"rewards/real": -0.8871987462043762,
"step": 220
},
{
"epoch": 0.61,
"learning_rate": 2.1513353115727e-07,
"logits/generated": -2.77765154838562,
"logits/real": -2.7591769695281982,
"logps/generated": -386.1648864746094,
"logps/real": -381.2674560546875,
"loss": 0.2324,
"rewards/accuracies": 0.831250011920929,
"rewards/generated": -2.414008617401123,
"rewards/margins": 1.5327675342559814,
"rewards/real": -0.8812410235404968,
"step": 230
},
{
"epoch": 0.64,
"learning_rate": 2.0029673590504451e-07,
"logits/generated": -2.7021536827087402,
"logits/real": -2.70768666267395,
"logps/generated": -354.3561706542969,
"logps/real": -353.68212890625,
"loss": 0.2492,
"rewards/accuracies": 0.706250011920929,
"rewards/generated": -2.4238786697387695,
"rewards/margins": 1.27177894115448,
"rewards/real": -1.152099847793579,
"step": 240
},
{
"epoch": 0.67,
"learning_rate": 1.8545994065281897e-07,
"logits/generated": -2.7076125144958496,
"logits/real": -2.7352890968322754,
"logps/generated": -365.26214599609375,
"logps/real": -355.78564453125,
"loss": 0.2426,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -2.567624092102051,
"rewards/margins": 1.3714964389801025,
"rewards/real": -1.1961278915405273,
"step": 250
},
{
"epoch": 0.69,
"learning_rate": 1.7062314540059346e-07,
"logits/generated": -2.7474026679992676,
"logits/real": -2.733513593673706,
"logps/generated": -370.26568603515625,
"logps/real": -366.9493713378906,
"loss": 0.2496,
"rewards/accuracies": 0.793749988079071,
"rewards/generated": -2.5266714096069336,
"rewards/margins": 1.390491247177124,
"rewards/real": -1.1361799240112305,
"step": 260
},
{
"epoch": 0.72,
"learning_rate": 1.5578635014836795e-07,
"logits/generated": -2.7382729053497314,
"logits/real": -2.7590155601501465,
"logps/generated": -339.4982604980469,
"logps/real": -354.5415954589844,
"loss": 0.2407,
"rewards/accuracies": 0.731249988079071,
"rewards/generated": -2.284700870513916,
"rewards/margins": 1.1465342044830322,
"rewards/real": -1.1381666660308838,
"step": 270
},
{
"epoch": 0.75,
"learning_rate": 1.4094955489614243e-07,
"logits/generated": -2.6945815086364746,
"logits/real": -2.695988416671753,
"logps/generated": -373.51385498046875,
"logps/real": -350.8352966308594,
"loss": 0.2303,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -2.4922609329223633,
"rewards/margins": 1.3119118213653564,
"rewards/real": -1.1803491115570068,
"step": 280
},
{
"epoch": 0.77,
"learning_rate": 1.261127596439169e-07,
"logits/generated": -2.7670834064483643,
"logits/real": -2.7600436210632324,
"logps/generated": -344.09136962890625,
"logps/real": -337.3023376464844,
"loss": 0.2435,
"rewards/accuracies": 0.8062499761581421,
"rewards/generated": -2.637000799179077,
"rewards/margins": 1.540818452835083,
"rewards/real": -1.0961825847625732,
"step": 290
},
{
"epoch": 0.8,
"learning_rate": 1.1127596439169139e-07,
"logits/generated": -2.6610119342803955,
"logits/real": -2.6668756008148193,
"logps/generated": -342.7873229980469,
"logps/real": -330.2555847167969,
"loss": 0.2424,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": -2.5096726417541504,
"rewards/margins": 1.4919517040252686,
"rewards/real": -1.0177206993103027,
"step": 300
},
{
"epoch": 0.83,
"learning_rate": 9.643916913946587e-08,
"logits/generated": -2.7119061946868896,
"logits/real": -2.736443519592285,
"logps/generated": -364.1079406738281,
"logps/real": -365.68963623046875,
"loss": 0.2316,
"rewards/accuracies": 0.793749988079071,
"rewards/generated": -3.0718140602111816,
"rewards/margins": 1.5135959386825562,
"rewards/real": -1.5582183599472046,
"step": 310
},
{
"epoch": 0.85,
"learning_rate": 8.160237388724035e-08,
"logits/generated": -2.7183382511138916,
"logits/real": -2.735018253326416,
"logps/generated": -394.9755859375,
"logps/real": -377.31427001953125,
"loss": 0.2359,
"rewards/accuracies": 0.768750011920929,
"rewards/generated": -2.73679256439209,
"rewards/margins": 1.1780710220336914,
"rewards/real": -1.558721661567688,
"step": 320
},
{
"epoch": 0.88,
"learning_rate": 6.676557863501484e-08,
"logits/generated": -2.7515358924865723,
"logits/real": -2.742940664291382,
"logps/generated": -388.3130187988281,
"logps/real": -372.29437255859375,
"loss": 0.234,
"rewards/accuracies": 0.706250011920929,
"rewards/generated": -2.38558030128479,
"rewards/margins": 1.1235764026641846,
"rewards/real": -1.2620038986206055,
"step": 330
},
{
"epoch": 0.91,
"learning_rate": 5.192878338278932e-08,
"logits/generated": -2.695279121398926,
"logits/real": -2.6978631019592285,
"logps/generated": -365.2856140136719,
"logps/real": -363.0904235839844,
"loss": 0.2303,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -2.723789930343628,
"rewards/margins": 1.42342209815979,
"rewards/real": -1.300368070602417,
"step": 340
},
{
"epoch": 0.93,
"learning_rate": 3.709198813056379e-08,
"logits/generated": -2.662724018096924,
"logits/real": -2.675875186920166,
"logps/generated": -338.28704833984375,
"logps/real": -342.17462158203125,
"loss": 0.2333,
"rewards/accuracies": 0.768750011920929,
"rewards/generated": -2.581637144088745,
"rewards/margins": 1.3430696725845337,
"rewards/real": -1.2385674715042114,
"step": 350
},
{
"epoch": 0.96,
"learning_rate": 2.225519287833828e-08,
"logits/generated": -2.6803088188171387,
"logits/real": -2.704144239425659,
"logps/generated": -356.77703857421875,
"logps/real": -359.313720703125,
"loss": 0.2368,
"rewards/accuracies": 0.75,
"rewards/generated": -2.8369853496551514,
"rewards/margins": 1.2829147577285767,
"rewards/real": -1.554070234298706,
"step": 360
},
{
"epoch": 0.99,
"learning_rate": 7.418397626112759e-09,
"logits/generated": -2.7113311290740967,
"logits/real": -2.7457308769226074,
"logps/generated": -404.06756591796875,
"logps/real": -393.70843505859375,
"loss": 0.2369,
"rewards/accuracies": 0.78125,
"rewards/generated": -2.8109331130981445,
"rewards/margins": 1.4008702039718628,
"rewards/real": -1.4100630283355713,
"step": 370
},
{
"epoch": 1.0,
"step": 375,
"total_flos": 0.0,
"train_loss": 0.2572693068186442,
"train_runtime": 6192.5005,
"train_samples_per_second": 7.751,
"train_steps_per_second": 0.061
}
],
"logging_steps": 10,
"max_steps": 375,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}