|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 375, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3157894736842104e-08, |
|
"logits/generated": -3.0232396125793457, |
|
"logits/real": -2.996844530105591, |
|
"logps/generated": -291.56793212890625, |
|
"logps/real": -340.7873840332031, |
|
"loss": 0.3645, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3157894736842104e-07, |
|
"logits/generated": -2.977639675140381, |
|
"logits/real": -2.9781062602996826, |
|
"logps/generated": -338.7113037109375, |
|
"logps/real": -360.56146240234375, |
|
"loss": 0.3584, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/generated": 0.10237760096788406, |
|
"rewards/margins": 0.04294492304325104, |
|
"rewards/real": 0.1453225314617157, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/generated": -2.9867026805877686, |
|
"logits/real": -2.990659236907959, |
|
"logps/generated": -371.62164306640625, |
|
"logps/real": -372.09954833984375, |
|
"loss": 0.3391, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/generated": 0.5660532712936401, |
|
"rewards/margins": 0.15894225239753723, |
|
"rewards/real": 0.7249955534934998, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9473684210526315e-07, |
|
"logits/generated": -2.940207004547119, |
|
"logits/real": -2.945539951324463, |
|
"logps/generated": -323.21282958984375, |
|
"logps/real": -323.20733642578125, |
|
"loss": 0.3061, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/generated": 0.2973577380180359, |
|
"rewards/margins": 0.29796674847602844, |
|
"rewards/real": 0.5953244566917419, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.970326409495548e-07, |
|
"logits/generated": -2.849879026412964, |
|
"logits/real": -2.868879556655884, |
|
"logps/generated": -339.9267578125, |
|
"logps/real": -348.660400390625, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/generated": -0.18336713314056396, |
|
"rewards/margins": 0.4493914246559143, |
|
"rewards/real": 0.26602429151535034, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.821958456973294e-07, |
|
"logits/generated": -2.8244385719299316, |
|
"logits/real": -2.819532871246338, |
|
"logps/generated": -345.12353515625, |
|
"logps/real": -345.24334716796875, |
|
"loss": 0.2707, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -1.0958898067474365, |
|
"rewards/margins": 0.7357537150382996, |
|
"rewards/real": -0.36013612151145935, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.673590504451038e-07, |
|
"logits/generated": -2.7510242462158203, |
|
"logits/real": -2.744049549102783, |
|
"logps/generated": -343.3367614746094, |
|
"logps/real": -353.568115234375, |
|
"loss": 0.2658, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/generated": -1.349844217300415, |
|
"rewards/margins": 0.7489473819732666, |
|
"rewards/real": -0.6008970141410828, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5252225519287835e-07, |
|
"logits/generated": -2.787135362625122, |
|
"logits/real": -2.7906911373138428, |
|
"logps/generated": -380.27276611328125, |
|
"logps/real": -390.9748840332031, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/generated": -1.3749873638153076, |
|
"rewards/margins": 0.8838955760002136, |
|
"rewards/real": -0.49109163880348206, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.376854599406528e-07, |
|
"logits/generated": -2.7812376022338867, |
|
"logits/real": -2.79952073097229, |
|
"logps/generated": -352.7367858886719, |
|
"logps/real": -343.9632873535156, |
|
"loss": 0.2784, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/generated": -1.779193639755249, |
|
"rewards/margins": 1.1407415866851807, |
|
"rewards/real": -0.6384519934654236, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.228486646884273e-07, |
|
"logits/generated": -2.80656099319458, |
|
"logits/real": -2.7876017093658447, |
|
"logps/generated": -369.83990478515625, |
|
"logps/real": -381.7880859375, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": -1.4693442583084106, |
|
"rewards/margins": 0.8362933993339539, |
|
"rewards/real": -0.6330507397651672, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0801186943620176e-07, |
|
"logits/generated": -2.7452383041381836, |
|
"logits/real": -2.7657182216644287, |
|
"logps/generated": -354.4010314941406, |
|
"logps/real": -359.81219482421875, |
|
"loss": 0.2657, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/generated": -1.3197325468063354, |
|
"rewards/margins": 0.8461551666259766, |
|
"rewards/real": -0.4735774099826813, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.931750741839762e-07, |
|
"logits/generated": -2.8132920265197754, |
|
"logits/real": -2.8043882846832275, |
|
"logps/generated": -357.61383056640625, |
|
"logps/real": -354.3050537109375, |
|
"loss": 0.2716, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": -1.6764816045761108, |
|
"rewards/margins": 0.9828389883041382, |
|
"rewards/real": -0.6936424374580383, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7833827893175073e-07, |
|
"logits/generated": -2.825605869293213, |
|
"logits/real": -2.8103888034820557, |
|
"logps/generated": -365.675537109375, |
|
"logps/real": -368.09197998046875, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -1.922782301902771, |
|
"rewards/margins": 1.292311191558838, |
|
"rewards/real": -0.6304711103439331, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.635014836795252e-07, |
|
"logits/generated": -2.7962846755981445, |
|
"logits/real": -2.795644521713257, |
|
"logps/generated": -340.1669006347656, |
|
"logps/real": -348.66583251953125, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/generated": -1.8341821432113647, |
|
"rewards/margins": 0.9289523959159851, |
|
"rewards/real": -0.9052297472953796, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.486646884272997e-07, |
|
"logits/generated": -2.7868337631225586, |
|
"logits/real": -2.7795639038085938, |
|
"logps/generated": -358.3647766113281, |
|
"logps/real": -362.6192321777344, |
|
"loss": 0.2584, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/generated": -1.9304630756378174, |
|
"rewards/margins": 1.0024542808532715, |
|
"rewards/real": -0.9280086755752563, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3382789317507414e-07, |
|
"logits/generated": -2.7806317806243896, |
|
"logits/real": -2.773284435272217, |
|
"logps/generated": -392.99273681640625, |
|
"logps/real": -388.6888732910156, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/generated": -2.132871389389038, |
|
"rewards/margins": 0.8601642847061157, |
|
"rewards/real": -1.272707223892212, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.189910979228487e-07, |
|
"logits/generated": -2.7479450702667236, |
|
"logits/real": -2.7415106296539307, |
|
"logps/generated": -384.2051086425781, |
|
"logps/real": -382.9107360839844, |
|
"loss": 0.2518, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/generated": -2.3066015243530273, |
|
"rewards/margins": 1.2394059896469116, |
|
"rewards/real": -1.0671956539154053, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0415430267062316e-07, |
|
"logits/generated": -2.7207372188568115, |
|
"logits/real": -2.6968023777008057, |
|
"logps/generated": -351.6153259277344, |
|
"logps/real": -358.0864562988281, |
|
"loss": 0.247, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/generated": -2.2227485179901123, |
|
"rewards/margins": 1.4021742343902588, |
|
"rewards/real": -0.820574164390564, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.893175074183976e-07, |
|
"logits/generated": -2.687243700027466, |
|
"logits/real": -2.6896092891693115, |
|
"logps/generated": -340.67498779296875, |
|
"logps/real": -325.22259521484375, |
|
"loss": 0.2683, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -2.342029094696045, |
|
"rewards/margins": 1.0233131647109985, |
|
"rewards/real": -1.3187161684036255, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.744807121661721e-07, |
|
"logits/generated": -2.709791421890259, |
|
"logits/real": -2.73317289352417, |
|
"logps/generated": -396.40606689453125, |
|
"logps/real": -388.1844482421875, |
|
"loss": 0.2442, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/generated": -2.465156078338623, |
|
"rewards/margins": 1.4016426801681519, |
|
"rewards/real": -1.063513159751892, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.596439169139466e-07, |
|
"logits/generated": -2.7428107261657715, |
|
"logits/real": -2.7355589866638184, |
|
"logps/generated": -368.4299011230469, |
|
"logps/real": -373.0939025878906, |
|
"loss": 0.2451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -2.1397910118103027, |
|
"rewards/margins": 1.308272123336792, |
|
"rewards/real": -0.8315190076828003, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4480712166172106e-07, |
|
"logits/generated": -2.703258752822876, |
|
"logits/real": -2.693305015563965, |
|
"logps/generated": -339.4871826171875, |
|
"logps/real": -326.2037658691406, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/generated": -2.3122100830078125, |
|
"rewards/margins": 1.2954694032669067, |
|
"rewards/real": -1.0167406797409058, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2997032640949554e-07, |
|
"logits/generated": -2.7212119102478027, |
|
"logits/real": -2.716545581817627, |
|
"logps/generated": -339.74267578125, |
|
"logps/real": -346.297607421875, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/generated": -1.9660396575927734, |
|
"rewards/margins": 1.078840970993042, |
|
"rewards/real": -0.8871987462043762, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1513353115727e-07, |
|
"logits/generated": -2.77765154838562, |
|
"logits/real": -2.7591769695281982, |
|
"logps/generated": -386.1648864746094, |
|
"logps/real": -381.2674560546875, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/generated": -2.414008617401123, |
|
"rewards/margins": 1.5327675342559814, |
|
"rewards/real": -0.8812410235404968, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0029673590504451e-07, |
|
"logits/generated": -2.7021536827087402, |
|
"logits/real": -2.70768666267395, |
|
"logps/generated": -354.3561706542969, |
|
"logps/real": -353.68212890625, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/generated": -2.4238786697387695, |
|
"rewards/margins": 1.27177894115448, |
|
"rewards/real": -1.152099847793579, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8545994065281897e-07, |
|
"logits/generated": -2.7076125144958496, |
|
"logits/real": -2.7352890968322754, |
|
"logps/generated": -365.26214599609375, |
|
"logps/real": -355.78564453125, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -2.567624092102051, |
|
"rewards/margins": 1.3714964389801025, |
|
"rewards/real": -1.1961278915405273, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7062314540059346e-07, |
|
"logits/generated": -2.7474026679992676, |
|
"logits/real": -2.733513593673706, |
|
"logps/generated": -370.26568603515625, |
|
"logps/real": -366.9493713378906, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/generated": -2.5266714096069336, |
|
"rewards/margins": 1.390491247177124, |
|
"rewards/real": -1.1361799240112305, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5578635014836795e-07, |
|
"logits/generated": -2.7382729053497314, |
|
"logits/real": -2.7590155601501465, |
|
"logps/generated": -339.4982604980469, |
|
"logps/real": -354.5415954589844, |
|
"loss": 0.2407, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/generated": -2.284700870513916, |
|
"rewards/margins": 1.1465342044830322, |
|
"rewards/real": -1.1381666660308838, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4094955489614243e-07, |
|
"logits/generated": -2.6945815086364746, |
|
"logits/real": -2.695988416671753, |
|
"logps/generated": -373.51385498046875, |
|
"logps/real": -350.8352966308594, |
|
"loss": 0.2303, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -2.4922609329223633, |
|
"rewards/margins": 1.3119118213653564, |
|
"rewards/real": -1.1803491115570068, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.261127596439169e-07, |
|
"logits/generated": -2.7670834064483643, |
|
"logits/real": -2.7600436210632324, |
|
"logps/generated": -344.09136962890625, |
|
"logps/real": -337.3023376464844, |
|
"loss": 0.2435, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/generated": -2.637000799179077, |
|
"rewards/margins": 1.540818452835083, |
|
"rewards/real": -1.0961825847625732, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1127596439169139e-07, |
|
"logits/generated": -2.6610119342803955, |
|
"logits/real": -2.6668756008148193, |
|
"logps/generated": -342.7873229980469, |
|
"logps/real": -330.2555847167969, |
|
"loss": 0.2424, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -2.5096726417541504, |
|
"rewards/margins": 1.4919517040252686, |
|
"rewards/real": -1.0177206993103027, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.643916913946587e-08, |
|
"logits/generated": -2.7119061946868896, |
|
"logits/real": -2.736443519592285, |
|
"logps/generated": -364.1079406738281, |
|
"logps/real": -365.68963623046875, |
|
"loss": 0.2316, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/generated": -3.0718140602111816, |
|
"rewards/margins": 1.5135959386825562, |
|
"rewards/real": -1.5582183599472046, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.160237388724035e-08, |
|
"logits/generated": -2.7183382511138916, |
|
"logits/real": -2.735018253326416, |
|
"logps/generated": -394.9755859375, |
|
"logps/real": -377.31427001953125, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/generated": -2.73679256439209, |
|
"rewards/margins": 1.1780710220336914, |
|
"rewards/real": -1.558721661567688, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.676557863501484e-08, |
|
"logits/generated": -2.7515358924865723, |
|
"logits/real": -2.742940664291382, |
|
"logps/generated": -388.3130187988281, |
|
"logps/real": -372.29437255859375, |
|
"loss": 0.234, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/generated": -2.38558030128479, |
|
"rewards/margins": 1.1235764026641846, |
|
"rewards/real": -1.2620038986206055, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.192878338278932e-08, |
|
"logits/generated": -2.695279121398926, |
|
"logits/real": -2.6978631019592285, |
|
"logps/generated": -365.2856140136719, |
|
"logps/real": -363.0904235839844, |
|
"loss": 0.2303, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -2.723789930343628, |
|
"rewards/margins": 1.42342209815979, |
|
"rewards/real": -1.300368070602417, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.709198813056379e-08, |
|
"logits/generated": -2.662724018096924, |
|
"logits/real": -2.675875186920166, |
|
"logps/generated": -338.28704833984375, |
|
"logps/real": -342.17462158203125, |
|
"loss": 0.2333, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/generated": -2.581637144088745, |
|
"rewards/margins": 1.3430696725845337, |
|
"rewards/real": -1.2385674715042114, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.225519287833828e-08, |
|
"logits/generated": -2.6803088188171387, |
|
"logits/real": -2.704144239425659, |
|
"logps/generated": -356.77703857421875, |
|
"logps/real": -359.313720703125, |
|
"loss": 0.2368, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -2.8369853496551514, |
|
"rewards/margins": 1.2829147577285767, |
|
"rewards/real": -1.554070234298706, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.418397626112759e-09, |
|
"logits/generated": -2.7113311290740967, |
|
"logits/real": -2.7457308769226074, |
|
"logps/generated": -404.06756591796875, |
|
"logps/real": -393.70843505859375, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/generated": -2.8109331130981445, |
|
"rewards/margins": 1.4008702039718628, |
|
"rewards/real": -1.4100630283355713, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 375, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2572693068186442, |
|
"train_runtime": 6192.5005, |
|
"train_samples_per_second": 7.751, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 375, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|