{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3157894736842104e-08, "logits/generated": -3.0232396125793457, "logits/real": -2.996844530105591, "logps/generated": -291.56793212890625, "logps/real": -340.7873840332031, "loss": 0.3645, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.03, "learning_rate": 1.3157894736842104e-07, "logits/generated": -2.977639675140381, "logits/real": -2.9781062602996826, "logps/generated": -338.7113037109375, "logps/real": -360.56146240234375, "loss": 0.3584, "rewards/accuracies": 0.5555555820465088, "rewards/generated": 0.10237760096788406, "rewards/margins": 0.04294492304325104, "rewards/real": 0.1453225314617157, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.631578947368421e-07, "logits/generated": -2.9867026805877686, "logits/real": -2.990659236907959, "logps/generated": -371.62164306640625, "logps/real": -372.09954833984375, "loss": 0.3391, "rewards/accuracies": 0.59375, "rewards/generated": 0.5660532712936401, "rewards/margins": 0.15894225239753723, "rewards/real": 0.7249955534934998, "step": 20 }, { "epoch": 0.08, "learning_rate": 3.9473684210526315e-07, "logits/generated": -2.940207004547119, "logits/real": -2.945539951324463, "logps/generated": -323.21282958984375, "logps/real": -323.20733642578125, "loss": 0.3061, "rewards/accuracies": 0.668749988079071, "rewards/generated": 0.2973577380180359, "rewards/margins": 0.29796674847602844, "rewards/real": 0.5953244566917419, "step": 30 }, { "epoch": 0.11, "learning_rate": 4.970326409495548e-07, "logits/generated": -2.849879026412964, "logits/real": -2.868879556655884, "logps/generated": -339.9267578125, "logps/real": -348.660400390625, "loss": 0.3043, "rewards/accuracies": 0.668749988079071, "rewards/generated": -0.18336713314056396, "rewards/margins": 0.4493914246559143, "rewards/real": 0.26602429151535034, "step": 40 }, { "epoch": 0.13, "learning_rate": 4.821958456973294e-07, "logits/generated": -2.8244385719299316, "logits/real": -2.819532871246338, "logps/generated": -345.12353515625, "logps/real": -345.24334716796875, "loss": 0.2707, "rewards/accuracies": 0.762499988079071, "rewards/generated": -1.0958898067474365, "rewards/margins": 0.7357537150382996, "rewards/real": -0.36013612151145935, "step": 50 }, { "epoch": 0.16, "learning_rate": 4.673590504451038e-07, "logits/generated": -2.7510242462158203, "logits/real": -2.744049549102783, "logps/generated": -343.3367614746094, "logps/real": -353.568115234375, "loss": 0.2658, "rewards/accuracies": 0.706250011920929, "rewards/generated": -1.349844217300415, "rewards/margins": 0.7489473819732666, "rewards/real": -0.6008970141410828, "step": 60 }, { "epoch": 0.19, "learning_rate": 4.5252225519287835e-07, "logits/generated": -2.787135362625122, "logits/real": -2.7906911373138428, "logps/generated": -380.27276611328125, "logps/real": -390.9748840332031, "loss": 0.2682, "rewards/accuracies": 0.731249988079071, "rewards/generated": -1.3749873638153076, "rewards/margins": 0.8838955760002136, "rewards/real": -0.49109163880348206, "step": 70 }, { "epoch": 0.21, "learning_rate": 4.376854599406528e-07, "logits/generated": -2.7812376022338867, "logits/real": -2.79952073097229, "logps/generated": -352.7367858886719, "logps/real": -343.9632873535156, "loss": 0.2784, "rewards/accuracies": 0.78125, "rewards/generated": -1.779193639755249, "rewards/margins": 1.1407415866851807, "rewards/real": -0.6384519934654236, "step": 80 }, { "epoch": 0.24, "learning_rate": 4.228486646884273e-07, "logits/generated": -2.80656099319458, "logits/real": -2.7876017093658447, "logps/generated": -369.83990478515625, "logps/real": -381.7880859375, "loss": 0.2742, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -1.4693442583084106, "rewards/margins": 0.8362933993339539, "rewards/real": -0.6330507397651672, "step": 90 }, { "epoch": 0.27, "learning_rate": 4.0801186943620176e-07, "logits/generated": -2.7452383041381836, "logits/real": -2.7657182216644287, "logps/generated": -354.4010314941406, "logps/real": -359.81219482421875, "loss": 0.2657, "rewards/accuracies": 0.6937500238418579, "rewards/generated": -1.3197325468063354, "rewards/margins": 0.8461551666259766, "rewards/real": -0.4735774099826813, "step": 100 }, { "epoch": 0.29, "learning_rate": 3.931750741839762e-07, "logits/generated": -2.8132920265197754, "logits/real": -2.8043882846832275, "logps/generated": -357.61383056640625, "logps/real": -354.3050537109375, "loss": 0.2716, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -1.6764816045761108, "rewards/margins": 0.9828389883041382, "rewards/real": -0.6936424374580383, "step": 110 }, { "epoch": 0.32, "learning_rate": 3.7833827893175073e-07, "logits/generated": -2.825605869293213, "logits/real": -2.8103888034820557, "logps/generated": -365.675537109375, "logps/real": -368.09197998046875, "loss": 0.2543, "rewards/accuracies": 0.762499988079071, "rewards/generated": -1.922782301902771, "rewards/margins": 1.292311191558838, "rewards/real": -0.6304711103439331, "step": 120 }, { "epoch": 0.35, "learning_rate": 3.635014836795252e-07, "logits/generated": -2.7962846755981445, "logits/real": -2.795644521713257, "logps/generated": -340.1669006347656, "logps/real": -348.66583251953125, "loss": 0.2602, "rewards/accuracies": 0.6312500238418579, "rewards/generated": -1.8341821432113647, "rewards/margins": 0.9289523959159851, "rewards/real": -0.9052297472953796, "step": 130 }, { "epoch": 0.37, "learning_rate": 3.486646884272997e-07, "logits/generated": -2.7868337631225586, "logits/real": -2.7795639038085938, "logps/generated": -358.3647766113281, "logps/real": -362.6192321777344, "loss": 0.2584, "rewards/accuracies": 0.7437499761581421, "rewards/generated": -1.9304630756378174, "rewards/margins": 1.0024542808532715, "rewards/real": -0.9280086755752563, "step": 140 }, { "epoch": 0.4, "learning_rate": 3.3382789317507414e-07, "logits/generated": -2.7806317806243896, "logits/real": -2.773284435272217, "logps/generated": -392.99273681640625, "logps/real": -388.6888732910156, "loss": 0.2429, "rewards/accuracies": 0.668749988079071, "rewards/generated": -2.132871389389038, "rewards/margins": 0.8601642847061157, "rewards/real": -1.272707223892212, "step": 150 }, { "epoch": 0.43, "learning_rate": 3.189910979228487e-07, "logits/generated": -2.7479450702667236, "logits/real": -2.7415106296539307, "logps/generated": -384.2051086425781, "logps/real": -382.9107360839844, "loss": 0.2518, "rewards/accuracies": 0.7562500238418579, "rewards/generated": -2.3066015243530273, "rewards/margins": 1.2394059896469116, "rewards/real": -1.0671956539154053, "step": 160 }, { "epoch": 0.45, "learning_rate": 3.0415430267062316e-07, "logits/generated": -2.7207372188568115, "logits/real": -2.6968023777008057, "logps/generated": -351.6153259277344, "logps/real": -358.0864562988281, "loss": 0.247, "rewards/accuracies": 0.8062499761581421, "rewards/generated": -2.2227485179901123, "rewards/margins": 1.4021742343902588, "rewards/real": -0.820574164390564, "step": 170 }, { "epoch": 0.48, "learning_rate": 2.893175074183976e-07, "logits/generated": -2.687243700027466, "logits/real": -2.6896092891693115, "logps/generated": -340.67498779296875, "logps/real": -325.22259521484375, "loss": 0.2683, "rewards/accuracies": 0.762499988079071, "rewards/generated": -2.342029094696045, "rewards/margins": 1.0233131647109985, "rewards/real": -1.3187161684036255, "step": 180 }, { "epoch": 0.51, "learning_rate": 2.744807121661721e-07, "logits/generated": -2.709791421890259, "logits/real": -2.73317289352417, "logps/generated": -396.40606689453125, "logps/real": -388.1844482421875, "loss": 0.2442, "rewards/accuracies": 0.78125, "rewards/generated": -2.465156078338623, "rewards/margins": 1.4016426801681519, "rewards/real": -1.063513159751892, "step": 190 }, { "epoch": 0.53, "learning_rate": 2.596439169139466e-07, "logits/generated": -2.7428107261657715, "logits/real": -2.7355589866638184, "logps/generated": -368.4299011230469, "logps/real": -373.0939025878906, "loss": 0.2451, "rewards/accuracies": 0.75, "rewards/generated": -2.1397910118103027, "rewards/margins": 1.308272123336792, "rewards/real": -0.8315190076828003, "step": 200 }, { "epoch": 0.56, "learning_rate": 2.4480712166172106e-07, "logits/generated": -2.703258752822876, "logits/real": -2.693305015563965, "logps/generated": -339.4871826171875, "logps/real": -326.2037658691406, "loss": 0.2395, "rewards/accuracies": 0.78125, "rewards/generated": -2.3122100830078125, "rewards/margins": 1.2954694032669067, "rewards/real": -1.0167406797409058, "step": 210 }, { "epoch": 0.59, "learning_rate": 2.2997032640949554e-07, "logits/generated": -2.7212119102478027, "logits/real": -2.716545581817627, "logps/generated": -339.74267578125, "logps/real": -346.297607421875, "loss": 0.2458, "rewards/accuracies": 0.7437499761581421, "rewards/generated": -1.9660396575927734, "rewards/margins": 1.078840970993042, "rewards/real": -0.8871987462043762, "step": 220 }, { "epoch": 0.61, "learning_rate": 2.1513353115727e-07, "logits/generated": -2.77765154838562, "logits/real": -2.7591769695281982, "logps/generated": -386.1648864746094, "logps/real": -381.2674560546875, "loss": 0.2324, "rewards/accuracies": 0.831250011920929, "rewards/generated": -2.414008617401123, "rewards/margins": 1.5327675342559814, "rewards/real": -0.8812410235404968, "step": 230 }, { "epoch": 0.64, "learning_rate": 2.0029673590504451e-07, "logits/generated": -2.7021536827087402, "logits/real": -2.70768666267395, "logps/generated": -354.3561706542969, "logps/real": -353.68212890625, "loss": 0.2492, "rewards/accuracies": 0.706250011920929, "rewards/generated": -2.4238786697387695, "rewards/margins": 1.27177894115448, "rewards/real": -1.152099847793579, "step": 240 }, { "epoch": 0.67, "learning_rate": 1.8545994065281897e-07, "logits/generated": -2.7076125144958496, "logits/real": -2.7352890968322754, "logps/generated": -365.26214599609375, "logps/real": -355.78564453125, "loss": 0.2426, "rewards/accuracies": 0.762499988079071, "rewards/generated": -2.567624092102051, "rewards/margins": 1.3714964389801025, "rewards/real": -1.1961278915405273, "step": 250 }, { "epoch": 0.69, "learning_rate": 1.7062314540059346e-07, "logits/generated": -2.7474026679992676, "logits/real": -2.733513593673706, "logps/generated": -370.26568603515625, "logps/real": -366.9493713378906, "loss": 0.2496, "rewards/accuracies": 0.793749988079071, "rewards/generated": -2.5266714096069336, "rewards/margins": 1.390491247177124, "rewards/real": -1.1361799240112305, "step": 260 }, { "epoch": 0.72, "learning_rate": 1.5578635014836795e-07, "logits/generated": -2.7382729053497314, "logits/real": -2.7590155601501465, "logps/generated": -339.4982604980469, "logps/real": -354.5415954589844, "loss": 0.2407, "rewards/accuracies": 0.731249988079071, "rewards/generated": -2.284700870513916, "rewards/margins": 1.1465342044830322, "rewards/real": -1.1381666660308838, "step": 270 }, { "epoch": 0.75, "learning_rate": 1.4094955489614243e-07, "logits/generated": -2.6945815086364746, "logits/real": -2.695988416671753, "logps/generated": -373.51385498046875, "logps/real": -350.8352966308594, "loss": 0.2303, "rewards/accuracies": 0.762499988079071, "rewards/generated": -2.4922609329223633, "rewards/margins": 1.3119118213653564, "rewards/real": -1.1803491115570068, "step": 280 }, { "epoch": 0.77, "learning_rate": 1.261127596439169e-07, "logits/generated": -2.7670834064483643, "logits/real": -2.7600436210632324, "logps/generated": -344.09136962890625, "logps/real": -337.3023376464844, "loss": 0.2435, "rewards/accuracies": 0.8062499761581421, "rewards/generated": -2.637000799179077, "rewards/margins": 1.540818452835083, "rewards/real": -1.0961825847625732, "step": 290 }, { "epoch": 0.8, "learning_rate": 1.1127596439169139e-07, "logits/generated": -2.6610119342803955, "logits/real": -2.6668756008148193, "logps/generated": -342.7873229980469, "logps/real": -330.2555847167969, "loss": 0.2424, "rewards/accuracies": 0.800000011920929, "rewards/generated": -2.5096726417541504, "rewards/margins": 1.4919517040252686, "rewards/real": -1.0177206993103027, "step": 300 }, { "epoch": 0.83, "learning_rate": 9.643916913946587e-08, "logits/generated": -2.7119061946868896, "logits/real": -2.736443519592285, "logps/generated": -364.1079406738281, "logps/real": -365.68963623046875, "loss": 0.2316, "rewards/accuracies": 0.793749988079071, "rewards/generated": -3.0718140602111816, "rewards/margins": 1.5135959386825562, "rewards/real": -1.5582183599472046, "step": 310 }, { "epoch": 0.85, "learning_rate": 8.160237388724035e-08, "logits/generated": -2.7183382511138916, "logits/real": -2.735018253326416, "logps/generated": -394.9755859375, "logps/real": -377.31427001953125, "loss": 0.2359, "rewards/accuracies": 0.768750011920929, "rewards/generated": -2.73679256439209, "rewards/margins": 1.1780710220336914, "rewards/real": -1.558721661567688, "step": 320 }, { "epoch": 0.88, "learning_rate": 6.676557863501484e-08, "logits/generated": -2.7515358924865723, "logits/real": -2.742940664291382, "logps/generated": -388.3130187988281, "logps/real": -372.29437255859375, "loss": 0.234, "rewards/accuracies": 0.706250011920929, "rewards/generated": -2.38558030128479, "rewards/margins": 1.1235764026641846, "rewards/real": -1.2620038986206055, "step": 330 }, { "epoch": 0.91, "learning_rate": 5.192878338278932e-08, "logits/generated": -2.695279121398926, "logits/real": -2.6978631019592285, "logps/generated": -365.2856140136719, "logps/real": -363.0904235839844, "loss": 0.2303, "rewards/accuracies": 0.762499988079071, "rewards/generated": -2.723789930343628, "rewards/margins": 1.42342209815979, "rewards/real": -1.300368070602417, "step": 340 }, { "epoch": 0.93, "learning_rate": 3.709198813056379e-08, "logits/generated": -2.662724018096924, "logits/real": -2.675875186920166, "logps/generated": -338.28704833984375, "logps/real": -342.17462158203125, "loss": 0.2333, "rewards/accuracies": 0.768750011920929, "rewards/generated": -2.581637144088745, "rewards/margins": 1.3430696725845337, "rewards/real": -1.2385674715042114, "step": 350 }, { "epoch": 0.96, "learning_rate": 2.225519287833828e-08, "logits/generated": -2.6803088188171387, "logits/real": -2.704144239425659, "logps/generated": -356.77703857421875, "logps/real": -359.313720703125, "loss": 0.2368, "rewards/accuracies": 0.75, "rewards/generated": -2.8369853496551514, "rewards/margins": 1.2829147577285767, "rewards/real": -1.554070234298706, "step": 360 }, { "epoch": 0.99, "learning_rate": 7.418397626112759e-09, "logits/generated": -2.7113311290740967, "logits/real": -2.7457308769226074, "logps/generated": -404.06756591796875, "logps/real": -393.70843505859375, "loss": 0.2369, "rewards/accuracies": 0.78125, "rewards/generated": -2.8109331130981445, "rewards/margins": 1.4008702039718628, "rewards/real": -1.4100630283355713, "step": 370 }, { "epoch": 1.0, "step": 375, "total_flos": 0.0, "train_loss": 0.2572693068186442, "train_runtime": 6192.5005, "train_samples_per_second": 7.751, "train_steps_per_second": 0.061 } ], "logging_steps": 10, "max_steps": 375, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }