{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 4728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0570824524312896e-09, "logits/generated": -2.9680545330047607, "logits/real": -3.0339415073394775, "logps/generated": -179.1082763671875, "logps/real": -255.83349609375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0570824524312896e-08, "logits/generated": -2.9810054302215576, "logits/real": -3.052731990814209, "logps/generated": -105.46451568603516, "logps/real": -221.724853515625, "loss": 0.6924, "rewards/accuracies": 0.4305555522441864, "rewards/generated": -0.0014893743209540844, "rewards/margins": 0.0027981153689324856, "rewards/real": 0.0013087405823171139, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.114164904862579e-08, "logits/generated": -2.968405246734619, "logits/real": -3.0509631633758545, "logps/generated": -104.47855377197266, "logps/real": -277.7776184082031, "loss": 0.6679, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -0.029584383592009544, "rewards/margins": 0.053206123411655426, "rewards/real": 0.02362174168229103, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.1712473572938685e-08, "logits/generated": -2.994403600692749, "logits/real": -3.063781499862671, "logps/generated": -98.96472930908203, "logps/real": -216.9051513671875, "loss": 0.5905, "rewards/accuracies": 0.9375, "rewards/generated": -0.14395593106746674, "rewards/margins": 0.2154092788696289, "rewards/real": 0.07145334035158157, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.228329809725158e-08, "logits/generated": -2.9682469367980957, "logits/real": -3.0599234104156494, "logps/generated": -120.07786560058594, "logps/real": -250.1599578857422, "loss": 0.467, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.3916197419166565, "rewards/margins": 0.5931448936462402, "rewards/real": 0.2015252411365509, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.285412262156448e-08, "logits/generated": -2.9771907329559326, "logits/real": -3.0221831798553467, "logps/generated": -100.34281158447266, "logps/real": -196.468505859375, "loss": 0.3908, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.585746169090271, "rewards/margins": 0.8785923719406128, "rewards/real": 0.2928462624549866, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.342494714587737e-08, "logits/generated": -2.9906773567199707, "logits/real": -3.0186338424682617, "logps/generated": -121.91976165771484, "logps/real": -218.0440673828125, "loss": 0.2915, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.8651537895202637, "rewards/margins": 1.1812090873718262, "rewards/real": 0.3160552680492401, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.399577167019028e-08, "logits/generated": -2.928558826446533, "logits/real": -3.018244743347168, "logps/generated": -128.43055725097656, "logps/real": -290.92919921875, "loss": 0.231, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.2124083042144775, "rewards/margins": 1.8828494548797607, "rewards/real": 0.6704407930374146, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.456659619450317e-08, "logits/generated": -2.8757901191711426, "logits/real": -2.999915599822998, "logps/generated": -117.48758697509766, "logps/real": -263.73193359375, "loss": 0.2078, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.505746841430664, "rewards/margins": 2.246025800704956, "rewards/real": 0.7402790188789368, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.513742071881606e-08, "logits/generated": -2.876457691192627, "logits/real": -2.995556116104126, "logps/generated": -131.91555786132812, "logps/real": -248.60205078125, "loss": 0.1751, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.616385817527771, "rewards/margins": 2.4295945167541504, "rewards/real": 0.8132089376449585, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0570824524312896e-07, "logits/generated": -2.8680336475372314, "logits/real": -2.983652114868164, "logps/generated": -135.1668701171875, "logps/real": -219.4280548095703, "loss": 0.1643, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.855940580368042, "rewards/margins": 2.530752420425415, "rewards/real": 0.6748121976852417, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.1627906976744186e-07, "logits/generated": -2.908716917037964, "logits/real": -2.9902663230895996, "logps/generated": -116.56935119628906, "logps/real": -190.9403076171875, "loss": 0.1688, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.6852614879608154, "rewards/margins": 2.455672264099121, "rewards/real": 0.7704105973243713, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.2684989429175474e-07, "logits/generated": -2.867933750152588, "logits/real": -2.9436843395233154, "logps/generated": -122.19034576416016, "logps/real": -200.4087371826172, "loss": 0.1407, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.132105827331543, "rewards/margins": 2.99078106880188, "rewards/real": 0.8586748838424683, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3742071881606765e-07, "logits/generated": -2.8560738563537598, "logits/real": -2.948706865310669, "logps/generated": -140.22650146484375, "logps/real": -238.3866729736328, "loss": 0.11, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.533663272857666, "rewards/margins": 3.416466474533081, "rewards/real": 0.8828039169311523, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4799154334038056e-07, "logits/generated": -2.7741053104400635, "logits/real": -2.9004416465759277, "logps/generated": -137.82418823242188, "logps/real": -271.0147705078125, "loss": 0.1159, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.4761903285980225, "rewards/margins": 3.3843002319335938, "rewards/real": 0.9081098437309265, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.5856236786469342e-07, "logits/generated": -2.813859701156616, "logits/real": -2.9562556743621826, "logps/generated": -141.76651000976562, "logps/real": -253.9093017578125, "loss": 0.1004, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.886215925216675, "rewards/margins": 3.9585909843444824, "rewards/real": 1.0723752975463867, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.6913319238900633e-07, "logits/generated": -2.755221366882324, "logits/real": -2.9063618183135986, "logps/generated": -142.30709838867188, "logps/real": -174.6675567626953, "loss": 0.0916, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.7275359630584717, "rewards/margins": 4.270571231842041, "rewards/real": 0.5430347323417664, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.7970401691331924e-07, "logits/generated": -2.762349843978882, "logits/real": -2.9174437522888184, "logps/generated": -134.84609985351562, "logps/real": -218.52487182617188, "loss": 0.0826, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.727612018585205, "rewards/margins": 4.559648513793945, "rewards/real": 0.8320371508598328, "step": 170 }, { "epoch": 0.11, "learning_rate": 1.9027484143763213e-07, "logits/generated": -2.7523839473724365, "logits/real": -2.8980984687805176, "logps/generated": -142.16477966308594, "logps/real": -214.64688110351562, "loss": 0.1008, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.7176735401153564, "rewards/margins": 4.531271457672119, "rewards/real": 0.8135976791381836, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.00845665961945e-07, "logits/generated": -2.7264347076416016, "logits/real": -2.9047653675079346, "logps/generated": -144.19976806640625, "logps/real": -226.42312622070312, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/generated": -4.002590179443359, "rewards/margins": 4.741377830505371, "rewards/real": 0.7387879490852356, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1141649048625792e-07, "logits/generated": -2.7650415897369385, "logits/real": -2.9020564556121826, "logps/generated": -150.42185974121094, "logps/real": -206.5177001953125, "loss": 0.0838, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.431978702545166, "rewards/margins": 5.01816463470459, "rewards/real": 0.5861854553222656, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.219873150105708e-07, "logits/generated": -2.7214510440826416, "logits/real": -2.90799617767334, "logps/generated": -151.5577850341797, "logps/real": -270.7734680175781, "loss": 0.0901, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.565618515014648, "rewards/margins": 5.258214473724365, "rewards/real": 0.6925961375236511, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.3255813953488372e-07, "logits/generated": -2.695213556289673, "logits/real": -2.896963596343994, "logps/generated": -154.06976318359375, "logps/real": -225.1235809326172, "loss": 0.0563, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.846016883850098, "rewards/margins": 5.5229692459106445, "rewards/real": 0.6769517660140991, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.431289640591966e-07, "logits/generated": -2.684670925140381, "logits/real": -2.8483946323394775, "logps/generated": -158.3540496826172, "logps/real": -233.24765014648438, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/generated": -5.12518310546875, "rewards/margins": 5.8765363693237305, "rewards/real": 0.7513536810874939, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.536997885835095e-07, "logits/generated": -2.7210946083068848, "logits/real": -2.8621644973754883, "logps/generated": -155.71336364746094, "logps/real": -272.6509094238281, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/generated": -5.054671287536621, "rewards/margins": 5.770359039306641, "rewards/real": 0.7156881093978882, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.642706131078224e-07, "logits/generated": -2.684602737426758, "logits/real": -2.858766794204712, "logps/generated": -165.3872833251953, "logps/real": -220.634765625, "loss": 0.0519, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.2169084548950195, "rewards/margins": 6.802570343017578, "rewards/real": 0.585662305355072, "step": 250 }, { "epoch": 0.16, "learning_rate": 2.748414376321353e-07, "logits/generated": -2.650390148162842, "logits/real": -2.83626389503479, "logps/generated": -169.95376586914062, "logps/real": -221.45565795898438, "loss": 0.046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.422726631164551, "rewards/margins": 6.729206085205078, "rewards/real": 0.3064800202846527, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.854122621564482e-07, "logits/generated": -2.6237170696258545, "logits/real": -2.831840753555298, "logps/generated": -179.83026123046875, "logps/real": -269.57489013671875, "loss": 0.0466, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.582816123962402, "rewards/margins": 6.789361476898193, "rewards/real": 0.20654550194740295, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.959830866807611e-07, "logits/generated": -2.6729588508605957, "logits/real": -2.7799630165100098, "logps/generated": -166.50579833984375, "logps/real": -209.8294219970703, "loss": 0.0534, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -6.923952579498291, "rewards/margins": 6.987476348876953, "rewards/real": 0.06352332979440689, "step": 280 }, { "epoch": 0.18, "learning_rate": 3.0655391120507393e-07, "logits/generated": -2.7076289653778076, "logits/real": -2.836456537246704, "logps/generated": -180.61297607421875, "logps/real": -224.1366424560547, "loss": 0.0573, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -7.590481758117676, "rewards/margins": 7.375975131988525, "rewards/real": -0.21450698375701904, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.1712473572938684e-07, "logits/generated": -2.563586473464966, "logits/real": -2.80302357673645, "logps/generated": -202.00930786132812, "logps/real": -261.15875244140625, "loss": 0.044, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.740743637084961, "rewards/margins": 8.463761329650879, "rewards/real": -0.27698156237602234, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.2769556025369975e-07, "logits/generated": -2.6333580017089844, "logits/real": -2.798062562942505, "logps/generated": -204.22036743164062, "logps/real": -244.3586883544922, "loss": 0.0585, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.76515007019043, "rewards/margins": 7.9285407066345215, "rewards/real": -0.8366090059280396, "step": 310 }, { "epoch": 0.2, "learning_rate": 3.3826638477801266e-07, "logits/generated": -2.666229248046875, "logits/real": -2.777111291885376, "logps/generated": -206.2598876953125, "logps/real": -258.5753173828125, "loss": 0.0494, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.166308403015137, "rewards/margins": 8.505932807922363, "rewards/real": -0.6603760719299316, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.4883720930232557e-07, "logits/generated": -2.61083984375, "logits/real": -2.7282021045684814, "logps/generated": -204.8091583251953, "logps/real": -260.3985595703125, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/generated": -9.159486770629883, "rewards/margins": 8.657907485961914, "rewards/real": -0.5015801191329956, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.594080338266385e-07, "logits/generated": -2.6197121143341064, "logits/real": -2.7505860328674316, "logps/generated": -198.1029815673828, "logps/real": -274.4460144042969, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/generated": -8.936513900756836, "rewards/margins": 8.366409301757812, "rewards/real": -0.570103108882904, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.699788583509514e-07, "logits/generated": -2.5747499465942383, "logits/real": -2.700770854949951, "logps/generated": -195.65858459472656, "logps/real": -264.3505859375, "loss": 0.0555, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -9.730626106262207, "rewards/margins": 7.978546142578125, "rewards/real": -1.7520811557769775, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.8054968287526425e-07, "logits/generated": -2.644111156463623, "logits/real": -2.7844269275665283, "logps/generated": -187.1933135986328, "logps/real": -274.1761779785156, "loss": 0.0478, "rewards/accuracies": 0.949999988079071, "rewards/generated": -8.151183128356934, "rewards/margins": 7.859720706939697, "rewards/real": -0.29146260023117065, "step": 360 }, { "epoch": 0.23, "learning_rate": 3.9112050739957716e-07, "logits/generated": -2.5913429260253906, "logits/real": -2.7488276958465576, "logps/generated": -188.7386016845703, "logps/real": -247.2688446044922, "loss": 0.0408, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -8.474884033203125, "rewards/margins": 8.537331581115723, "rewards/real": 0.062447331845760345, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.0169133192389e-07, "logits/generated": -2.5858843326568604, "logits/real": -2.7015249729156494, "logps/generated": -195.01470947265625, "logps/real": -203.84634399414062, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/generated": -9.704763412475586, "rewards/margins": 8.604458808898926, "rewards/real": -1.1003044843673706, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.1226215644820293e-07, "logits/generated": -2.5373871326446533, "logits/real": -2.662891387939453, "logps/generated": -196.2036590576172, "logps/real": -228.1417236328125, "loss": 0.0345, "rewards/accuracies": 0.949999988079071, "rewards/generated": -10.00887680053711, "rewards/margins": 8.654512405395508, "rewards/real": -1.3543639183044434, "step": 390 }, { "epoch": 0.25, "learning_rate": 4.2283298097251584e-07, "logits/generated": -2.5022478103637695, "logits/real": -2.657803773880005, "logps/generated": -204.67507934570312, "logps/real": -220.3046875, "loss": 0.0373, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.569047927856445, "rewards/margins": 8.992898941040039, "rewards/real": -1.5761499404907227, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.3340380549682875e-07, "logits/generated": -2.590941905975342, "logits/real": -2.6890556812286377, "logps/generated": -225.4229736328125, "logps/real": -251.73074340820312, "loss": 0.0488, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -11.226091384887695, "rewards/margins": 9.168951034545898, "rewards/real": -2.0571413040161133, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.439746300211416e-07, "logits/generated": -2.5578768253326416, "logits/real": -2.7013614177703857, "logps/generated": -228.48886108398438, "logps/real": -288.42193603515625, "loss": 0.0305, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.415830612182617, "rewards/margins": 10.27761459350586, "rewards/real": -2.1382155418395996, "step": 420 }, { "epoch": 0.27, "learning_rate": 4.545454545454545e-07, "logits/generated": -2.5126521587371826, "logits/real": -2.6469998359680176, "logps/generated": -242.6312255859375, "logps/real": -259.1160888671875, "loss": 0.0425, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.763842582702637, "rewards/margins": 11.178007125854492, "rewards/real": -2.5858359336853027, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.6511627906976743e-07, "logits/generated": -2.511451482772827, "logits/real": -2.610137939453125, "logps/generated": -224.3479766845703, "logps/real": -240.7174530029297, "loss": 0.0462, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.783953666687012, "rewards/margins": 9.211058616638184, "rewards/real": -2.572895050048828, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.7568710359408034e-07, "logits/generated": -2.508060932159424, "logits/real": -2.583813190460205, "logps/generated": -230.49850463867188, "logps/real": -259.2693786621094, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/generated": -11.978529930114746, "rewards/margins": 9.823976516723633, "rewards/real": -2.154552936553955, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.862579281183933e-07, "logits/generated": -2.4459714889526367, "logits/real": -2.5999927520751953, "logps/generated": -222.58071899414062, "logps/real": -281.602783203125, "loss": 0.0271, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.706878662109375, "rewards/margins": 9.985689163208008, "rewards/real": -1.7211902141571045, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.968287526427061e-07, "logits/generated": -2.5001561641693115, "logits/real": -2.614675998687744, "logps/generated": -231.21444702148438, "logps/real": -303.97711181640625, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/generated": -12.426594734191895, "rewards/margins": 10.142064094543457, "rewards/real": -2.2845306396484375, "step": 470 }, { "epoch": 0.3, "learning_rate": 4.991774383078731e-07, "logits/generated": -2.4237747192382812, "logits/real": -2.5837903022766113, "logps/generated": -235.39791870117188, "logps/real": -256.73016357421875, "loss": 0.031, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.370112419128418, "rewards/margins": 10.403009414672852, "rewards/real": -2.967103958129883, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.980023501762632e-07, "logits/generated": -2.475121259689331, "logits/real": -2.6039159297943115, "logps/generated": -255.3181915283203, "logps/real": -247.61093139648438, "loss": 0.0309, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -15.310647964477539, "rewards/margins": 11.72684383392334, "rewards/real": -3.583803176879883, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.968272620446533e-07, "logits/generated": -2.4924330711364746, "logits/real": -2.595700979232788, "logps/generated": -268.7317199707031, "logps/real": -251.86392211914062, "loss": 0.0354, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.943933486938477, "rewards/margins": 12.208364486694336, "rewards/real": -3.735567808151245, "step": 500 }, { "epoch": 0.32, "learning_rate": 4.956521739130435e-07, "logits/generated": -2.5180201530456543, "logits/real": -2.5667471885681152, "logps/generated": -257.37322998046875, "logps/real": -284.225830078125, "loss": 0.0206, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.81371021270752, "rewards/margins": 11.491992950439453, "rewards/real": -3.3217170238494873, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.944770857814336e-07, "logits/generated": -2.4635119438171387, "logits/real": -2.536935567855835, "logps/generated": -259.5522766113281, "logps/real": -244.10079956054688, "loss": 0.026, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.024828910827637, "rewards/margins": 12.193452835083008, "rewards/real": -2.831378221511841, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.933019976498237e-07, "logits/generated": -2.4268784523010254, "logits/real": -2.527644157409668, "logps/generated": -250.21347045898438, "logps/real": -240.0506591796875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/generated": -14.466471672058105, "rewards/margins": 11.934263229370117, "rewards/real": -2.5322086811065674, "step": 530 }, { "epoch": 0.34, "learning_rate": 4.921269095182138e-07, "logits/generated": -2.436392068862915, "logits/real": -2.5244240760803223, "logps/generated": -228.0118408203125, "logps/real": -242.95556640625, "loss": 0.034, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.751724243164062, "rewards/margins": 10.017329216003418, "rewards/real": -1.7343953847885132, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.909518213866039e-07, "logits/generated": -2.4732158184051514, "logits/real": -2.5478568077087402, "logps/generated": -229.0042266845703, "logps/real": -236.5896453857422, "loss": 0.0546, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.801587104797363, "rewards/margins": 10.913532257080078, "rewards/real": -0.8880546689033508, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.897767332549941e-07, "logits/generated": -2.5068588256835938, "logits/real": -2.603188991546631, "logps/generated": -220.4393310546875, "logps/real": -260.52032470703125, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/generated": -11.580656051635742, "rewards/margins": 10.65870475769043, "rewards/real": -0.9219503402709961, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.886016451233842e-07, "logits/generated": -2.5237538814544678, "logits/real": -2.5715441703796387, "logps/generated": -254.0616912841797, "logps/real": -278.89947509765625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/generated": -13.214986801147461, "rewards/margins": 12.340234756469727, "rewards/real": -0.8747501373291016, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.874265569917743e-07, "logits/generated": -2.4976603984832764, "logits/real": -2.5703940391540527, "logps/generated": -235.39816284179688, "logps/real": -229.18832397460938, "loss": 0.0568, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -12.485773086547852, "rewards/margins": 10.787823677062988, "rewards/real": -1.6979477405548096, "step": 580 }, { "epoch": 0.37, "learning_rate": 4.862514688601645e-07, "logits/generated": -2.4129319190979004, "logits/real": -2.5519614219665527, "logps/generated": -232.7602996826172, "logps/real": -230.69338989257812, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/generated": -12.507562637329102, "rewards/margins": 10.88932991027832, "rewards/real": -1.6182336807250977, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.850763807285546e-07, "logits/generated": -2.368098735809326, "logits/real": -2.512528896331787, "logps/generated": -256.1530456542969, "logps/real": -293.58050537109375, "loss": 0.0229, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.072511672973633, "rewards/margins": 12.025641441345215, "rewards/real": -2.046870231628418, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.839012925969447e-07, "logits/generated": -2.349189043045044, "logits/real": -2.4440290927886963, "logps/generated": -256.9201354980469, "logps/real": -251.1465606689453, "loss": 0.0206, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.40594482421875, "rewards/margins": 12.272120475769043, "rewards/real": -3.1338250637054443, "step": 610 }, { "epoch": 0.39, "learning_rate": 4.827262044653348e-07, "logits/generated": -2.3456802368164062, "logits/real": -2.4477171897888184, "logps/generated": -243.4130401611328, "logps/real": -298.19415283203125, "loss": 0.0407, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.486459732055664, "rewards/margins": 11.11083984375, "rewards/real": -2.3756182193756104, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.81551116333725e-07, "logits/generated": -2.3130345344543457, "logits/real": -2.4230241775512695, "logps/generated": -234.4820556640625, "logps/real": -215.69741821289062, "loss": 0.0323, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -13.71208667755127, "rewards/margins": 12.096234321594238, "rewards/real": -1.6158527135849, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.803760282021151e-07, "logits/generated": -2.439054012298584, "logits/real": -2.489902973175049, "logps/generated": -234.609619140625, "logps/real": -227.06698608398438, "loss": 0.0253, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.273165702819824, "rewards/margins": 10.841035842895508, "rewards/real": -2.432131052017212, "step": 640 }, { "epoch": 0.41, "learning_rate": 4.792009400705052e-07, "logits/generated": -2.3417305946350098, "logits/real": -2.4370510578155518, "logps/generated": -250.015380859375, "logps/real": -228.6704559326172, "loss": 0.0298, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -14.521652221679688, "rewards/margins": 12.285801887512207, "rewards/real": -2.2358508110046387, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.780258519388953e-07, "logits/generated": -2.3547120094299316, "logits/real": -2.4297335147857666, "logps/generated": -259.03955078125, "logps/real": -303.78106689453125, "loss": 0.0474, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -13.853286743164062, "rewards/margins": 12.204731941223145, "rewards/real": -1.6485567092895508, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.768507638072856e-07, "logits/generated": -2.316434383392334, "logits/real": -2.376094341278076, "logps/generated": -248.5943603515625, "logps/real": -269.3667297363281, "loss": 0.0195, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.337122917175293, "rewards/margins": 11.638051986694336, "rewards/real": -2.6990718841552734, "step": 670 }, { "epoch": 0.43, "learning_rate": 4.7567567567567566e-07, "logits/generated": -2.3710427284240723, "logits/real": -2.415874481201172, "logps/generated": -254.5215301513672, "logps/real": -308.25048828125, "loss": 0.0255, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.657424926757812, "rewards/margins": 11.751736640930176, "rewards/real": -1.9056885242462158, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.745005875440658e-07, "logits/generated": -2.224567413330078, "logits/real": -2.3295130729675293, "logps/generated": -244.08151245117188, "logps/real": -279.2975769042969, "loss": 0.0331, "rewards/accuracies": 0.987500011920929, "rewards/generated": -13.952921867370605, "rewards/margins": 11.643460273742676, "rewards/real": -2.3094632625579834, "step": 690 }, { "epoch": 0.44, "learning_rate": 4.733254994124559e-07, "logits/generated": -2.297942638397217, "logits/real": -2.3956637382507324, "logps/generated": -259.2550354003906, "logps/real": -235.402587890625, "loss": 0.0367, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -15.151153564453125, "rewards/margins": 13.520169258117676, "rewards/real": -1.630985975265503, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.72150411280846e-07, "logits/generated": -2.262821674346924, "logits/real": -2.3863885402679443, "logps/generated": -257.7832336425781, "logps/real": -250.14306640625, "loss": 0.0332, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.589390754699707, "rewards/margins": 14.08702564239502, "rewards/real": -1.502366065979004, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.7097532314923617e-07, "logits/generated": -2.29247784614563, "logits/real": -2.328556537628174, "logps/generated": -274.10650634765625, "logps/real": -247.0130615234375, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/generated": -16.48426628112793, "rewards/margins": 13.664430618286133, "rewards/real": -2.8198394775390625, "step": 720 }, { "epoch": 0.46, "learning_rate": 4.6980023501762627e-07, "logits/generated": -2.256700277328491, "logits/real": -2.3285982608795166, "logps/generated": -269.7905578613281, "logps/real": -279.09906005859375, "loss": 0.0169, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.461273193359375, "rewards/margins": 13.154187202453613, "rewards/real": -2.3070871829986572, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.686251468860165e-07, "logits/generated": -2.2194008827209473, "logits/real": -2.3112258911132812, "logps/generated": -287.9308776855469, "logps/real": -242.48416137695312, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/generated": -17.926103591918945, "rewards/margins": 14.351516723632812, "rewards/real": -3.5745856761932373, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.674500587544066e-07, "logits/generated": -2.3291237354278564, "logits/real": -2.4051151275634766, "logps/generated": -270.6801452636719, "logps/real": -284.98931884765625, "loss": 0.0308, "rewards/accuracies": 0.987500011920929, "rewards/generated": -15.692561149597168, "rewards/margins": 12.790186882019043, "rewards/real": -2.902374267578125, "step": 750 }, { "epoch": 0.48, "learning_rate": 4.662749706227967e-07, "logits/generated": -2.228729724884033, "logits/real": -2.342529773712158, "logps/generated": -279.6976318359375, "logps/real": -259.60150146484375, "loss": 0.0426, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.11913299560547, "rewards/margins": 13.92261028289795, "rewards/real": -3.1965222358703613, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6509988249118683e-07, "logits/generated": -2.215961217880249, "logits/real": -2.3359737396240234, "logps/generated": -292.3762512207031, "logps/real": -271.7994384765625, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/generated": -18.02857780456543, "rewards/margins": 14.50737190246582, "rewards/real": -3.521205425262451, "step": 770 }, { "epoch": 0.49, "learning_rate": 4.6392479435957693e-07, "logits/generated": -2.204655408859253, "logits/real": -2.357146739959717, "logps/generated": -287.6128845214844, "logps/real": -313.80096435546875, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/generated": -18.052814483642578, "rewards/margins": 14.257978439331055, "rewards/real": -3.7948365211486816, "step": 780 }, { "epoch": 0.5, "learning_rate": 4.6274970622796704e-07, "logits/generated": -2.2601847648620605, "logits/real": -2.416104555130005, "logps/generated": -297.5831604003906, "logps/real": -311.974609375, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/generated": -18.84690284729004, "rewards/margins": 14.774507522583008, "rewards/real": -4.072394371032715, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.6157461809635724e-07, "logits/generated": -2.2359187602996826, "logits/real": -2.4105429649353027, "logps/generated": -263.5639953613281, "logps/real": -253.03262329101562, "loss": 0.0287, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.32426643371582, "rewards/margins": 13.009058952331543, "rewards/real": -3.315206527709961, "step": 800 }, { "epoch": 0.51, "learning_rate": 4.6039952996474734e-07, "logits/generated": -2.2089052200317383, "logits/real": -2.3267462253570557, "logps/generated": -292.5827331542969, "logps/real": -283.88092041015625, "loss": 0.0183, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.413471221923828, "rewards/margins": 14.490208625793457, "rewards/real": -3.92326283454895, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.5922444183313745e-07, "logits/generated": -2.1738524436950684, "logits/real": -2.3648948669433594, "logps/generated": -283.103759765625, "logps/real": -276.14984130859375, "loss": 0.0277, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.33072280883789, "rewards/margins": 14.203720092773438, "rewards/real": -3.1270031929016113, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.580493537015276e-07, "logits/generated": -2.140481948852539, "logits/real": -2.2898101806640625, "logps/generated": -279.39434814453125, "logps/real": -256.742431640625, "loss": 0.0236, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.80463218688965, "rewards/margins": 14.53624153137207, "rewards/real": -3.2683918476104736, "step": 830 }, { "epoch": 0.53, "learning_rate": 4.568742655699177e-07, "logits/generated": -2.1728413105010986, "logits/real": -2.299516201019287, "logps/generated": -290.2254943847656, "logps/real": -296.55792236328125, "loss": 0.0188, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.140968322753906, "rewards/margins": 14.465237617492676, "rewards/real": -3.675732374191284, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.5569917743830786e-07, "logits/generated": -2.078281879425049, "logits/real": -2.2794442176818848, "logps/generated": -303.69927978515625, "logps/real": -312.5650329589844, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/generated": -19.2463321685791, "rewards/margins": 15.750595092773438, "rewards/real": -3.4957356452941895, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.54524089306698e-07, "logits/generated": -2.10638689994812, "logits/real": -2.2108185291290283, "logps/generated": -272.5052185058594, "logps/real": -257.61053466796875, "loss": 0.0467, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -16.4874210357666, "rewards/margins": 13.006558418273926, "rewards/real": -3.4808602333068848, "step": 860 }, { "epoch": 0.55, "learning_rate": 4.533490011750881e-07, "logits/generated": -2.015160083770752, "logits/real": -2.078425884246826, "logps/generated": -280.7167663574219, "logps/real": -296.03411865234375, "loss": 0.0448, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.296838760375977, "rewards/margins": 13.392329216003418, "rewards/real": -3.9045073986053467, "step": 870 }, { "epoch": 0.56, "learning_rate": 4.521739130434782e-07, "logits/generated": -2.0493757724761963, "logits/real": -2.024940252304077, "logps/generated": -295.7134704589844, "logps/real": -240.38882446289062, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.699758529663086, "rewards/margins": 15.107478141784668, "rewards/real": -4.59227991104126, "step": 880 }, { "epoch": 0.56, "learning_rate": 4.5099882491186837e-07, "logits/generated": -2.033402681350708, "logits/real": -2.106812000274658, "logps/generated": -281.4131774902344, "logps/real": -348.5048522949219, "loss": 0.0332, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.562549591064453, "rewards/margins": 13.78009033203125, "rewards/real": -3.782458543777466, "step": 890 }, { "epoch": 0.57, "learning_rate": 4.4982373678025847e-07, "logits/generated": -1.9644243717193604, "logits/real": -1.9998550415039062, "logps/generated": -292.21514892578125, "logps/real": -269.4073181152344, "loss": 0.0295, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.54524803161621, "rewards/margins": 13.79992961883545, "rewards/real": -3.7453200817108154, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.486486486486487e-07, "logits/generated": -2.1318039894104004, "logits/real": -2.152733564376831, "logps/generated": -283.0270080566406, "logps/real": -276.83416748046875, "loss": 0.0394, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.132619857788086, "rewards/margins": 13.454730033874512, "rewards/real": -2.677889347076416, "step": 910 }, { "epoch": 0.58, "learning_rate": 4.474735605170388e-07, "logits/generated": -2.255167245864868, "logits/real": -2.1642110347747803, "logps/generated": -267.664306640625, "logps/real": -241.0842742919922, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/generated": -15.635882377624512, "rewards/margins": 13.6681547164917, "rewards/real": -1.9677283763885498, "step": 920 }, { "epoch": 0.59, "learning_rate": 4.462984723854289e-07, "logits/generated": -2.261536121368408, "logits/real": -2.163490056991577, "logps/generated": -273.19061279296875, "logps/real": -238.5223846435547, "loss": 0.0232, "rewards/accuracies": 0.987500011920929, "rewards/generated": -16.670108795166016, "rewards/margins": 14.045565605163574, "rewards/real": -2.624541759490967, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.4512338425381903e-07, "logits/generated": -2.2644219398498535, "logits/real": -2.1735246181488037, "logps/generated": -272.95294189453125, "logps/real": -242.27676391601562, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/generated": -16.10274887084961, "rewards/margins": 13.76220989227295, "rewards/real": -2.340538740158081, "step": 940 }, { "epoch": 0.6, "learning_rate": 4.4394829612220913e-07, "logits/generated": -2.144902467727661, "logits/real": -2.092916250228882, "logps/generated": -276.24700927734375, "logps/real": -269.9608154296875, "loss": 0.0255, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.573253631591797, "rewards/margins": 15.12501049041748, "rewards/real": -2.44824481010437, "step": 950 }, { "epoch": 0.61, "learning_rate": 4.4277320799059924e-07, "logits/generated": -2.154425621032715, "logits/real": -2.0945446491241455, "logps/generated": -270.79022216796875, "logps/real": -268.6008605957031, "loss": 0.0272, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.448904037475586, "rewards/margins": 14.52673053741455, "rewards/real": -2.9221715927124023, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4159811985898944e-07, "logits/generated": -2.2013556957244873, "logits/real": -2.1177525520324707, "logps/generated": -279.6724548339844, "logps/real": -320.83197021484375, "loss": 0.0348, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.04720687866211, "rewards/margins": 14.274810791015625, "rewards/real": -2.7723937034606934, "step": 970 }, { "epoch": 0.62, "learning_rate": 4.4042303172737954e-07, "logits/generated": -2.160095453262329, "logits/real": -2.0706989765167236, "logps/generated": -312.0510559082031, "logps/real": -299.41876220703125, "loss": 0.034, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.43877410888672, "rewards/margins": 15.981279373168945, "rewards/real": -4.457496643066406, "step": 980 }, { "epoch": 0.63, "learning_rate": 4.3924794359576964e-07, "logits/generated": -2.0770351886749268, "logits/real": -2.0986149311065674, "logps/generated": -307.2867126464844, "logps/real": -285.21368408203125, "loss": 0.0327, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.678424835205078, "rewards/margins": 14.942266464233398, "rewards/real": -4.736159324645996, "step": 990 }, { "epoch": 0.63, "learning_rate": 4.380728554641598e-07, "logits/generated": -2.0771639347076416, "logits/real": -2.0399820804595947, "logps/generated": -300.96954345703125, "logps/real": -260.22320556640625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/generated": -19.797773361206055, "rewards/margins": 15.988889694213867, "rewards/real": -3.8088836669921875, "step": 1000 }, { "epoch": 0.64, "learning_rate": 4.368977673325499e-07, "logits/generated": -2.097740650177002, "logits/real": -2.0250871181488037, "logps/generated": -314.9656677246094, "logps/real": -258.19146728515625, "loss": 0.0377, "rewards/accuracies": 0.949999988079071, "rewards/generated": -21.372987747192383, "rewards/margins": 15.312896728515625, "rewards/real": -6.060091495513916, "step": 1010 }, { "epoch": 0.65, "learning_rate": 4.3572267920094e-07, "logits/generated": -2.2000434398651123, "logits/real": -2.0983645915985107, "logps/generated": -341.3284606933594, "logps/real": -298.4183044433594, "loss": 0.0403, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.781925201416016, "rewards/margins": 16.64480972290039, "rewards/real": -6.137114524841309, "step": 1020 }, { "epoch": 0.65, "learning_rate": 4.345475910693302e-07, "logits/generated": -2.0708365440368652, "logits/real": -2.0243687629699707, "logps/generated": -331.35162353515625, "logps/real": -302.08990478515625, "loss": 0.0176, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.080236434936523, "rewards/margins": 15.170089721679688, "rewards/real": -6.9101457595825195, "step": 1030 }, { "epoch": 0.66, "learning_rate": 4.333725029377203e-07, "logits/generated": -2.1169724464416504, "logits/real": -2.1490702629089355, "logps/generated": -294.83856201171875, "logps/real": -280.41326904296875, "loss": 0.0464, "rewards/accuracies": 0.949999988079071, "rewards/generated": -19.257888793945312, "rewards/margins": 13.788156509399414, "rewards/real": -5.469732761383057, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.3219741480611046e-07, "logits/generated": -2.126469135284424, "logits/real": -2.1890926361083984, "logps/generated": -300.55047607421875, "logps/real": -277.67425537109375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/generated": -19.871578216552734, "rewards/margins": 15.217971801757812, "rewards/real": -4.6536054611206055, "step": 1050 }, { "epoch": 0.67, "learning_rate": 4.3102232667450057e-07, "logits/generated": -2.235631227493286, "logits/real": -2.2010443210601807, "logps/generated": -284.49578857421875, "logps/real": -236.76010131835938, "loss": 0.0258, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.960590362548828, "rewards/margins": 13.84907341003418, "rewards/real": -4.111515522003174, "step": 1060 }, { "epoch": 0.68, "learning_rate": 4.2984723854289067e-07, "logits/generated": -2.219752073287964, "logits/real": -2.2477521896362305, "logps/generated": -294.33184814453125, "logps/real": -264.7086486816406, "loss": 0.0352, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.106578826904297, "rewards/margins": 14.367040634155273, "rewards/real": -4.73953914642334, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.286721504112809e-07, "logits/generated": -2.257949113845825, "logits/real": -2.2932074069976807, "logps/generated": -278.9601135253906, "logps/real": -228.11141967773438, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/generated": -16.96208953857422, "rewards/margins": 13.705500602722168, "rewards/real": -3.2565879821777344, "step": 1080 }, { "epoch": 0.69, "learning_rate": 4.27497062279671e-07, "logits/generated": -2.3263726234436035, "logits/real": -2.300952434539795, "logps/generated": -263.8218078613281, "logps/real": -272.5172119140625, "loss": 0.0365, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -15.995553970336914, "rewards/margins": 12.962613105773926, "rewards/real": -3.032942056655884, "step": 1090 }, { "epoch": 0.7, "learning_rate": 4.263219741480611e-07, "logits/generated": -2.301636219024658, "logits/real": -2.3038859367370605, "logps/generated": -280.7583923339844, "logps/real": -293.1648254394531, "loss": 0.0449, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.14383888244629, "rewards/margins": 13.275634765625, "rewards/real": -3.8682055473327637, "step": 1100 }, { "epoch": 0.7, "learning_rate": 4.2514688601645123e-07, "logits/generated": -2.284412384033203, "logits/real": -2.300344944000244, "logps/generated": -271.6402893066406, "logps/real": -303.3867492675781, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -16.277772903442383, "rewards/margins": 12.891380310058594, "rewards/real": -3.386394500732422, "step": 1110 }, { "epoch": 0.71, "learning_rate": 4.2397179788484133e-07, "logits/generated": -2.301110029220581, "logits/real": -2.283402681350708, "logps/generated": -274.2630615234375, "logps/real": -295.1095886230469, "loss": 0.0369, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -17.09488296508789, "rewards/margins": 13.71021842956543, "rewards/real": -3.3846638202667236, "step": 1120 }, { "epoch": 0.72, "learning_rate": 4.2279670975323143e-07, "logits/generated": -2.281606912612915, "logits/real": -2.1891345977783203, "logps/generated": -287.2510070800781, "logps/real": -249.9571075439453, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/generated": -17.902332305908203, "rewards/margins": 13.83057689666748, "rewards/real": -4.071754455566406, "step": 1130 }, { "epoch": 0.72, "learning_rate": 4.2162162162162164e-07, "logits/generated": -2.1958327293395996, "logits/real": -2.1260383129119873, "logps/generated": -294.9903869628906, "logps/real": -247.8839111328125, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/generated": -19.26121711730957, "rewards/margins": 15.105463027954102, "rewards/real": -4.155752658843994, "step": 1140 }, { "epoch": 0.73, "learning_rate": 4.2044653349001174e-07, "logits/generated": -2.204456090927124, "logits/real": -2.0998289585113525, "logps/generated": -301.88909912109375, "logps/real": -258.03753662109375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/generated": -18.723281860351562, "rewards/margins": 14.72815990447998, "rewards/real": -3.9951229095458984, "step": 1150 }, { "epoch": 0.74, "learning_rate": 4.1927144535840184e-07, "logits/generated": -2.1646523475646973, "logits/real": -2.1553101539611816, "logps/generated": -297.005126953125, "logps/real": -272.0830078125, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/generated": -18.245548248291016, "rewards/margins": 15.432889938354492, "rewards/real": -2.8126583099365234, "step": 1160 }, { "epoch": 0.74, "learning_rate": 4.18096357226792e-07, "logits/generated": -2.1961166858673096, "logits/real": -2.1357979774475098, "logps/generated": -313.2271423339844, "logps/real": -236.79226684570312, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/generated": -20.204418182373047, "rewards/margins": 16.67409896850586, "rewards/real": -3.530319929122925, "step": 1170 }, { "epoch": 0.75, "learning_rate": 4.169212690951821e-07, "logits/generated": -2.21925687789917, "logits/real": -2.141052484512329, "logps/generated": -313.30389404296875, "logps/real": -259.11871337890625, "loss": 0.0232, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.646581649780273, "rewards/margins": 16.518220901489258, "rewards/real": -3.1283624172210693, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.157461809635722e-07, "logits/generated": -2.2323949337005615, "logits/real": -2.228459119796753, "logps/generated": -323.08868408203125, "logps/real": -308.6040344238281, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/generated": -20.510068893432617, "rewards/margins": 17.15546989440918, "rewards/real": -3.3545982837677, "step": 1190 }, { "epoch": 0.76, "learning_rate": 4.145710928319624e-07, "logits/generated": -2.214876890182495, "logits/real": -2.1600141525268555, "logps/generated": -309.6695556640625, "logps/real": -240.92129516601562, "loss": 0.02, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.384716033935547, "rewards/margins": 17.16757583618164, "rewards/real": -3.2171401977539062, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.133960047003525e-07, "logits/generated": -2.215092182159424, "logits/real": -2.1977219581604004, "logps/generated": -305.69342041015625, "logps/real": -273.934326171875, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/generated": -19.206113815307617, "rewards/margins": 16.1297550201416, "rewards/real": -3.0763602256774902, "step": 1210 }, { "epoch": 0.77, "learning_rate": 4.1222091656874266e-07, "logits/generated": -2.2863411903381348, "logits/real": -2.1990232467651367, "logps/generated": -325.60723876953125, "logps/real": -249.89712524414062, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/generated": -21.734128952026367, "rewards/margins": 17.138214111328125, "rewards/real": -4.595917224884033, "step": 1220 }, { "epoch": 0.78, "learning_rate": 4.1104582843713276e-07, "logits/generated": -2.2235512733459473, "logits/real": -2.2082717418670654, "logps/generated": -331.63336181640625, "logps/real": -294.22552490234375, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/generated": -22.272472381591797, "rewards/margins": 16.936038970947266, "rewards/real": -5.336433410644531, "step": 1230 }, { "epoch": 0.79, "learning_rate": 4.0987074030552287e-07, "logits/generated": -2.2295403480529785, "logits/real": -2.2075161933898926, "logps/generated": -303.949951171875, "logps/real": -290.83447265625, "loss": 0.0344, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.734106063842773, "rewards/margins": 16.062904357910156, "rewards/real": -3.6712021827697754, "step": 1240 }, { "epoch": 0.79, "learning_rate": 4.0869565217391307e-07, "logits/generated": -2.329646110534668, "logits/real": -2.2754032611846924, "logps/generated": -284.7631530761719, "logps/real": -276.9230651855469, "loss": 0.0143, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.821731567382812, "rewards/margins": 14.999728202819824, "rewards/real": -2.82200288772583, "step": 1250 }, { "epoch": 0.8, "learning_rate": 4.075205640423032e-07, "logits/generated": -2.24617075920105, "logits/real": -2.208388090133667, "logps/generated": -290.4934997558594, "logps/real": -255.40194702148438, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/generated": -18.316036224365234, "rewards/margins": 14.29789924621582, "rewards/real": -4.018136024475098, "step": 1260 }, { "epoch": 0.81, "learning_rate": 4.063454759106933e-07, "logits/generated": -2.3056511878967285, "logits/real": -2.1358304023742676, "logps/generated": -316.7949523925781, "logps/real": -265.46905517578125, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.009098052978516, "rewards/margins": 17.545692443847656, "rewards/real": -3.4634087085723877, "step": 1270 }, { "epoch": 0.81, "learning_rate": 4.0517038777908343e-07, "logits/generated": -2.2054314613342285, "logits/real": -2.018893003463745, "logps/generated": -298.08404541015625, "logps/real": -229.26156616210938, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/generated": -19.959003448486328, "rewards/margins": 16.036542892456055, "rewards/real": -3.9224586486816406, "step": 1280 }, { "epoch": 0.82, "learning_rate": 4.0399529964747353e-07, "logits/generated": -2.284658908843994, "logits/real": -2.1389191150665283, "logps/generated": -315.28826904296875, "logps/real": -349.39276123046875, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/generated": -20.028697967529297, "rewards/margins": 16.19293212890625, "rewards/real": -3.8357670307159424, "step": 1290 }, { "epoch": 0.82, "learning_rate": 4.0282021151586363e-07, "logits/generated": -2.1421546936035156, "logits/real": -2.045635223388672, "logps/generated": -334.4831237792969, "logps/real": -308.2193908691406, "loss": 0.0187, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.108678817749023, "rewards/margins": 16.95539093017578, "rewards/real": -5.153290748596191, "step": 1300 }, { "epoch": 0.83, "learning_rate": 4.0164512338425384e-07, "logits/generated": -2.219916582107544, "logits/real": -2.0313782691955566, "logps/generated": -324.0411682128906, "logps/real": -280.9827575683594, "loss": 0.036, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -21.63995361328125, "rewards/margins": 16.822376251220703, "rewards/real": -4.81757926940918, "step": 1310 }, { "epoch": 0.84, "learning_rate": 4.0047003525264394e-07, "logits/generated": -2.0729808807373047, "logits/real": -1.9643628597259521, "logps/generated": -303.87921142578125, "logps/real": -236.41629028320312, "loss": 0.0336, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.983474731445312, "rewards/margins": 16.176816940307617, "rewards/real": -3.806657075881958, "step": 1320 }, { "epoch": 0.84, "learning_rate": 3.9929494712103404e-07, "logits/generated": -2.189826488494873, "logits/real": -2.0320651531219482, "logps/generated": -296.17340087890625, "logps/real": -300.67254638671875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/generated": -18.916046142578125, "rewards/margins": 15.765054702758789, "rewards/real": -3.150991439819336, "step": 1330 }, { "epoch": 0.85, "learning_rate": 3.981198589894242e-07, "logits/generated": -2.255155086517334, "logits/real": -2.0348448753356934, "logps/generated": -292.5631408691406, "logps/real": -286.8785705566406, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/generated": -18.44003677368164, "rewards/margins": 14.815027236938477, "rewards/real": -3.625012159347534, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.969447708578143e-07, "logits/generated": -2.151961088180542, "logits/real": -1.9714977741241455, "logps/generated": -295.259765625, "logps/real": -267.87994384765625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/generated": -18.496400833129883, "rewards/margins": 14.87242317199707, "rewards/real": -3.6239781379699707, "step": 1350 }, { "epoch": 0.86, "learning_rate": 3.957696827262044e-07, "logits/generated": -2.140820026397705, "logits/real": -1.9414198398590088, "logps/generated": -283.1053161621094, "logps/real": -241.42709350585938, "loss": 0.0286, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.189056396484375, "rewards/margins": 14.983909606933594, "rewards/real": -4.2051472663879395, "step": 1360 }, { "epoch": 0.87, "learning_rate": 3.945945945945946e-07, "logits/generated": -2.200963258743286, "logits/real": -2.045172691345215, "logps/generated": -293.6392517089844, "logps/real": -296.48577880859375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/generated": -18.247238159179688, "rewards/margins": 14.320365905761719, "rewards/real": -3.926875352859497, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.934195064629847e-07, "logits/generated": -2.1450352668762207, "logits/real": -2.019554376602173, "logps/generated": -287.7925720214844, "logps/real": -268.22845458984375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/generated": -18.483325958251953, "rewards/margins": 14.118812561035156, "rewards/real": -4.364512920379639, "step": 1380 }, { "epoch": 0.88, "learning_rate": 3.9224441833137486e-07, "logits/generated": -2.3144712448120117, "logits/real": -1.952749252319336, "logps/generated": -294.29168701171875, "logps/real": -282.1815490722656, "loss": 0.0145, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.526988983154297, "rewards/margins": 14.965646743774414, "rewards/real": -3.561342716217041, "step": 1390 }, { "epoch": 0.89, "learning_rate": 3.9106933019976496e-07, "logits/generated": -2.3015847206115723, "logits/real": -2.000420093536377, "logps/generated": -287.97747802734375, "logps/real": -263.9286804199219, "loss": 0.018, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.587892532348633, "rewards/margins": 13.602892875671387, "rewards/real": -4.984999656677246, "step": 1400 }, { "epoch": 0.89, "learning_rate": 3.8989424206815507e-07, "logits/generated": -2.424217462539673, "logits/real": -1.9993460178375244, "logps/generated": -296.1605529785156, "logps/real": -274.6358337402344, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/generated": -19.653118133544922, "rewards/margins": 16.015432357788086, "rewards/real": -3.637685775756836, "step": 1410 }, { "epoch": 0.9, "learning_rate": 3.887191539365452e-07, "logits/generated": -2.4005563259124756, "logits/real": -2.1184263229370117, "logps/generated": -280.97833251953125, "logps/real": -252.4258270263672, "loss": 0.0204, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -17.730958938598633, "rewards/margins": 13.906275749206543, "rewards/real": -3.824683666229248, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.8754406580493537e-07, "logits/generated": -2.380354642868042, "logits/real": -2.1375575065612793, "logps/generated": -308.86285400390625, "logps/real": -265.4056701660156, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/generated": -19.611974716186523, "rewards/margins": 15.560144424438477, "rewards/real": -4.051828384399414, "step": 1430 }, { "epoch": 0.91, "learning_rate": 3.863689776733255e-07, "logits/generated": -2.4837889671325684, "logits/real": -2.1749095916748047, "logps/generated": -292.39959716796875, "logps/real": -292.68499755859375, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.81622886657715, "rewards/margins": 13.84477424621582, "rewards/real": -3.97145414352417, "step": 1440 }, { "epoch": 0.92, "learning_rate": 3.8519388954171563e-07, "logits/generated": -2.411600351333618, "logits/real": -2.198923110961914, "logps/generated": -272.6438903808594, "logps/real": -268.46417236328125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/generated": -17.41283416748047, "rewards/margins": 13.593576431274414, "rewards/real": -3.8192572593688965, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.8401880141010573e-07, "logits/generated": -2.5035364627838135, "logits/real": -2.099947452545166, "logps/generated": -310.13330078125, "logps/real": -264.05078125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/generated": -20.967391967773438, "rewards/margins": 15.722890853881836, "rewards/real": -5.244499206542969, "step": 1460 }, { "epoch": 0.93, "learning_rate": 3.8284371327849583e-07, "logits/generated": -2.3421576023101807, "logits/real": -2.2294716835021973, "logps/generated": -308.58416748046875, "logps/real": -290.80450439453125, "loss": 0.0264, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.871484756469727, "rewards/margins": 14.30445384979248, "rewards/real": -5.567030906677246, "step": 1470 }, { "epoch": 0.94, "learning_rate": 3.8166862514688604e-07, "logits/generated": -2.4322938919067383, "logits/real": -2.192739725112915, "logps/generated": -290.4706726074219, "logps/real": -280.22711181640625, "loss": 0.0275, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.550111770629883, "rewards/margins": 13.982562065124512, "rewards/real": -4.567549228668213, "step": 1480 }, { "epoch": 0.95, "learning_rate": 3.8049353701527614e-07, "logits/generated": -2.4540488719940186, "logits/real": -2.1258809566497803, "logps/generated": -314.3543395996094, "logps/real": -242.69906616210938, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/generated": -21.094104766845703, "rewards/margins": 17.1605167388916, "rewards/real": -3.933588743209839, "step": 1490 }, { "epoch": 0.95, "learning_rate": 3.7931844888366624e-07, "logits/generated": -2.429776668548584, "logits/real": -2.1748576164245605, "logps/generated": -290.11431884765625, "logps/real": -276.1110534667969, "loss": 0.0456, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -18.669795989990234, "rewards/margins": 14.223612785339355, "rewards/real": -4.446183204650879, "step": 1500 }, { "epoch": 0.96, "learning_rate": 3.781433607520564e-07, "logits/generated": -2.382094621658325, "logits/real": -2.138918399810791, "logps/generated": -302.57012939453125, "logps/real": -249.3428955078125, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.439178466796875, "rewards/margins": 16.179622650146484, "rewards/real": -4.259556770324707, "step": 1510 }, { "epoch": 0.96, "learning_rate": 3.769682726204465e-07, "logits/generated": -2.4343438148498535, "logits/real": -2.256923198699951, "logps/generated": -300.0426330566406, "logps/real": -285.530517578125, "loss": 0.0361, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.440828323364258, "rewards/margins": 14.015217781066895, "rewards/real": -4.425610542297363, "step": 1520 }, { "epoch": 0.97, "learning_rate": 3.757931844888366e-07, "logits/generated": -2.4278175830841064, "logits/real": -2.1669082641601562, "logps/generated": -304.4680480957031, "logps/real": -273.3132629394531, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/generated": -19.61075782775879, "rewards/margins": 15.039899826049805, "rewards/real": -4.570856094360352, "step": 1530 }, { "epoch": 0.98, "learning_rate": 3.746180963572268e-07, "logits/generated": -2.434357166290283, "logits/real": -2.2451558113098145, "logps/generated": -300.28118896484375, "logps/real": -304.6499328613281, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.55888557434082, "rewards/margins": 14.898167610168457, "rewards/real": -4.66071891784668, "step": 1540 }, { "epoch": 0.98, "learning_rate": 3.734430082256169e-07, "logits/generated": -2.454746961593628, "logits/real": -2.167454719543457, "logps/generated": -290.23675537109375, "logps/real": -286.0035400390625, "loss": 0.0331, "rewards/accuracies": 0.949999988079071, "rewards/generated": -17.961822509765625, "rewards/margins": 13.359112739562988, "rewards/real": -4.602707386016846, "step": 1550 }, { "epoch": 0.99, "learning_rate": 3.7226792009400706e-07, "logits/generated": -2.4925215244293213, "logits/real": -2.185725688934326, "logps/generated": -306.9207763671875, "logps/real": -302.2175598144531, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/generated": -20.345661163330078, "rewards/margins": 13.471136093139648, "rewards/real": -6.8745245933532715, "step": 1560 }, { "epoch": 1.0, "learning_rate": 3.7109283196239716e-07, "logits/generated": -2.415444850921631, "logits/real": -2.0123257637023926, "logps/generated": -339.48565673828125, "logps/real": -237.3209991455078, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/generated": -24.1875, "rewards/margins": 17.469690322875977, "rewards/real": -6.717806816101074, "step": 1570 }, { "epoch": 1.0, "learning_rate": 3.6991774383078726e-07, "logits/generated": -2.374286651611328, "logits/real": -2.0234928131103516, "logps/generated": -306.5343017578125, "logps/real": -287.781005859375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -20.14816665649414, "rewards/margins": 15.472940444946289, "rewards/real": -4.675227165222168, "step": 1580 }, { "epoch": 1.01, "learning_rate": 3.687426556991774e-07, "logits/generated": -2.4474167823791504, "logits/real": -2.071176052093506, "logps/generated": -317.44989013671875, "logps/real": -285.42474365234375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -21.355384826660156, "rewards/margins": 15.974954605102539, "rewards/real": -5.380428314208984, "step": 1590 }, { "epoch": 1.02, "learning_rate": 3.6756756756756757e-07, "logits/generated": -2.3674721717834473, "logits/real": -1.9707956314086914, "logps/generated": -306.5394592285156, "logps/real": -225.94546508789062, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/generated": -20.928173065185547, "rewards/margins": 14.604655265808105, "rewards/real": -6.323517799377441, "step": 1600 }, { "epoch": 1.02, "learning_rate": 3.663924794359577e-07, "logits/generated": -2.42570424079895, "logits/real": -1.969363808631897, "logps/generated": -321.4077453613281, "logps/real": -271.72955322265625, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.87816619873047, "rewards/margins": 16.646554946899414, "rewards/real": -5.231610298156738, "step": 1610 }, { "epoch": 1.03, "learning_rate": 3.6521739130434783e-07, "logits/generated": -2.327263593673706, "logits/real": -1.942073106765747, "logps/generated": -299.33856201171875, "logps/real": -233.4484100341797, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.722368240356445, "rewards/margins": 15.216009140014648, "rewards/real": -5.5063581466674805, "step": 1620 }, { "epoch": 1.03, "learning_rate": 3.6404230317273793e-07, "logits/generated": -2.434544563293457, "logits/real": -1.9652414321899414, "logps/generated": -332.652587890625, "logps/real": -301.4515380859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -20.612462997436523, "rewards/margins": 15.830297470092773, "rewards/real": -4.782168388366699, "step": 1630 }, { "epoch": 1.04, "learning_rate": 3.6286721504112803e-07, "logits/generated": -2.3795437812805176, "logits/real": -1.8615341186523438, "logps/generated": -334.30499267578125, "logps/real": -264.4176330566406, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -22.38288116455078, "rewards/margins": 17.554964065551758, "rewards/real": -4.827913761138916, "step": 1640 }, { "epoch": 1.05, "learning_rate": 3.6169212690951824e-07, "logits/generated": -2.3490805625915527, "logits/real": -1.8895342350006104, "logps/generated": -323.92572021484375, "logps/real": -287.7126159667969, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -21.20674705505371, "rewards/margins": 16.43838882446289, "rewards/real": -4.76835823059082, "step": 1650 }, { "epoch": 1.05, "learning_rate": 3.6051703877790834e-07, "logits/generated": -2.3346450328826904, "logits/real": -1.9004275798797607, "logps/generated": -321.3191833496094, "logps/real": -248.30148315429688, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.776212692260742, "rewards/margins": 15.979314804077148, "rewards/real": -5.796901702880859, "step": 1660 }, { "epoch": 1.06, "learning_rate": 3.5934195064629844e-07, "logits/generated": -2.3258790969848633, "logits/real": -1.9253677129745483, "logps/generated": -332.10076904296875, "logps/real": -287.5823669433594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -22.906774520874023, "rewards/margins": 17.70983123779297, "rewards/real": -5.196944236755371, "step": 1670 }, { "epoch": 1.07, "learning_rate": 3.581668625146886e-07, "logits/generated": -2.4416744709014893, "logits/real": -1.9654200077056885, "logps/generated": -338.466552734375, "logps/real": -289.2925720214844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -23.339372634887695, "rewards/margins": 18.226560592651367, "rewards/real": -5.1128129959106445, "step": 1680 }, { "epoch": 1.07, "learning_rate": 3.569917743830787e-07, "logits/generated": -2.3743972778320312, "logits/real": -2.0106990337371826, "logps/generated": -321.8802795410156, "logps/real": -289.3930969238281, "loss": 0.01, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.401901245117188, "rewards/margins": 16.5495662689209, "rewards/real": -4.852335453033447, "step": 1690 }, { "epoch": 1.08, "learning_rate": 3.558166862514688e-07, "logits/generated": -2.3224287033081055, "logits/real": -1.9159963130950928, "logps/generated": -354.5892333984375, "logps/real": -291.2265625, "loss": 0.0149, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.254032135009766, "rewards/margins": 19.105968475341797, "rewards/real": -5.148062229156494, "step": 1700 }, { "epoch": 1.09, "learning_rate": 3.54641598119859e-07, "logits/generated": -2.3815436363220215, "logits/real": -1.973968744277954, "logps/generated": -334.0134582519531, "logps/real": -333.6390686035156, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -22.35097312927246, "rewards/margins": 16.910682678222656, "rewards/real": -5.440291404724121, "step": 1710 }, { "epoch": 1.09, "learning_rate": 3.534665099882491e-07, "logits/generated": -2.2737233638763428, "logits/real": -1.969150185585022, "logps/generated": -332.0345764160156, "logps/real": -276.24163818359375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -22.814863204956055, "rewards/margins": 17.44947052001953, "rewards/real": -5.36539363861084, "step": 1720 }, { "epoch": 1.1, "learning_rate": 3.5229142185663926e-07, "logits/generated": -2.4095771312713623, "logits/real": -2.06756329536438, "logps/generated": -310.903076171875, "logps/real": -290.26885986328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -21.025936126708984, "rewards/margins": 16.35614585876465, "rewards/real": -4.669791221618652, "step": 1730 }, { "epoch": 1.1, "learning_rate": 3.5111633372502936e-07, "logits/generated": -2.394035577774048, "logits/real": -1.9228935241699219, "logps/generated": -344.68133544921875, "logps/real": -322.188232421875, "loss": 0.0064, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.82350730895996, "rewards/margins": 18.79680633544922, "rewards/real": -4.026702880859375, "step": 1740 }, { "epoch": 1.11, "learning_rate": 3.4994124559341946e-07, "logits/generated": -2.3307666778564453, "logits/real": -2.019087314605713, "logps/generated": -348.3106689453125, "logps/real": -280.13470458984375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/generated": -24.350341796875, "rewards/margins": 18.29078483581543, "rewards/real": -6.059556007385254, "step": 1750 }, { "epoch": 1.12, "learning_rate": 3.487661574618096e-07, "logits/generated": -2.2875423431396484, "logits/real": -1.8283237218856812, "logps/generated": -355.9295654296875, "logps/real": -286.44342041015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -25.10162353515625, "rewards/margins": 19.49599838256836, "rewards/real": -5.605628490447998, "step": 1760 }, { "epoch": 1.12, "learning_rate": 3.4759106933019977e-07, "logits/generated": -2.326918840408325, "logits/real": -2.001236915588379, "logps/generated": -351.1276550292969, "logps/real": -286.3841247558594, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -24.509958267211914, "rewards/margins": 18.328205108642578, "rewards/real": -6.181752681732178, "step": 1770 }, { "epoch": 1.13, "learning_rate": 3.4641598119858987e-07, "logits/generated": -2.197887420654297, "logits/real": -1.8538007736206055, "logps/generated": -384.99749755859375, "logps/real": -274.5433044433594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -28.36118507385254, "rewards/margins": 22.02522087097168, "rewards/real": -6.335963249206543, "step": 1780 }, { "epoch": 1.14, "learning_rate": 3.4524089306698003e-07, "logits/generated": -2.345053195953369, "logits/real": -2.0094006061553955, "logps/generated": -370.4590759277344, "logps/real": -316.2205810546875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -25.606197357177734, "rewards/margins": 18.697460174560547, "rewards/real": -6.908738613128662, "step": 1790 }, { "epoch": 1.14, "learning_rate": 3.4406580493537013e-07, "logits/generated": -2.285144329071045, "logits/real": -1.9462623596191406, "logps/generated": -373.7235412597656, "logps/real": -283.7071228027344, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.2155818939209, "rewards/margins": 19.813146591186523, "rewards/real": -7.4024338722229, "step": 1800 }, { "epoch": 1.15, "learning_rate": 3.4289071680376023e-07, "logits/generated": -2.254729747772217, "logits/real": -2.026987075805664, "logps/generated": -378.70941162109375, "logps/real": -323.30426025390625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -27.278667449951172, "rewards/margins": 20.472742080688477, "rewards/real": -6.805926322937012, "step": 1810 }, { "epoch": 1.15, "learning_rate": 3.417156286721504e-07, "logits/generated": -2.235236644744873, "logits/real": -2.0165162086486816, "logps/generated": -397.2226867675781, "logps/real": -347.4537658691406, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.368255615234375, "rewards/margins": 20.647584915161133, "rewards/real": -7.720673561096191, "step": 1820 }, { "epoch": 1.16, "learning_rate": 3.4054054054054054e-07, "logits/generated": -2.2384140491485596, "logits/real": -1.9251244068145752, "logps/generated": -350.0628356933594, "logps/real": -244.8567657470703, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -25.298246383666992, "rewards/margins": 20.636980056762695, "rewards/real": -4.661267280578613, "step": 1830 }, { "epoch": 1.17, "learning_rate": 3.3936545240893064e-07, "logits/generated": -2.224591016769409, "logits/real": -1.7972519397735596, "logps/generated": -367.85107421875, "logps/real": -289.4563903808594, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -25.207212448120117, "rewards/margins": 21.348243713378906, "rewards/real": -3.85896372795105, "step": 1840 }, { "epoch": 1.17, "learning_rate": 3.381903642773208e-07, "logits/generated": -2.298875570297241, "logits/real": -1.9161163568496704, "logps/generated": -343.57244873046875, "logps/real": -288.15765380859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -23.460208892822266, "rewards/margins": 18.997722625732422, "rewards/real": -4.462485313415527, "step": 1850 }, { "epoch": 1.18, "learning_rate": 3.370152761457109e-07, "logits/generated": -2.2626256942749023, "logits/real": -2.0237679481506348, "logps/generated": -328.1484375, "logps/real": -256.3814697265625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/generated": -22.351253509521484, "rewards/margins": 17.940073013305664, "rewards/real": -4.411181449890137, "step": 1860 }, { "epoch": 1.19, "learning_rate": 3.35840188014101e-07, "logits/generated": -2.2844908237457275, "logits/real": -2.081831216812134, "logps/generated": -356.357177734375, "logps/real": -267.3996887207031, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.618431091308594, "rewards/margins": 20.21932029724121, "rewards/real": -4.399110317230225, "step": 1870 }, { "epoch": 1.19, "learning_rate": 3.346650998824912e-07, "logits/generated": -2.4111740589141846, "logits/real": -2.1501283645629883, "logps/generated": -320.0719299316406, "logps/real": -321.5926513671875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -21.9744930267334, "rewards/margins": 18.086454391479492, "rewards/real": -3.888040065765381, "step": 1880 }, { "epoch": 1.2, "learning_rate": 3.334900117508813e-07, "logits/generated": -2.406731128692627, "logits/real": -2.109961748123169, "logps/generated": -331.38629150390625, "logps/real": -285.94610595703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -21.635053634643555, "rewards/margins": 17.40793228149414, "rewards/real": -4.227120399475098, "step": 1890 }, { "epoch": 1.21, "learning_rate": 3.3231492361927146e-07, "logits/generated": -2.4639508724212646, "logits/real": -2.219212293624878, "logps/generated": -325.2287902832031, "logps/real": -297.9587097167969, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/generated": -20.667682647705078, "rewards/margins": 17.30364418029785, "rewards/real": -3.3640379905700684, "step": 1900 }, { "epoch": 1.21, "learning_rate": 3.3113983548766156e-07, "logits/generated": -2.548128604888916, "logits/real": -2.028677463531494, "logps/generated": -308.02490234375, "logps/real": -280.04034423828125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -21.040822982788086, "rewards/margins": 17.85296058654785, "rewards/real": -3.1878647804260254, "step": 1910 }, { "epoch": 1.22, "learning_rate": 3.2996474735605166e-07, "logits/generated": -2.484825849533081, "logits/real": -2.0903892517089844, "logps/generated": -325.17352294921875, "logps/real": -269.5104675292969, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -21.7929744720459, "rewards/margins": 17.698253631591797, "rewards/real": -4.094720840454102, "step": 1920 }, { "epoch": 1.22, "learning_rate": 3.287896592244418e-07, "logits/generated": -2.398820400238037, "logits/real": -2.1054134368896484, "logps/generated": -337.5372009277344, "logps/real": -260.09661865234375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -23.03034019470215, "rewards/margins": 18.337459564208984, "rewards/real": -4.6928815841674805, "step": 1930 }, { "epoch": 1.23, "learning_rate": 3.2761457109283197e-07, "logits/generated": -2.3103232383728027, "logits/real": -2.0682365894317627, "logps/generated": -361.6377868652344, "logps/real": -288.559814453125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -25.030508041381836, "rewards/margins": 18.72498321533203, "rewards/real": -6.305525779724121, "step": 1940 }, { "epoch": 1.24, "learning_rate": 3.2643948296122207e-07, "logits/generated": -2.2990386486053467, "logits/real": -1.9596431255340576, "logps/generated": -370.11602783203125, "logps/real": -297.407958984375, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.362558364868164, "rewards/margins": 19.419109344482422, "rewards/real": -6.943448066711426, "step": 1950 }, { "epoch": 1.24, "learning_rate": 3.252643948296122e-07, "logits/generated": -2.5760021209716797, "logits/real": -2.365118980407715, "logps/generated": -314.21612548828125, "logps/real": -329.44818115234375, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/generated": -20.608551025390625, "rewards/margins": 14.474385261535645, "rewards/real": -6.134167671203613, "step": 1960 }, { "epoch": 1.25, "learning_rate": 3.2408930669800233e-07, "logits/generated": -2.529139995574951, "logits/real": -2.3194754123687744, "logps/generated": -322.12579345703125, "logps/real": -315.3408508300781, "loss": 0.0078, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.443510055541992, "rewards/margins": 16.046594619750977, "rewards/real": -5.396914958953857, "step": 1970 }, { "epoch": 1.26, "learning_rate": 3.2291421856639243e-07, "logits/generated": -2.455733299255371, "logits/real": -2.1376829147338867, "logps/generated": -340.5116271972656, "logps/real": -303.23089599609375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -23.338191986083984, "rewards/margins": 17.037410736083984, "rewards/real": -6.300785064697266, "step": 1980 }, { "epoch": 1.26, "learning_rate": 3.217391304347826e-07, "logits/generated": -2.468418598175049, "logits/real": -2.248774528503418, "logps/generated": -329.9716491699219, "logps/real": -286.23785400390625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -22.26080322265625, "rewards/margins": 16.494421005249023, "rewards/real": -5.766382694244385, "step": 1990 }, { "epoch": 1.27, "learning_rate": 3.2056404230317274e-07, "logits/generated": -2.521620988845825, "logits/real": -2.2400965690612793, "logps/generated": -331.94195556640625, "logps/real": -314.6190490722656, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.115577697753906, "rewards/margins": 16.175901412963867, "rewards/real": -5.9396748542785645, "step": 2000 }, { "epoch": 1.28, "learning_rate": 3.1938895417156284e-07, "logits/generated": -2.4937617778778076, "logits/real": -2.2404730319976807, "logps/generated": -320.19561767578125, "logps/real": -296.76220703125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -21.591772079467773, "rewards/margins": 15.425150871276855, "rewards/real": -6.166622161865234, "step": 2010 }, { "epoch": 1.28, "learning_rate": 3.18213866039953e-07, "logits/generated": -2.459237813949585, "logits/real": -2.1428635120391846, "logps/generated": -328.6792297363281, "logps/real": -273.9725036621094, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.400270462036133, "rewards/margins": 16.682750701904297, "rewards/real": -5.7175188064575195, "step": 2020 }, { "epoch": 1.29, "learning_rate": 3.170387779083431e-07, "logits/generated": -2.4984331130981445, "logits/real": -2.219433307647705, "logps/generated": -333.45123291015625, "logps/real": -290.7151794433594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -22.205299377441406, "rewards/margins": 16.75266456604004, "rewards/real": -5.452635765075684, "step": 2030 }, { "epoch": 1.29, "learning_rate": 3.1586368977673325e-07, "logits/generated": -2.4460034370422363, "logits/real": -2.1568732261657715, "logps/generated": -311.125244140625, "logps/real": -252.8068084716797, "loss": 0.0151, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.626285552978516, "rewards/margins": 16.041940689086914, "rewards/real": -5.584343910217285, "step": 2040 }, { "epoch": 1.3, "learning_rate": 3.146886016451234e-07, "logits/generated": -2.4558825492858887, "logits/real": -2.202268362045288, "logps/generated": -321.90008544921875, "logps/real": -299.6604309082031, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -20.283735275268555, "rewards/margins": 16.26697540283203, "rewards/real": -4.016759395599365, "step": 2050 }, { "epoch": 1.31, "learning_rate": 3.135135135135135e-07, "logits/generated": -2.520723581314087, "logits/real": -2.143216371536255, "logps/generated": -329.4117126464844, "logps/real": -260.57342529296875, "loss": 0.0183, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.833818435668945, "rewards/margins": 17.800403594970703, "rewards/real": -5.0334153175354, "step": 2060 }, { "epoch": 1.31, "learning_rate": 3.1233842538190366e-07, "logits/generated": -2.4640583992004395, "logits/real": -2.2413322925567627, "logps/generated": -304.47332763671875, "logps/real": -314.59661865234375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -19.477767944335938, "rewards/margins": 13.840822219848633, "rewards/real": -5.636946201324463, "step": 2070 }, { "epoch": 1.32, "learning_rate": 3.1116333725029376e-07, "logits/generated": -2.416250228881836, "logits/real": -2.14082670211792, "logps/generated": -334.43267822265625, "logps/real": -253.3500213623047, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -23.195547103881836, "rewards/margins": 17.476577758789062, "rewards/real": -5.718966484069824, "step": 2080 }, { "epoch": 1.33, "learning_rate": 3.0998824911868386e-07, "logits/generated": -2.4012951850891113, "logits/real": -2.0450942516326904, "logps/generated": -344.83856201171875, "logps/real": -263.33087158203125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -23.860132217407227, "rewards/margins": 18.692014694213867, "rewards/real": -5.168116092681885, "step": 2090 }, { "epoch": 1.33, "learning_rate": 3.08813160987074e-07, "logits/generated": -2.4515933990478516, "logits/real": -2.1537697315216064, "logps/generated": -335.5617370605469, "logps/real": -284.315673828125, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.91217613220215, "rewards/margins": 17.602642059326172, "rewards/real": -5.30953311920166, "step": 2100 }, { "epoch": 1.34, "learning_rate": 3.0763807285546417e-07, "logits/generated": -2.421034812927246, "logits/real": -2.1092395782470703, "logps/generated": -337.59063720703125, "logps/real": -293.2365417480469, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -21.965925216674805, "rewards/margins": 17.585596084594727, "rewards/real": -4.380329132080078, "step": 2110 }, { "epoch": 1.35, "learning_rate": 3.0646298472385427e-07, "logits/generated": -2.4205338954925537, "logits/real": -2.0897164344787598, "logps/generated": -326.11993408203125, "logps/real": -333.59722900390625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -21.24331283569336, "rewards/margins": 17.81305503845215, "rewards/real": -3.4302544593811035, "step": 2120 }, { "epoch": 1.35, "learning_rate": 3.052878965922444e-07, "logits/generated": -2.4457616806030273, "logits/real": -2.1007394790649414, "logps/generated": -354.89190673828125, "logps/real": -308.13482666015625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/generated": -23.513587951660156, "rewards/margins": 19.220561981201172, "rewards/real": -4.293027400970459, "step": 2130 }, { "epoch": 1.36, "learning_rate": 3.041128084606345e-07, "logits/generated": -2.5040667057037354, "logits/real": -2.0916154384613037, "logps/generated": -297.73785400390625, "logps/real": -264.8382568359375, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.15200424194336, "rewards/margins": 15.560193061828613, "rewards/real": -4.591813087463379, "step": 2140 }, { "epoch": 1.36, "learning_rate": 3.0293772032902463e-07, "logits/generated": -2.433756113052368, "logits/real": -2.0441482067108154, "logps/generated": -331.0630187988281, "logps/real": -275.1962890625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -21.667695999145508, "rewards/margins": 18.11298370361328, "rewards/real": -3.5547127723693848, "step": 2150 }, { "epoch": 1.37, "learning_rate": 3.017626321974148e-07, "logits/generated": -2.4524292945861816, "logits/real": -2.1112122535705566, "logps/generated": -310.28179931640625, "logps/real": -256.42669677734375, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.27758026123047, "rewards/margins": 16.748332977294922, "rewards/real": -4.529247283935547, "step": 2160 }, { "epoch": 1.38, "learning_rate": 3.0058754406580494e-07, "logits/generated": -2.4680449962615967, "logits/real": -2.1266109943389893, "logps/generated": -323.96221923828125, "logps/real": -285.35064697265625, "loss": 0.0071, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.724130630493164, "rewards/margins": 17.49283218383789, "rewards/real": -4.231298923492432, "step": 2170 }, { "epoch": 1.38, "learning_rate": 2.9941245593419504e-07, "logits/generated": -2.382336378097534, "logits/real": -1.9932057857513428, "logps/generated": -331.06640625, "logps/real": -238.3595428466797, "loss": 0.0085, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.42551040649414, "rewards/margins": 17.899635314941406, "rewards/real": -5.525876045227051, "step": 2180 }, { "epoch": 1.39, "learning_rate": 2.982373678025852e-07, "logits/generated": -2.5244622230529785, "logits/real": -2.1495394706726074, "logps/generated": -330.15972900390625, "logps/real": -266.6393127441406, "loss": 0.0218, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.55251693725586, "rewards/margins": 18.512014389038086, "rewards/real": -4.040503978729248, "step": 2190 }, { "epoch": 1.4, "learning_rate": 2.970622796709753e-07, "logits/generated": -2.536439895629883, "logits/real": -2.2411656379699707, "logps/generated": -324.23486328125, "logps/real": -313.6152038574219, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -21.34421157836914, "rewards/margins": 17.209888458251953, "rewards/real": -4.134321212768555, "step": 2200 }, { "epoch": 1.4, "learning_rate": 2.9588719153936545e-07, "logits/generated": -2.4607491493225098, "logits/real": -2.160658597946167, "logps/generated": -324.77288818359375, "logps/real": -290.04840087890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -21.049968719482422, "rewards/margins": 16.71952247619629, "rewards/real": -4.330449104309082, "step": 2210 }, { "epoch": 1.41, "learning_rate": 2.9471210340775555e-07, "logits/generated": -2.4545581340789795, "logits/real": -2.1188526153564453, "logps/generated": -321.65863037109375, "logps/real": -264.7879333496094, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -21.988567352294922, "rewards/margins": 17.094676971435547, "rewards/real": -4.893891334533691, "step": 2220 }, { "epoch": 1.41, "learning_rate": 2.935370152761457e-07, "logits/generated": -2.501826763153076, "logits/real": -2.1853764057159424, "logps/generated": -314.2751770019531, "logps/real": -334.19024658203125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -20.757062911987305, "rewards/margins": 16.270082473754883, "rewards/real": -4.486981391906738, "step": 2230 }, { "epoch": 1.42, "learning_rate": 2.9236192714453586e-07, "logits/generated": -2.4264206886291504, "logits/real": -2.102842330932617, "logps/generated": -314.3070373535156, "logps/real": -282.86907958984375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -20.95997428894043, "rewards/margins": 16.236602783203125, "rewards/real": -4.723372459411621, "step": 2240 }, { "epoch": 1.43, "learning_rate": 2.9118683901292596e-07, "logits/generated": -2.4942514896392822, "logits/real": -2.1962990760803223, "logps/generated": -355.8966064453125, "logps/real": -321.4674377441406, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -23.816083908081055, "rewards/margins": 18.28668212890625, "rewards/real": -5.529400825500488, "step": 2250 }, { "epoch": 1.43, "learning_rate": 2.9001175088131606e-07, "logits/generated": -2.455756425857544, "logits/real": -2.1011083126068115, "logps/generated": -342.75665283203125, "logps/real": -316.2607727050781, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -22.9534969329834, "rewards/margins": 18.188405990600586, "rewards/real": -4.7650885581970215, "step": 2260 }, { "epoch": 1.44, "learning_rate": 2.888366627497062e-07, "logits/generated": -2.4029483795166016, "logits/real": -1.9473835229873657, "logps/generated": -339.0224304199219, "logps/real": -281.7505798339844, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -23.03743553161621, "rewards/margins": 18.093334197998047, "rewards/real": -4.944101810455322, "step": 2270 }, { "epoch": 1.45, "learning_rate": 2.8766157461809637e-07, "logits/generated": -2.419862747192383, "logits/real": -1.9635975360870361, "logps/generated": -363.16534423828125, "logps/real": -276.90020751953125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -25.56096839904785, "rewards/margins": 21.17633628845215, "rewards/real": -4.3846282958984375, "step": 2280 }, { "epoch": 1.45, "learning_rate": 2.8648648648648647e-07, "logits/generated": -2.450591564178467, "logits/real": -2.1148898601531982, "logps/generated": -319.2438659667969, "logps/real": -307.49114990234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -21.591182708740234, "rewards/margins": 17.235889434814453, "rewards/real": -4.355295658111572, "step": 2290 }, { "epoch": 1.46, "learning_rate": 2.853113983548766e-07, "logits/generated": -2.463535785675049, "logits/real": -2.1628036499023438, "logps/generated": -348.6790771484375, "logps/real": -287.131103515625, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.492891311645508, "rewards/margins": 18.62226104736328, "rewards/real": -4.870633125305176, "step": 2300 }, { "epoch": 1.47, "learning_rate": 2.841363102232667e-07, "logits/generated": -2.4891059398651123, "logits/real": -2.136472463607788, "logps/generated": -311.2068786621094, "logps/real": -282.12969970703125, "loss": 0.0072, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.66542625427246, "rewards/margins": 17.774410247802734, "rewards/real": -2.8910176753997803, "step": 2310 }, { "epoch": 1.47, "learning_rate": 2.829612220916568e-07, "logits/generated": -2.3496716022491455, "logits/real": -1.9166462421417236, "logps/generated": -323.8047790527344, "logps/real": -230.07870483398438, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/generated": -22.36273765563965, "rewards/margins": 18.25948715209961, "rewards/real": -4.103250503540039, "step": 2320 }, { "epoch": 1.48, "learning_rate": 2.81786133960047e-07, "logits/generated": -2.4094340801239014, "logits/real": -2.199237823486328, "logps/generated": -298.130615234375, "logps/real": -329.5294189453125, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.52131462097168, "rewards/margins": 15.532516479492188, "rewards/real": -2.9887986183166504, "step": 2330 }, { "epoch": 1.48, "learning_rate": 2.8061104582843713e-07, "logits/generated": -2.443497896194458, "logits/real": -2.0504519939422607, "logps/generated": -318.1686096191406, "logps/real": -290.20123291015625, "loss": 0.0195, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.709726333618164, "rewards/margins": 16.9930362701416, "rewards/real": -3.7166907787323, "step": 2340 }, { "epoch": 1.49, "learning_rate": 2.794359576968273e-07, "logits/generated": -2.394792318344116, "logits/real": -1.9892990589141846, "logps/generated": -327.5433349609375, "logps/real": -269.41571044921875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -22.122539520263672, "rewards/margins": 18.496726989746094, "rewards/real": -3.6258106231689453, "step": 2350 }, { "epoch": 1.5, "learning_rate": 2.782608695652174e-07, "logits/generated": -2.366842746734619, "logits/real": -2.0693068504333496, "logps/generated": -334.1366882324219, "logps/real": -284.13916015625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -21.300800323486328, "rewards/margins": 17.710512161254883, "rewards/real": -3.59028697013855, "step": 2360 }, { "epoch": 1.5, "learning_rate": 2.770857814336075e-07, "logits/generated": -2.446584701538086, "logits/real": -2.0087201595306396, "logps/generated": -345.32623291015625, "logps/real": -279.03729248046875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -23.770870208740234, "rewards/margins": 19.3250732421875, "rewards/real": -4.445800304412842, "step": 2370 }, { "epoch": 1.51, "learning_rate": 2.7591069330199765e-07, "logits/generated": -2.334024667739868, "logits/real": -2.0111477375030518, "logps/generated": -323.5222473144531, "logps/real": -255.93240356445312, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.07223129272461, "rewards/margins": 18.25441551208496, "rewards/real": -4.817814826965332, "step": 2380 }, { "epoch": 1.52, "learning_rate": 2.7473560517038775e-07, "logits/generated": -2.2881593704223633, "logits/real": -1.9688920974731445, "logps/generated": -337.32891845703125, "logps/real": -274.38018798828125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -23.056621551513672, "rewards/margins": 18.873544692993164, "rewards/real": -4.183079719543457, "step": 2390 }, { "epoch": 1.52, "learning_rate": 2.735605170387779e-07, "logits/generated": -2.383574962615967, "logits/real": -2.0922164916992188, "logps/generated": -316.5880432128906, "logps/real": -335.4088439941406, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/generated": -20.414888381958008, "rewards/margins": 17.6447811126709, "rewards/real": -2.7701072692871094, "step": 2400 }, { "epoch": 1.53, "learning_rate": 2.7238542890716806e-07, "logits/generated": -2.3858695030212402, "logits/real": -2.084942102432251, "logps/generated": -297.20037841796875, "logps/real": -269.3205261230469, "loss": 0.012, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.70669937133789, "rewards/margins": 16.86494255065918, "rewards/real": -2.8417534828186035, "step": 2410 }, { "epoch": 1.54, "learning_rate": 2.7121034077555816e-07, "logits/generated": -2.400209903717041, "logits/real": -2.1528279781341553, "logps/generated": -299.55523681640625, "logps/real": -293.73187255859375, "loss": 0.015, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -19.58156394958496, "rewards/margins": 16.313032150268555, "rewards/real": -3.268529176712036, "step": 2420 }, { "epoch": 1.54, "learning_rate": 2.7003525264394826e-07, "logits/generated": -2.476315975189209, "logits/real": -2.0876708030700684, "logps/generated": -320.7831726074219, "logps/real": -248.1786651611328, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -21.875595092773438, "rewards/margins": 17.460813522338867, "rewards/real": -4.4147844314575195, "step": 2430 }, { "epoch": 1.55, "learning_rate": 2.688601645123384e-07, "logits/generated": -2.327359676361084, "logits/real": -2.1029770374298096, "logps/generated": -296.45855712890625, "logps/real": -255.8355255126953, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/generated": -19.623727798461914, "rewards/margins": 16.048782348632812, "rewards/real": -3.574944019317627, "step": 2440 }, { "epoch": 1.55, "learning_rate": 2.6768507638072857e-07, "logits/generated": -2.342031240463257, "logits/real": -2.1226744651794434, "logps/generated": -310.0341796875, "logps/real": -291.40167236328125, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/generated": -20.308788299560547, "rewards/margins": 16.74529457092285, "rewards/real": -3.5634942054748535, "step": 2450 }, { "epoch": 1.56, "learning_rate": 2.6650998824911867e-07, "logits/generated": -2.3393919467926025, "logits/real": -2.1731131076812744, "logps/generated": -310.68505859375, "logps/real": -282.8341064453125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -19.373615264892578, "rewards/margins": 16.252016067504883, "rewards/real": -3.121600866317749, "step": 2460 }, { "epoch": 1.57, "learning_rate": 2.653349001175088e-07, "logits/generated": -2.316004753112793, "logits/real": -2.080775499343872, "logps/generated": -315.99456787109375, "logps/real": -242.0931396484375, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/generated": -20.821147918701172, "rewards/margins": 17.72539710998535, "rewards/real": -3.0957531929016113, "step": 2470 }, { "epoch": 1.57, "learning_rate": 2.641598119858989e-07, "logits/generated": -2.381744146347046, "logits/real": -2.0672965049743652, "logps/generated": -303.83160400390625, "logps/real": -283.36956787109375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -19.453947067260742, "rewards/margins": 16.01351547241211, "rewards/real": -3.440433979034424, "step": 2480 }, { "epoch": 1.58, "learning_rate": 2.62984723854289e-07, "logits/generated": -2.3063855171203613, "logits/real": -2.1104612350463867, "logps/generated": -336.24542236328125, "logps/real": -223.45474243164062, "loss": 0.0132, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -22.51688003540039, "rewards/margins": 18.345937728881836, "rewards/real": -4.170942783355713, "step": 2490 }, { "epoch": 1.59, "learning_rate": 2.618096357226792e-07, "logits/generated": -2.3197531700134277, "logits/real": -2.018913984298706, "logps/generated": -336.23980712890625, "logps/real": -262.60992431640625, "loss": 0.0069, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.693714141845703, "rewards/margins": 19.307472229003906, "rewards/real": -4.386241436004639, "step": 2500 }, { "epoch": 1.59, "learning_rate": 2.6063454759106933e-07, "logits/generated": -2.2846219539642334, "logits/real": -2.0191433429718018, "logps/generated": -331.71783447265625, "logps/real": -268.02398681640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -23.27705955505371, "rewards/margins": 19.076175689697266, "rewards/real": -4.2008867263793945, "step": 2510 }, { "epoch": 1.6, "learning_rate": 2.594594594594595e-07, "logits/generated": -2.3178963661193848, "logits/real": -2.035947561264038, "logps/generated": -323.0597229003906, "logps/real": -245.92855834960938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -22.565059661865234, "rewards/margins": 18.452144622802734, "rewards/real": -4.112914085388184, "step": 2520 }, { "epoch": 1.61, "learning_rate": 2.582843713278496e-07, "logits/generated": -2.245072364807129, "logits/real": -1.9775609970092773, "logps/generated": -341.42755126953125, "logps/real": -241.91702270507812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -23.08028221130371, "rewards/margins": 19.214641571044922, "rewards/real": -3.865643262863159, "step": 2530 }, { "epoch": 1.61, "learning_rate": 2.571092831962397e-07, "logits/generated": -2.287692070007324, "logits/real": -1.9296283721923828, "logps/generated": -337.61688232421875, "logps/real": -272.8384704589844, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.93185043334961, "rewards/margins": 19.479442596435547, "rewards/real": -4.452404975891113, "step": 2540 }, { "epoch": 1.62, "learning_rate": 2.5593419506462984e-07, "logits/generated": -2.231574535369873, "logits/real": -1.9272006750106812, "logps/generated": -347.8257141113281, "logps/real": -286.9981994628906, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -23.885425567626953, "rewards/margins": 19.772022247314453, "rewards/real": -4.113402843475342, "step": 2550 }, { "epoch": 1.62, "learning_rate": 2.5475910693301995e-07, "logits/generated": -2.2364954948425293, "logits/real": -1.9917691946029663, "logps/generated": -348.7269592285156, "logps/real": -309.1827392578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -22.005229949951172, "rewards/margins": 17.796672821044922, "rewards/real": -4.208555698394775, "step": 2560 }, { "epoch": 1.63, "learning_rate": 2.535840188014101e-07, "logits/generated": -2.2995412349700928, "logits/real": -2.10380482673645, "logps/generated": -352.90167236328125, "logps/real": -314.281982421875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -23.920124053955078, "rewards/margins": 18.855754852294922, "rewards/real": -5.064369201660156, "step": 2570 }, { "epoch": 1.64, "learning_rate": 2.5240893066980025e-07, "logits/generated": -2.256836414337158, "logits/real": -1.9991710186004639, "logps/generated": -344.7554016113281, "logps/real": -267.4646301269531, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/generated": -23.83219337463379, "rewards/margins": 19.01470375061035, "rewards/real": -4.817486763000488, "step": 2580 }, { "epoch": 1.64, "learning_rate": 2.5123384253819036e-07, "logits/generated": -2.294147491455078, "logits/real": -2.00473952293396, "logps/generated": -336.1775817871094, "logps/real": -269.46502685546875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -23.781789779663086, "rewards/margins": 19.8565731048584, "rewards/real": -3.9252192974090576, "step": 2590 }, { "epoch": 1.65, "learning_rate": 2.5005875440658046e-07, "logits/generated": -2.3603179454803467, "logits/real": -1.9096410274505615, "logps/generated": -358.67877197265625, "logps/real": -259.40484619140625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/generated": -24.424976348876953, "rewards/margins": 21.086185455322266, "rewards/real": -3.338792324066162, "step": 2600 }, { "epoch": 1.66, "learning_rate": 2.488836662749706e-07, "logits/generated": -2.351179599761963, "logits/real": -2.0530447959899902, "logps/generated": -332.7562561035156, "logps/real": -271.2536926269531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -22.84016227722168, "rewards/margins": 19.333303451538086, "rewards/real": -3.5068557262420654, "step": 2610 }, { "epoch": 1.66, "learning_rate": 2.4770857814336077e-07, "logits/generated": -2.1961145401000977, "logits/real": -1.9656083583831787, "logps/generated": -326.59356689453125, "logps/real": -220.73312377929688, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -22.737462997436523, "rewards/margins": 19.191755294799805, "rewards/real": -3.5457072257995605, "step": 2620 }, { "epoch": 1.67, "learning_rate": 2.4653349001175087e-07, "logits/generated": -2.2957189083099365, "logits/real": -2.001795768737793, "logps/generated": -318.9930725097656, "logps/real": -281.51959228515625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -21.853168487548828, "rewards/margins": 18.380849838256836, "rewards/real": -3.4723198413848877, "step": 2630 }, { "epoch": 1.68, "learning_rate": 2.45358401880141e-07, "logits/generated": -2.263975143432617, "logits/real": -1.9058456420898438, "logps/generated": -349.5631103515625, "logps/real": -259.2726745605469, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.311424255371094, "rewards/margins": 20.01760482788086, "rewards/real": -4.29381799697876, "step": 2640 }, { "epoch": 1.68, "learning_rate": 2.441833137485311e-07, "logits/generated": -2.22051739692688, "logits/real": -2.042902708053589, "logps/generated": -346.29327392578125, "logps/real": -249.6488800048828, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -23.093563079833984, "rewards/margins": 19.136564254760742, "rewards/real": -3.956998825073242, "step": 2650 }, { "epoch": 1.69, "learning_rate": 2.430082256169212e-07, "logits/generated": -2.354282855987549, "logits/real": -1.9278910160064697, "logps/generated": -352.00714111328125, "logps/real": -265.89508056640625, "loss": 0.0077, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -24.244333267211914, "rewards/margins": 20.059101104736328, "rewards/real": -4.185232162475586, "step": 2660 }, { "epoch": 1.69, "learning_rate": 2.418331374853114e-07, "logits/generated": -2.28190541267395, "logits/real": -2.0574002265930176, "logps/generated": -319.3107604980469, "logps/real": -277.8495788574219, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -22.27080726623535, "rewards/margins": 18.873634338378906, "rewards/real": -3.397172212600708, "step": 2670 }, { "epoch": 1.7, "learning_rate": 2.4065804935370153e-07, "logits/generated": -2.241192102432251, "logits/real": -1.9571336507797241, "logps/generated": -327.8152770996094, "logps/real": -257.9501953125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -22.218231201171875, "rewards/margins": 17.811767578125, "rewards/real": -4.40646505355835, "step": 2680 }, { "epoch": 1.71, "learning_rate": 2.3948296122209163e-07, "logits/generated": -2.2373881340026855, "logits/real": -2.0191516876220703, "logps/generated": -345.68438720703125, "logps/real": -284.0022888183594, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -23.410137176513672, "rewards/margins": 19.024240493774414, "rewards/real": -4.385898113250732, "step": 2690 }, { "epoch": 1.71, "learning_rate": 2.383078730904818e-07, "logits/generated": -2.1850571632385254, "logits/real": -1.893228530883789, "logps/generated": -329.25799560546875, "logps/real": -266.69317626953125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -22.37875747680664, "rewards/margins": 18.571931838989258, "rewards/real": -3.806826114654541, "step": 2700 }, { "epoch": 1.72, "learning_rate": 2.371327849588719e-07, "logits/generated": -2.1856093406677246, "logits/real": -1.9036449193954468, "logps/generated": -352.7874755859375, "logps/real": -255.8990936279297, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -25.45499610900879, "rewards/margins": 20.177873611450195, "rewards/real": -5.277122497558594, "step": 2710 }, { "epoch": 1.73, "learning_rate": 2.3595769682726202e-07, "logits/generated": -2.1994104385375977, "logits/real": -2.0348970890045166, "logps/generated": -356.5615234375, "logps/real": -291.3761291503906, "loss": 0.0109, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.836057662963867, "rewards/margins": 19.16860580444336, "rewards/real": -5.667454719543457, "step": 2720 }, { "epoch": 1.73, "learning_rate": 2.3478260869565217e-07, "logits/generated": -2.1490817070007324, "logits/real": -1.9687789678573608, "logps/generated": -370.4169006347656, "logps/real": -257.3158264160156, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -26.3896484375, "rewards/margins": 21.185771942138672, "rewards/real": -5.20387601852417, "step": 2730 }, { "epoch": 1.74, "learning_rate": 2.336075205640423e-07, "logits/generated": -2.1517624855041504, "logits/real": -1.960091233253479, "logps/generated": -379.77825927734375, "logps/real": -356.1867980957031, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -26.38129234313965, "rewards/margins": 19.99581527709961, "rewards/real": -6.385481357574463, "step": 2740 }, { "epoch": 1.74, "learning_rate": 2.3243243243243243e-07, "logits/generated": -2.0364327430725098, "logits/real": -1.8880960941314697, "logps/generated": -373.9236755371094, "logps/real": -324.3224182128906, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -26.41314697265625, "rewards/margins": 20.459888458251953, "rewards/real": -5.953259468078613, "step": 2750 }, { "epoch": 1.75, "learning_rate": 2.3125734430082255e-07, "logits/generated": -2.0131492614746094, "logits/real": -1.8854057788848877, "logps/generated": -390.64788818359375, "logps/real": -305.11285400390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -28.03790283203125, "rewards/margins": 22.16775131225586, "rewards/real": -5.870150566101074, "step": 2760 }, { "epoch": 1.76, "learning_rate": 2.3008225616921268e-07, "logits/generated": -1.956024408340454, "logits/real": -1.9189265966415405, "logps/generated": -396.72283935546875, "logps/real": -286.6882019042969, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -29.13210678100586, "rewards/margins": 21.822053909301758, "rewards/real": -7.310055732727051, "step": 2770 }, { "epoch": 1.76, "learning_rate": 2.289071680376028e-07, "logits/generated": -2.030214309692383, "logits/real": -1.8881438970565796, "logps/generated": -383.7499084472656, "logps/real": -250.5139617919922, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -27.844661712646484, "rewards/margins": 21.812536239624023, "rewards/real": -6.032127380371094, "step": 2780 }, { "epoch": 1.77, "learning_rate": 2.2773207990599294e-07, "logits/generated": -2.08001708984375, "logits/real": -1.891178846359253, "logps/generated": -351.56988525390625, "logps/real": -296.24371337890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -24.752206802368164, "rewards/margins": 18.19687271118164, "rewards/real": -6.555333614349365, "step": 2790 }, { "epoch": 1.78, "learning_rate": 2.2655699177438307e-07, "logits/generated": -2.0704877376556396, "logits/real": -1.90077805519104, "logps/generated": -355.61767578125, "logps/real": -274.2396545410156, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.99367904663086, "rewards/margins": 19.7630615234375, "rewards/real": -5.230617523193359, "step": 2800 }, { "epoch": 1.78, "learning_rate": 2.2538190364277322e-07, "logits/generated": -2.140763282775879, "logits/real": -1.9581050872802734, "logps/generated": -362.32049560546875, "logps/real": -258.9219055175781, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/generated": -25.47673988342285, "rewards/margins": 20.179256439208984, "rewards/real": -5.2974853515625, "step": 2810 }, { "epoch": 1.79, "learning_rate": 2.2420681551116332e-07, "logits/generated": -2.1344077587127686, "logits/real": -1.9745477437973022, "logps/generated": -365.5986022949219, "logps/real": -306.57513427734375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -24.825016021728516, "rewards/margins": 19.670211791992188, "rewards/real": -5.154806613922119, "step": 2820 }, { "epoch": 1.8, "learning_rate": 2.2303172737955345e-07, "logits/generated": -2.124387264251709, "logits/real": -1.9389822483062744, "logps/generated": -334.8365173339844, "logps/real": -278.90850830078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -22.859643936157227, "rewards/margins": 17.987585067749023, "rewards/real": -4.8720574378967285, "step": 2830 }, { "epoch": 1.8, "learning_rate": 2.218566392479436e-07, "logits/generated": -2.079254627227783, "logits/real": -1.930846929550171, "logps/generated": -367.32586669921875, "logps/real": -301.3092346191406, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -26.252771377563477, "rewards/margins": 21.149532318115234, "rewards/real": -5.103243350982666, "step": 2840 }, { "epoch": 1.81, "learning_rate": 2.206815511163337e-07, "logits/generated": -2.1294503211975098, "logits/real": -1.9343706369400024, "logps/generated": -354.76513671875, "logps/real": -296.0406799316406, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.67839241027832, "rewards/margins": 18.78042221069336, "rewards/real": -5.897971153259277, "step": 2850 }, { "epoch": 1.81, "learning_rate": 2.1950646298472383e-07, "logits/generated": -2.1347403526306152, "logits/real": -1.9254436492919922, "logps/generated": -368.8210754394531, "logps/real": -288.74053955078125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -25.642257690429688, "rewards/margins": 21.017126083374023, "rewards/real": -4.625129222869873, "step": 2860 }, { "epoch": 1.82, "learning_rate": 2.18331374853114e-07, "logits/generated": -2.1848769187927246, "logits/real": -2.0680902004241943, "logps/generated": -366.6266784667969, "logps/real": -328.2076416015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -24.437532424926758, "rewards/margins": 19.320016860961914, "rewards/real": -5.117516040802002, "step": 2870 }, { "epoch": 1.83, "learning_rate": 2.171562867215041e-07, "logits/generated": -2.0878078937530518, "logits/real": -1.9517886638641357, "logps/generated": -348.5774230957031, "logps/real": -299.02374267578125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/generated": -24.685237884521484, "rewards/margins": 19.952234268188477, "rewards/real": -4.733007431030273, "step": 2880 }, { "epoch": 1.83, "learning_rate": 2.1598119858989422e-07, "logits/generated": -2.1011247634887695, "logits/real": -1.9725592136383057, "logps/generated": -357.71051025390625, "logps/real": -290.8995056152344, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/generated": -25.373262405395508, "rewards/margins": 20.132232666015625, "rewards/real": -5.241027355194092, "step": 2890 }, { "epoch": 1.84, "learning_rate": 2.1480611045828437e-07, "logits/generated": -2.1049911975860596, "logits/real": -1.9649174213409424, "logps/generated": -385.41241455078125, "logps/real": -274.30303955078125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -27.450220108032227, "rewards/margins": 21.493106842041016, "rewards/real": -5.957114219665527, "step": 2900 }, { "epoch": 1.85, "learning_rate": 2.136310223266745e-07, "logits/generated": -2.1464877128601074, "logits/real": -1.9520257711410522, "logps/generated": -358.3988342285156, "logps/real": -258.34478759765625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/generated": -25.753520965576172, "rewards/margins": 20.76874351501465, "rewards/real": -4.984780311584473, "step": 2910 }, { "epoch": 1.85, "learning_rate": 2.124559341950646e-07, "logits/generated": -2.197218418121338, "logits/real": -2.0531296730041504, "logps/generated": -342.52374267578125, "logps/real": -299.6061096191406, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/generated": -22.946630477905273, "rewards/margins": 19.103984832763672, "rewards/real": -3.842647075653076, "step": 2920 }, { "epoch": 1.86, "learning_rate": 2.1128084606345475e-07, "logits/generated": -2.251016139984131, "logits/real": -2.0284006595611572, "logps/generated": -350.19488525390625, "logps/real": -281.47698974609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -24.785280227661133, "rewards/margins": 19.8306827545166, "rewards/real": -4.9545979499816895, "step": 2930 }, { "epoch": 1.87, "learning_rate": 2.1010575793184488e-07, "logits/generated": -2.2240631580352783, "logits/real": -2.088017702102661, "logps/generated": -347.8515319824219, "logps/real": -317.2982177734375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -23.338415145874023, "rewards/margins": 19.154300689697266, "rewards/real": -4.184115409851074, "step": 2940 }, { "epoch": 1.87, "learning_rate": 2.08930669800235e-07, "logits/generated": -2.1802897453308105, "logits/real": -2.0314040184020996, "logps/generated": -345.7802429199219, "logps/real": -295.5107421875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -23.624263763427734, "rewards/margins": 18.748220443725586, "rewards/real": -4.876040458679199, "step": 2950 }, { "epoch": 1.88, "learning_rate": 2.0775558166862514e-07, "logits/generated": -2.257678270339966, "logits/real": -2.0878357887268066, "logps/generated": -353.88946533203125, "logps/real": -299.9291687011719, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/generated": -24.746679306030273, "rewards/margins": 19.866544723510742, "rewards/real": -4.880134582519531, "step": 2960 }, { "epoch": 1.88, "learning_rate": 2.0658049353701526e-07, "logits/generated": -2.3007264137268066, "logits/real": -2.110032320022583, "logps/generated": -344.11639404296875, "logps/real": -284.4814453125, "loss": 0.0182, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.620412826538086, "rewards/margins": 18.702817916870117, "rewards/real": -3.917595386505127, "step": 2970 }, { "epoch": 1.89, "learning_rate": 2.0540540540540542e-07, "logits/generated": -2.27628755569458, "logits/real": -2.1205649375915527, "logps/generated": -329.26556396484375, "logps/real": -263.96136474609375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -22.701011657714844, "rewards/margins": 19.02513313293457, "rewards/real": -3.675877332687378, "step": 2980 }, { "epoch": 1.9, "learning_rate": 2.0423031727379552e-07, "logits/generated": -2.3053290843963623, "logits/real": -2.0642311573028564, "logps/generated": -334.71331787109375, "logps/real": -276.95147705078125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/generated": -22.66802215576172, "rewards/margins": 18.36069107055664, "rewards/real": -4.30733585357666, "step": 2990 }, { "epoch": 1.9, "learning_rate": 2.0305522914218565e-07, "logits/generated": -2.26841402053833, "logits/real": -2.039511203765869, "logps/generated": -342.7603759765625, "logps/real": -269.6524963378906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -23.502750396728516, "rewards/margins": 18.23208236694336, "rewards/real": -5.270669460296631, "step": 3000 }, { "epoch": 1.91, "learning_rate": 2.018801410105758e-07, "logits/generated": -2.2973265647888184, "logits/real": -2.1449685096740723, "logps/generated": -337.704345703125, "logps/real": -259.8895568847656, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.042863845825195, "rewards/margins": 18.184226989746094, "rewards/real": -4.858633041381836, "step": 3010 }, { "epoch": 1.92, "learning_rate": 2.007050528789659e-07, "logits/generated": -2.286045551300049, "logits/real": -2.046161413192749, "logps/generated": -351.5479736328125, "logps/real": -288.12640380859375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -23.482677459716797, "rewards/margins": 19.257810592651367, "rewards/real": -4.2248687744140625, "step": 3020 }, { "epoch": 1.92, "learning_rate": 1.9952996474735603e-07, "logits/generated": -2.2416133880615234, "logits/real": -2.0939717292785645, "logps/generated": -358.086669921875, "logps/real": -261.29815673828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -25.18197250366211, "rewards/margins": 20.0886287689209, "rewards/real": -5.0933451652526855, "step": 3030 }, { "epoch": 1.93, "learning_rate": 1.9835487661574619e-07, "logits/generated": -2.221487283706665, "logits/real": -2.0919318199157715, "logps/generated": -351.2422180175781, "logps/real": -277.42901611328125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/generated": -24.407487869262695, "rewards/margins": 19.2531795501709, "rewards/real": -5.154306888580322, "step": 3040 }, { "epoch": 1.94, "learning_rate": 1.971797884841363e-07, "logits/generated": -2.1991019248962402, "logits/real": -2.0188393592834473, "logps/generated": -374.80938720703125, "logps/real": -256.5660705566406, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -26.856903076171875, "rewards/margins": 21.417938232421875, "rewards/real": -5.438962936401367, "step": 3050 }, { "epoch": 1.94, "learning_rate": 1.9600470035252641e-07, "logits/generated": -2.1711456775665283, "logits/real": -2.041792392730713, "logps/generated": -346.64105224609375, "logps/real": -298.97564697265625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -23.955524444580078, "rewards/margins": 19.128503799438477, "rewards/real": -4.827020645141602, "step": 3060 }, { "epoch": 1.95, "learning_rate": 1.9482961222091657e-07, "logits/generated": -2.20910906791687, "logits/real": -2.0180556774139404, "logps/generated": -336.7091369628906, "logps/real": -261.6615295410156, "loss": 0.0099, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -24.166288375854492, "rewards/margins": 19.48313331604004, "rewards/real": -4.683154106140137, "step": 3070 }, { "epoch": 1.95, "learning_rate": 1.936545240893067e-07, "logits/generated": -2.190821409225464, "logits/real": -1.9756942987442017, "logps/generated": -337.36322021484375, "logps/real": -244.9541778564453, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -24.287092208862305, "rewards/margins": 19.412704467773438, "rewards/real": -4.874386787414551, "step": 3080 }, { "epoch": 1.96, "learning_rate": 1.924794359576968e-07, "logits/generated": -2.2208573818206787, "logits/real": -1.976784110069275, "logps/generated": -346.1979675292969, "logps/real": -275.9041442871094, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -24.10281753540039, "rewards/margins": 19.66716194152832, "rewards/real": -4.435654640197754, "step": 3090 }, { "epoch": 1.97, "learning_rate": 1.9130434782608695e-07, "logits/generated": -2.166905403137207, "logits/real": -2.006438732147217, "logps/generated": -344.233154296875, "logps/real": -315.53936767578125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -23.461597442626953, "rewards/margins": 19.2020263671875, "rewards/real": -4.259572505950928, "step": 3100 }, { "epoch": 1.97, "learning_rate": 1.9012925969447708e-07, "logits/generated": -2.227118730545044, "logits/real": -2.0388541221618652, "logps/generated": -336.47052001953125, "logps/real": -268.1602783203125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -23.303186416625977, "rewards/margins": 18.488948822021484, "rewards/real": -4.814234733581543, "step": 3110 }, { "epoch": 1.98, "learning_rate": 1.8895417156286718e-07, "logits/generated": -2.1890883445739746, "logits/real": -2.04649019241333, "logps/generated": -353.79461669921875, "logps/real": -307.52423095703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -24.001995086669922, "rewards/margins": 19.549617767333984, "rewards/real": -4.452378749847412, "step": 3120 }, { "epoch": 1.99, "learning_rate": 1.8777908343125734e-07, "logits/generated": -2.1731953620910645, "logits/real": -1.982879638671875, "logps/generated": -369.5123596191406, "logps/real": -300.3362121582031, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -26.723047256469727, "rewards/margins": 22.299365997314453, "rewards/real": -4.423679828643799, "step": 3130 }, { "epoch": 1.99, "learning_rate": 1.8660399529964746e-07, "logits/generated": -2.1560447216033936, "logits/real": -2.040362596511841, "logps/generated": -342.84783935546875, "logps/real": -278.237548828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -24.324447631835938, "rewards/margins": 19.210416793823242, "rewards/real": -5.114028453826904, "step": 3140 }, { "epoch": 2.0, "learning_rate": 1.8542890716803762e-07, "logits/generated": -2.146101474761963, "logits/real": -2.0181069374084473, "logps/generated": -369.8932189941406, "logps/real": -274.1222839355469, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -26.28493309020996, "rewards/margins": 20.953115463256836, "rewards/real": -5.331815719604492, "step": 3150 }, { "epoch": 2.01, "learning_rate": 1.8425381903642772e-07, "logits/generated": -2.1590821743011475, "logits/real": -1.9754966497421265, "logps/generated": -386.6169738769531, "logps/real": -297.4996643066406, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -26.224761962890625, "rewards/margins": 21.103734970092773, "rewards/real": -5.121026039123535, "step": 3160 }, { "epoch": 2.01, "learning_rate": 1.8307873090481785e-07, "logits/generated": -2.1612915992736816, "logits/real": -1.959288239479065, "logps/generated": -363.1042785644531, "logps/real": -258.10174560546875, "loss": 0.0029, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.38741111755371, "rewards/margins": 20.513093948364258, "rewards/real": -4.874313831329346, "step": 3170 }, { "epoch": 2.02, "learning_rate": 1.81903642773208e-07, "logits/generated": -2.098233461380005, "logits/real": -1.9145100116729736, "logps/generated": -371.4063720703125, "logps/real": -243.34945678710938, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.423503875732422, "rewards/margins": 22.34528923034668, "rewards/real": -5.078217506408691, "step": 3180 }, { "epoch": 2.02, "learning_rate": 1.807285546415981e-07, "logits/generated": -2.1498446464538574, "logits/real": -1.9860435724258423, "logps/generated": -361.0787658691406, "logps/real": -271.49951171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.6298885345459, "rewards/margins": 20.82622718811035, "rewards/real": -4.803657531738281, "step": 3190 }, { "epoch": 2.03, "learning_rate": 1.7955346650998823e-07, "logits/generated": -2.120079278945923, "logits/real": -2.00551700592041, "logps/generated": -360.60699462890625, "logps/real": -287.0360107421875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -25.6106014251709, "rewards/margins": 20.524362564086914, "rewards/real": -5.086238861083984, "step": 3200 }, { "epoch": 2.04, "learning_rate": 1.7837837837837838e-07, "logits/generated": -2.1424782276153564, "logits/real": -1.9207490682601929, "logps/generated": -393.1436462402344, "logps/real": -282.522705078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -28.278972625732422, "rewards/margins": 22.59919548034668, "rewards/real": -5.679778099060059, "step": 3210 }, { "epoch": 2.04, "learning_rate": 1.772032902467685e-07, "logits/generated": -2.0800764560699463, "logits/real": -2.001923084259033, "logps/generated": -348.87725830078125, "logps/real": -298.7556457519531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -24.367755889892578, "rewards/margins": 18.799022674560547, "rewards/real": -5.568735599517822, "step": 3220 }, { "epoch": 2.05, "learning_rate": 1.7602820211515861e-07, "logits/generated": -2.1074421405792236, "logits/real": -2.005859375, "logps/generated": -356.49200439453125, "logps/real": -305.5742492675781, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -24.63992691040039, "rewards/margins": 19.582958221435547, "rewards/real": -5.056967735290527, "step": 3230 }, { "epoch": 2.06, "learning_rate": 1.7485311398354877e-07, "logits/generated": -2.0994040966033936, "logits/real": -2.007887601852417, "logps/generated": -365.8727111816406, "logps/real": -317.9673156738281, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.743017196655273, "rewards/margins": 20.144943237304688, "rewards/real": -5.598074913024902, "step": 3240 }, { "epoch": 2.06, "learning_rate": 1.736780258519389e-07, "logits/generated": -2.0684878826141357, "logits/real": -2.0286765098571777, "logps/generated": -356.78631591796875, "logps/real": -321.638916015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -25.5567626953125, "rewards/margins": 20.853425979614258, "rewards/real": -4.703336715698242, "step": 3250 }, { "epoch": 2.07, "learning_rate": 1.72502937720329e-07, "logits/generated": -2.081726312637329, "logits/real": -1.862388014793396, "logps/generated": -380.224853515625, "logps/real": -282.75091552734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -27.150707244873047, "rewards/margins": 22.10866928100586, "rewards/real": -5.042040824890137, "step": 3260 }, { "epoch": 2.07, "learning_rate": 1.7132784958871915e-07, "logits/generated": -2.0935699939727783, "logits/real": -1.931623101234436, "logps/generated": -376.69622802734375, "logps/real": -274.62872314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.69793128967285, "rewards/margins": 22.377689361572266, "rewards/real": -5.3202433586120605, "step": 3270 }, { "epoch": 2.08, "learning_rate": 1.7015276145710928e-07, "logits/generated": -2.098092794418335, "logits/real": -1.9440534114837646, "logps/generated": -383.3734436035156, "logps/real": -311.0740661621094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -27.666534423828125, "rewards/margins": 22.305049896240234, "rewards/real": -5.361481666564941, "step": 3280 }, { "epoch": 2.09, "learning_rate": 1.6897767332549938e-07, "logits/generated": -2.1007964611053467, "logits/real": -1.9998801946640015, "logps/generated": -395.33355712890625, "logps/real": -310.078857421875, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.96969985961914, "rewards/margins": 24.22079086303711, "rewards/real": -4.748910903930664, "step": 3290 }, { "epoch": 2.09, "learning_rate": 1.6780258519388953e-07, "logits/generated": -2.099114179611206, "logits/real": -1.94070303440094, "logps/generated": -377.9015197753906, "logps/real": -274.0469970703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -27.645259857177734, "rewards/margins": 22.4710750579834, "rewards/real": -5.174184322357178, "step": 3300 }, { "epoch": 2.1, "learning_rate": 1.6662749706227966e-07, "logits/generated": -2.0564610958099365, "logits/real": -1.9704822301864624, "logps/generated": -345.58868408203125, "logps/real": -283.3597717285156, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -24.705806732177734, "rewards/margins": 20.07907485961914, "rewards/real": -4.626730918884277, "step": 3310 }, { "epoch": 2.11, "learning_rate": 1.654524089306698e-07, "logits/generated": -2.08727765083313, "logits/real": -1.8853626251220703, "logps/generated": -369.41058349609375, "logps/real": -255.86831665039062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -26.35872459411621, "rewards/margins": 21.617361068725586, "rewards/real": -4.741362571716309, "step": 3320 }, { "epoch": 2.11, "learning_rate": 1.6427732079905992e-07, "logits/generated": -1.9915502071380615, "logits/real": -1.8262687921524048, "logps/generated": -378.84918212890625, "logps/real": -285.84429931640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -27.37435531616211, "rewards/margins": 21.355920791625977, "rewards/real": -6.018433570861816, "step": 3330 }, { "epoch": 2.12, "learning_rate": 1.6310223266745005e-07, "logits/generated": -2.0596718788146973, "logits/real": -1.914754867553711, "logps/generated": -378.89105224609375, "logps/real": -328.7146301269531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.448780059814453, "rewards/margins": 21.816377639770508, "rewards/real": -5.6324052810668945, "step": 3340 }, { "epoch": 2.13, "learning_rate": 1.619271445358402e-07, "logits/generated": -2.040947198867798, "logits/real": -1.8854957818984985, "logps/generated": -413.3182067871094, "logps/real": -317.62567138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.338998794555664, "rewards/margins": 25.05344009399414, "rewards/real": -5.285560607910156, "step": 3350 }, { "epoch": 2.13, "learning_rate": 1.607520564042303e-07, "logits/generated": -2.012472152709961, "logits/real": -1.8490098714828491, "logps/generated": -397.6828918457031, "logps/real": -272.8224182128906, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -29.052814483642578, "rewards/margins": 23.327375411987305, "rewards/real": -5.72544002532959, "step": 3360 }, { "epoch": 2.14, "learning_rate": 1.5957696827262043e-07, "logits/generated": -2.029719829559326, "logits/real": -1.8433088064193726, "logps/generated": -398.36297607421875, "logps/real": -265.16351318359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -29.465087890625, "rewards/margins": 23.546363830566406, "rewards/real": -5.918723106384277, "step": 3370 }, { "epoch": 2.14, "learning_rate": 1.5840188014101058e-07, "logits/generated": -1.9572408199310303, "logits/real": -1.790989875793457, "logps/generated": -419.62109375, "logps/real": -249.32080078125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -32.099082946777344, "rewards/margins": 25.258312225341797, "rewards/real": -6.840768337249756, "step": 3380 }, { "epoch": 2.15, "learning_rate": 1.572267920094007e-07, "logits/generated": -1.9638580083847046, "logits/real": -1.9275802373886108, "logps/generated": -400.74395751953125, "logps/real": -334.06036376953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.58945083618164, "rewards/margins": 23.080732345581055, "rewards/real": -6.508718967437744, "step": 3390 }, { "epoch": 2.16, "learning_rate": 1.560517038777908e-07, "logits/generated": -2.0270586013793945, "logits/real": -1.8505998849868774, "logps/generated": -407.79156494140625, "logps/real": -288.97918701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.032482147216797, "rewards/margins": 25.01006507873535, "rewards/real": -5.0224175453186035, "step": 3400 }, { "epoch": 2.16, "learning_rate": 1.5487661574618097e-07, "logits/generated": -2.016819477081299, "logits/real": -1.882765769958496, "logps/generated": -400.1058349609375, "logps/real": -288.342529296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -29.87357521057129, "rewards/margins": 23.768878936767578, "rewards/real": -6.104691028594971, "step": 3410 }, { "epoch": 2.17, "learning_rate": 1.537015276145711e-07, "logits/generated": -1.9431253671646118, "logits/real": -1.842599868774414, "logps/generated": -386.32745361328125, "logps/real": -266.6620788574219, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -28.412429809570312, "rewards/margins": 22.158546447753906, "rewards/real": -6.253881454467773, "step": 3420 }, { "epoch": 2.18, "learning_rate": 1.525264394829612e-07, "logits/generated": -1.9972089529037476, "logits/real": -1.828897476196289, "logps/generated": -403.88665771484375, "logps/real": -264.700927734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -28.998998641967773, "rewards/margins": 23.43665885925293, "rewards/real": -5.5623393058776855, "step": 3430 }, { "epoch": 2.18, "learning_rate": 1.5135135135135135e-07, "logits/generated": -1.9987987279891968, "logits/real": -1.8746980428695679, "logps/generated": -408.3876037597656, "logps/real": -303.7450256347656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.954736709594727, "rewards/margins": 23.751495361328125, "rewards/real": -6.203239440917969, "step": 3440 }, { "epoch": 2.19, "learning_rate": 1.5017626321974148e-07, "logits/generated": -1.9847347736358643, "logits/real": -1.795657753944397, "logps/generated": -410.84539794921875, "logps/real": -282.5588684082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.85659408569336, "rewards/margins": 24.667091369628906, "rewards/real": -6.189502239227295, "step": 3450 }, { "epoch": 2.2, "learning_rate": 1.4900117508813158e-07, "logits/generated": -1.9983108043670654, "logits/real": -1.780178427696228, "logps/generated": -420.0068359375, "logps/real": -287.4795227050781, "loss": 0.0029, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.99736976623535, "rewards/margins": 24.340909957885742, "rewards/real": -6.656461238861084, "step": 3460 }, { "epoch": 2.2, "learning_rate": 1.4782608695652173e-07, "logits/generated": -2.0104641914367676, "logits/real": -1.8685280084609985, "logps/generated": -394.21917724609375, "logps/real": -291.6011962890625, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.079885482788086, "rewards/margins": 22.35186767578125, "rewards/real": -6.728022575378418, "step": 3470 }, { "epoch": 2.21, "learning_rate": 1.4665099882491186e-07, "logits/generated": -1.9756362438201904, "logits/real": -1.8447771072387695, "logps/generated": -391.41680908203125, "logps/real": -349.69464111328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -28.011831283569336, "rewards/margins": 22.25851058959961, "rewards/real": -5.753323554992676, "step": 3480 }, { "epoch": 2.21, "learning_rate": 1.45475910693302e-07, "logits/generated": -1.9767194986343384, "logits/real": -1.779309630393982, "logps/generated": -391.83380126953125, "logps/real": -278.58905029296875, "loss": 0.005, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.746475219726562, "rewards/margins": 22.14328384399414, "rewards/real": -6.603189945220947, "step": 3490 }, { "epoch": 2.22, "learning_rate": 1.4430082256169212e-07, "logits/generated": -1.9377864599227905, "logits/real": -1.8163446187973022, "logps/generated": -418.55731201171875, "logps/real": -318.5293273925781, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -30.8940486907959, "rewards/margins": 25.708526611328125, "rewards/real": -5.18552303314209, "step": 3500 }, { "epoch": 2.23, "learning_rate": 1.4312573443008224e-07, "logits/generated": -1.9508612155914307, "logits/real": -1.8122934103012085, "logps/generated": -394.3963928222656, "logps/real": -335.4398498535156, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.22671127319336, "rewards/margins": 22.93834114074707, "rewards/real": -5.288372993469238, "step": 3510 }, { "epoch": 2.23, "learning_rate": 1.4195064629847237e-07, "logits/generated": -1.9361099004745483, "logits/real": -1.7594772577285767, "logps/generated": -423.53515625, "logps/real": -293.36834716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.2425479888916, "rewards/margins": 25.244579315185547, "rewards/real": -5.9979681968688965, "step": 3520 }, { "epoch": 2.24, "learning_rate": 1.407755581668625e-07, "logits/generated": -1.9259124994277954, "logits/real": -1.7739746570587158, "logps/generated": -394.9298095703125, "logps/real": -288.7347412109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -28.674121856689453, "rewards/margins": 22.565471649169922, "rewards/real": -6.108652114868164, "step": 3530 }, { "epoch": 2.25, "learning_rate": 1.3960047003525263e-07, "logits/generated": -1.945650339126587, "logits/real": -1.7504583597183228, "logps/generated": -425.6507263183594, "logps/real": -254.51651000976562, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -30.8286190032959, "rewards/margins": 24.612564086914062, "rewards/real": -6.216059684753418, "step": 3540 }, { "epoch": 2.25, "learning_rate": 1.3842538190364278e-07, "logits/generated": -1.948203444480896, "logits/real": -1.8336362838745117, "logps/generated": -414.30035400390625, "logps/real": -326.8367614746094, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.730371475219727, "rewards/margins": 23.398197174072266, "rewards/real": -6.332175254821777, "step": 3550 }, { "epoch": 2.26, "learning_rate": 1.372502937720329e-07, "logits/generated": -2.027785539627075, "logits/real": -1.9192225933074951, "logps/generated": -383.18121337890625, "logps/real": -313.5802307128906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -26.630611419677734, "rewards/margins": 22.166791915893555, "rewards/real": -4.463819980621338, "step": 3560 }, { "epoch": 2.27, "learning_rate": 1.36075205640423e-07, "logits/generated": -1.9468467235565186, "logits/real": -1.8661794662475586, "logps/generated": -399.8686828613281, "logps/real": -270.86627197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.764408111572266, "rewards/margins": 23.469295501708984, "rewards/real": -5.295111179351807, "step": 3570 }, { "epoch": 2.27, "learning_rate": 1.3490011750881317e-07, "logits/generated": -2.049177408218384, "logits/real": -1.8789154291152954, "logps/generated": -396.53472900390625, "logps/real": -308.8271484375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -28.53740882873535, "rewards/margins": 24.08509063720703, "rewards/real": -4.4523186683654785, "step": 3580 }, { "epoch": 2.28, "learning_rate": 1.337250293772033e-07, "logits/generated": -2.0029730796813965, "logits/real": -1.934480905532837, "logps/generated": -374.8182067871094, "logps/real": -322.31915283203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -26.162517547607422, "rewards/margins": 20.880863189697266, "rewards/real": -5.281655788421631, "step": 3590 }, { "epoch": 2.28, "learning_rate": 1.325499412455934e-07, "logits/generated": -1.9719984531402588, "logits/real": -1.801230788230896, "logps/generated": -394.8279724121094, "logps/real": -306.5439453125, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.897329330444336, "rewards/margins": 23.881816864013672, "rewards/real": -5.015511512756348, "step": 3600 }, { "epoch": 2.29, "learning_rate": 1.3137485311398355e-07, "logits/generated": -1.9651342630386353, "logits/real": -1.8825359344482422, "logps/generated": -364.9615478515625, "logps/real": -305.80230712890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -25.72686195373535, "rewards/margins": 20.991674423217773, "rewards/real": -4.735187530517578, "step": 3610 }, { "epoch": 2.3, "learning_rate": 1.3019976498237368e-07, "logits/generated": -2.0056333541870117, "logits/real": -1.8786531686782837, "logps/generated": -405.5434265136719, "logps/real": -294.9462585449219, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -29.38471031188965, "rewards/margins": 24.157129287719727, "rewards/real": -5.2275800704956055, "step": 3620 }, { "epoch": 2.3, "learning_rate": 1.290246768507638e-07, "logits/generated": -1.947729468345642, "logits/real": -1.8916778564453125, "logps/generated": -375.58026123046875, "logps/real": -303.40283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -26.317413330078125, "rewards/margins": 21.212438583374023, "rewards/real": -5.104973793029785, "step": 3630 }, { "epoch": 2.31, "learning_rate": 1.2784958871915393e-07, "logits/generated": -1.9016625881195068, "logits/real": -1.7552967071533203, "logps/generated": -396.3576965332031, "logps/real": -253.3724822998047, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.431873321533203, "rewards/margins": 23.697856903076172, "rewards/real": -5.734013080596924, "step": 3640 }, { "epoch": 2.32, "learning_rate": 1.2667450058754406e-07, "logits/generated": -1.9619117975234985, "logits/real": -1.8128383159637451, "logps/generated": -364.29071044921875, "logps/real": -247.4691619873047, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.770038604736328, "rewards/margins": 21.507999420166016, "rewards/real": -5.262039661407471, "step": 3650 }, { "epoch": 2.32, "learning_rate": 1.254994124559342e-07, "logits/generated": -1.9781490564346313, "logits/real": -1.8369346857070923, "logps/generated": -394.2939453125, "logps/real": -295.5604553222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -28.346759796142578, "rewards/margins": 23.18613624572754, "rewards/real": -5.160625457763672, "step": 3660 }, { "epoch": 2.33, "learning_rate": 1.2432432432432432e-07, "logits/generated": -1.9349294900894165, "logits/real": -1.8810224533081055, "logps/generated": -378.1846008300781, "logps/real": -301.382080078125, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.532123565673828, "rewards/margins": 20.59892463684082, "rewards/real": -5.933199882507324, "step": 3670 }, { "epoch": 2.34, "learning_rate": 1.2314923619271444e-07, "logits/generated": -1.957461953163147, "logits/real": -1.8917419910430908, "logps/generated": -388.87432861328125, "logps/real": -350.3866271972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -27.713695526123047, "rewards/margins": 22.36217498779297, "rewards/real": -5.3515214920043945, "step": 3680 }, { "epoch": 2.34, "learning_rate": 1.2197414806110457e-07, "logits/generated": -1.8699737787246704, "logits/real": -1.782846212387085, "logps/generated": -400.0876770019531, "logps/real": -277.35076904296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.43486976623535, "rewards/margins": 23.6584529876709, "rewards/real": -5.776413917541504, "step": 3690 }, { "epoch": 2.35, "learning_rate": 1.207990599294947e-07, "logits/generated": -1.9608417749404907, "logits/real": -1.8951002359390259, "logps/generated": -411.26190185546875, "logps/real": -317.6622009277344, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.566543579101562, "rewards/margins": 23.481670379638672, "rewards/real": -6.084873676300049, "step": 3700 }, { "epoch": 2.35, "learning_rate": 1.1962397179788483e-07, "logits/generated": -1.9031436443328857, "logits/real": -1.8433917760849, "logps/generated": -397.88836669921875, "logps/real": -258.14495849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -29.519550323486328, "rewards/margins": 22.443387985229492, "rewards/real": -7.076161861419678, "step": 3710 }, { "epoch": 2.36, "learning_rate": 1.1844888366627497e-07, "logits/generated": -1.9236103296279907, "logits/real": -1.829207181930542, "logps/generated": -396.1493225097656, "logps/real": -270.80133056640625, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.64163589477539, "rewards/margins": 23.829097747802734, "rewards/real": -5.8125386238098145, "step": 3720 }, { "epoch": 2.37, "learning_rate": 1.172737955346651e-07, "logits/generated": -1.953190565109253, "logits/real": -1.8170249462127686, "logps/generated": -422.35650634765625, "logps/real": -285.5227966308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.606027603149414, "rewards/margins": 25.138118743896484, "rewards/real": -6.467909336090088, "step": 3730 }, { "epoch": 2.37, "learning_rate": 1.1609870740305522e-07, "logits/generated": -1.9567396640777588, "logits/real": -1.8386499881744385, "logps/generated": -411.81378173828125, "logps/real": -301.4488830566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -30.37715721130371, "rewards/margins": 24.879119873046875, "rewards/real": -5.498034477233887, "step": 3740 }, { "epoch": 2.38, "learning_rate": 1.1492361927144535e-07, "logits/generated": -1.8939313888549805, "logits/real": -1.7057090997695923, "logps/generated": -398.8463439941406, "logps/real": -330.5086364746094, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -29.181045532226562, "rewards/margins": 23.68384552001953, "rewards/real": -5.497200965881348, "step": 3750 }, { "epoch": 2.39, "learning_rate": 1.1374853113983548e-07, "logits/generated": -1.9329249858856201, "logits/real": -1.820165991783142, "logps/generated": -422.8020935058594, "logps/real": -280.1080627441406, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.731647491455078, "rewards/margins": 24.493200302124023, "rewards/real": -6.238447666168213, "step": 3760 }, { "epoch": 2.39, "learning_rate": 1.1257344300822562e-07, "logits/generated": -1.8722028732299805, "logits/real": -1.7556512355804443, "logps/generated": -412.02276611328125, "logps/real": -309.0514831542969, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -30.253271102905273, "rewards/margins": 24.02978515625, "rewards/real": -6.223486423492432, "step": 3770 }, { "epoch": 2.4, "learning_rate": 1.1139835487661573e-07, "logits/generated": -1.808394193649292, "logits/real": -1.8020694255828857, "logps/generated": -403.3237609863281, "logps/real": -283.5267333984375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -30.11172866821289, "rewards/margins": 23.824481964111328, "rewards/real": -6.287243843078613, "step": 3780 }, { "epoch": 2.4, "learning_rate": 1.1022326674500588e-07, "logits/generated": -1.7356446981430054, "logits/real": -1.7262321710586548, "logps/generated": -414.942626953125, "logps/real": -302.17474365234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.184499740600586, "rewards/margins": 25.247180938720703, "rewards/real": -5.937318801879883, "step": 3790 }, { "epoch": 2.41, "learning_rate": 1.09048178613396e-07, "logits/generated": -1.8033393621444702, "logits/real": -1.7795436382293701, "logps/generated": -420.39971923828125, "logps/real": -293.73248291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -30.607690811157227, "rewards/margins": 24.23845672607422, "rewards/real": -6.369235992431641, "step": 3800 }, { "epoch": 2.42, "learning_rate": 1.0787309048178613e-07, "logits/generated": -1.8363456726074219, "logits/real": -1.8129303455352783, "logps/generated": -397.9834289550781, "logps/real": -297.1370544433594, "loss": 0.0045, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -28.858022689819336, "rewards/margins": 22.88436508178711, "rewards/real": -5.973654747009277, "step": 3810 }, { "epoch": 2.42, "learning_rate": 1.0669800235017626e-07, "logits/generated": -1.8176523447036743, "logits/real": -1.774566888809204, "logps/generated": -408.4759826660156, "logps/real": -288.6650390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.875314712524414, "rewards/margins": 23.64436912536621, "rewards/real": -6.230945110321045, "step": 3820 }, { "epoch": 2.43, "learning_rate": 1.0552291421856639e-07, "logits/generated": -1.7467482089996338, "logits/real": -1.7439444065093994, "logps/generated": -422.25213623046875, "logps/real": -269.0238037109375, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.126392364501953, "rewards/margins": 24.541561126708984, "rewards/real": -6.584831237792969, "step": 3830 }, { "epoch": 2.44, "learning_rate": 1.0434782608695651e-07, "logits/generated": -1.732426404953003, "logits/real": -1.7286033630371094, "logps/generated": -398.12841796875, "logps/real": -310.280029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -29.06282615661621, "rewards/margins": 22.96421241760254, "rewards/real": -6.098611354827881, "step": 3840 }, { "epoch": 2.44, "learning_rate": 1.0317273795534664e-07, "logits/generated": -1.806668996810913, "logits/real": -1.7185096740722656, "logps/generated": -387.23248291015625, "logps/real": -307.28485107421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -28.445404052734375, "rewards/margins": 22.187774658203125, "rewards/real": -6.25762939453125, "step": 3850 }, { "epoch": 2.45, "learning_rate": 1.0199764982373678e-07, "logits/generated": -1.764177680015564, "logits/real": -1.779284119606018, "logps/generated": -427.274658203125, "logps/real": -308.47650146484375, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.830997467041016, "rewards/margins": 24.836002349853516, "rewards/real": -6.994994163513184, "step": 3860 }, { "epoch": 2.46, "learning_rate": 1.0082256169212691e-07, "logits/generated": -1.8138262033462524, "logits/real": -1.734763741493225, "logps/generated": -422.82421875, "logps/real": -307.7763671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -31.90139389038086, "rewards/margins": 25.225627899169922, "rewards/real": -6.675767421722412, "step": 3870 }, { "epoch": 2.46, "learning_rate": 9.964747356051703e-08, "logits/generated": -1.687376618385315, "logits/real": -1.636805534362793, "logps/generated": -409.3494567871094, "logps/real": -281.611328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -31.65591049194336, "rewards/margins": 24.733543395996094, "rewards/real": -6.922367095947266, "step": 3880 }, { "epoch": 2.47, "learning_rate": 9.847238542890717e-08, "logits/generated": -1.6962316036224365, "logits/real": -1.7696120738983154, "logps/generated": -443.807373046875, "logps/real": -335.1094970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.494110107421875, "rewards/margins": 26.92751693725586, "rewards/real": -6.566593170166016, "step": 3890 }, { "epoch": 2.47, "learning_rate": 9.72972972972973e-08, "logits/generated": -1.7020161151885986, "logits/real": -1.681674599647522, "logps/generated": -434.5680236816406, "logps/real": -280.8212585449219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -33.23873519897461, "rewards/margins": 26.639150619506836, "rewards/real": -6.599585056304932, "step": 3900 }, { "epoch": 2.48, "learning_rate": 9.612220916568742e-08, "logits/generated": -1.7404295206069946, "logits/real": -1.73974609375, "logps/generated": -434.4542541503906, "logps/real": -315.81671142578125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -31.91022300720215, "rewards/margins": 25.870519638061523, "rewards/real": -6.039700508117676, "step": 3910 }, { "epoch": 2.49, "learning_rate": 9.494712103407755e-08, "logits/generated": -1.7896816730499268, "logits/real": -1.7941644191741943, "logps/generated": -401.29730224609375, "logps/real": -345.435302734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -29.499197006225586, "rewards/margins": 22.433208465576172, "rewards/real": -7.065989017486572, "step": 3920 }, { "epoch": 2.49, "learning_rate": 9.377203290246769e-08, "logits/generated": -1.8484700918197632, "logits/real": -1.916161298751831, "logps/generated": -363.75433349609375, "logps/real": -331.8744201660156, "loss": 0.0045, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.15157699584961, "rewards/margins": 19.572826385498047, "rewards/real": -5.578749656677246, "step": 3930 }, { "epoch": 2.5, "learning_rate": 9.25969447708578e-08, "logits/generated": -1.7649660110473633, "logits/real": -1.7312488555908203, "logps/generated": -420.1351623535156, "logps/real": -292.35882568359375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -31.093624114990234, "rewards/margins": 24.103261947631836, "rewards/real": -6.990364074707031, "step": 3940 }, { "epoch": 2.51, "learning_rate": 9.142185663924793e-08, "logits/generated": -1.7428464889526367, "logits/real": -1.7745319604873657, "logps/generated": -418.396484375, "logps/real": -293.8400573730469, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -31.799205780029297, "rewards/margins": 24.954181671142578, "rewards/real": -6.845028877258301, "step": 3950 }, { "epoch": 2.51, "learning_rate": 9.024676850763807e-08, "logits/generated": -1.7720359563827515, "logits/real": -1.7069841623306274, "logps/generated": -431.8590393066406, "logps/real": -297.2078552246094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -32.12538528442383, "rewards/margins": 26.22249984741211, "rewards/real": -5.902883529663086, "step": 3960 }, { "epoch": 2.52, "learning_rate": 8.90716803760282e-08, "logits/generated": -1.7318195104599, "logits/real": -1.752964735031128, "logps/generated": -418.92169189453125, "logps/real": -314.60302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -31.43752670288086, "rewards/margins": 25.329748153686523, "rewards/real": -6.1077775955200195, "step": 3970 }, { "epoch": 2.53, "learning_rate": 8.789659224441833e-08, "logits/generated": -1.7232677936553955, "logits/real": -1.6864397525787354, "logps/generated": -420.5328674316406, "logps/real": -290.80902099609375, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.34931564331055, "rewards/margins": 26.023815155029297, "rewards/real": -6.325500011444092, "step": 3980 }, { "epoch": 2.53, "learning_rate": 8.672150411280846e-08, "logits/generated": -1.7739341259002686, "logits/real": -1.8019405603408813, "logps/generated": -415.83062744140625, "logps/real": -326.75469970703125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -30.948598861694336, "rewards/margins": 25.07623291015625, "rewards/real": -5.872367858886719, "step": 3990 }, { "epoch": 2.54, "learning_rate": 8.554641598119859e-08, "logits/generated": -1.8021968603134155, "logits/real": -1.7830060720443726, "logps/generated": -425.39697265625, "logps/real": -320.0240478515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -31.33600425720215, "rewards/margins": 25.411022186279297, "rewards/real": -5.924983024597168, "step": 4000 }, { "epoch": 2.54, "learning_rate": 8.437132784958871e-08, "logits/generated": -1.7438474893569946, "logits/real": -1.7602970600128174, "logps/generated": -406.46856689453125, "logps/real": -296.0639343261719, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -30.566293716430664, "rewards/margins": 23.20583152770996, "rewards/real": -7.360462188720703, "step": 4010 }, { "epoch": 2.55, "learning_rate": 8.319623971797884e-08, "logits/generated": -1.8322248458862305, "logits/real": -1.86432683467865, "logps/generated": -406.3897705078125, "logps/real": -310.910400390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.910282135009766, "rewards/margins": 23.635074615478516, "rewards/real": -6.275205612182617, "step": 4020 }, { "epoch": 2.56, "learning_rate": 8.202115158636898e-08, "logits/generated": -1.7475402355194092, "logits/real": -1.744633674621582, "logps/generated": -435.2589416503906, "logps/real": -313.73150634765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -32.4385986328125, "rewards/margins": 25.712047576904297, "rewards/real": -6.7265496253967285, "step": 4030 }, { "epoch": 2.56, "learning_rate": 8.08460634547591e-08, "logits/generated": -1.7663837671279907, "logits/real": -1.74166738986969, "logps/generated": -437.02069091796875, "logps/real": -333.59344482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.60531997680664, "rewards/margins": 25.910425186157227, "rewards/real": -6.6948981285095215, "step": 4040 }, { "epoch": 2.57, "learning_rate": 7.967097532314922e-08, "logits/generated": -1.7162328958511353, "logits/real": -1.7188522815704346, "logps/generated": -453.78546142578125, "logps/real": -318.1512145996094, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.322872161865234, "rewards/margins": 27.458202362060547, "rewards/real": -6.8646674156188965, "step": 4050 }, { "epoch": 2.58, "learning_rate": 7.849588719153937e-08, "logits/generated": -1.7017135620117188, "logits/real": -1.7324234247207642, "logps/generated": -450.5570373535156, "logps/real": -289.6382141113281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -33.93928527832031, "rewards/margins": 27.395299911499023, "rewards/real": -6.54398250579834, "step": 4060 }, { "epoch": 2.58, "learning_rate": 7.73207990599295e-08, "logits/generated": -1.6585533618927002, "logits/real": -1.634086012840271, "logps/generated": -448.7232360839844, "logps/real": -277.66802978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -34.147247314453125, "rewards/margins": 26.556921005249023, "rewards/real": -7.590325355529785, "step": 4070 }, { "epoch": 2.59, "learning_rate": 7.614571092831962e-08, "logits/generated": -1.621957778930664, "logits/real": -1.6922699213027954, "logps/generated": -462.8789978027344, "logps/real": -305.51947021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.89826202392578, "rewards/margins": 28.3109188079834, "rewards/real": -7.587339878082275, "step": 4080 }, { "epoch": 2.6, "learning_rate": 7.497062279670975e-08, "logits/generated": -1.6756019592285156, "logits/real": -1.705483078956604, "logps/generated": -430.5467224121094, "logps/real": -286.50982666015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -32.59349060058594, "rewards/margins": 24.432825088500977, "rewards/real": -8.160660743713379, "step": 4090 }, { "epoch": 2.6, "learning_rate": 7.379553466509989e-08, "logits/generated": -1.6760860681533813, "logits/real": -1.7032572031021118, "logps/generated": -439.35498046875, "logps/real": -293.1243591308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -33.49091339111328, "rewards/margins": 26.223369598388672, "rewards/real": -7.267544746398926, "step": 4100 }, { "epoch": 2.61, "learning_rate": 7.262044653349e-08, "logits/generated": -1.6767972707748413, "logits/real": -1.6770479679107666, "logps/generated": -453.43463134765625, "logps/real": -309.2659912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.969093322753906, "rewards/margins": 27.015491485595703, "rewards/real": -7.953605651855469, "step": 4110 }, { "epoch": 2.61, "learning_rate": 7.144535840188013e-08, "logits/generated": -1.6518837213516235, "logits/real": -1.661682367324829, "logps/generated": -444.19732666015625, "logps/real": -248.5184783935547, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -34.29254150390625, "rewards/margins": 27.253955841064453, "rewards/real": -7.0385894775390625, "step": 4120 }, { "epoch": 2.62, "learning_rate": 7.027027027027027e-08, "logits/generated": -1.6208521127700806, "logits/real": -1.684266448020935, "logps/generated": -450.17913818359375, "logps/real": -303.62945556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.04570388793945, "rewards/margins": 27.82986831665039, "rewards/real": -6.215835094451904, "step": 4130 }, { "epoch": 2.63, "learning_rate": 6.909518213866039e-08, "logits/generated": -1.7041343450546265, "logits/real": -1.7098076343536377, "logps/generated": -449.44219970703125, "logps/real": -314.94830322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.25445556640625, "rewards/margins": 28.510257720947266, "rewards/real": -5.74419641494751, "step": 4140 }, { "epoch": 2.63, "learning_rate": 6.792009400705053e-08, "logits/generated": -1.6410897970199585, "logits/real": -1.6358200311660767, "logps/generated": -433.7411193847656, "logps/real": -287.76861572265625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.50247573852539, "rewards/margins": 26.161731719970703, "rewards/real": -7.340744972229004, "step": 4150 }, { "epoch": 2.64, "learning_rate": 6.674500587544066e-08, "logits/generated": -1.6944376230239868, "logits/real": -1.8017114400863647, "logps/generated": -453.3190002441406, "logps/real": -327.5594787597656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -33.635292053222656, "rewards/margins": 28.087554931640625, "rewards/real": -5.547739505767822, "step": 4160 }, { "epoch": 2.65, "learning_rate": 6.556991774383078e-08, "logits/generated": -1.7940804958343506, "logits/real": -1.7683172225952148, "logps/generated": -417.69122314453125, "logps/real": -273.3849792480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -30.897634506225586, "rewards/margins": 23.836702346801758, "rewards/real": -7.0609331130981445, "step": 4170 }, { "epoch": 2.65, "learning_rate": 6.439482961222091e-08, "logits/generated": -1.7380746603012085, "logits/real": -1.753003716468811, "logps/generated": -404.82989501953125, "logps/real": -300.6372985839844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.634296417236328, "rewards/margins": 23.29750633239746, "rewards/real": -6.336789131164551, "step": 4180 }, { "epoch": 2.66, "learning_rate": 6.321974148061104e-08, "logits/generated": -1.6851911544799805, "logits/real": -1.7241060733795166, "logps/generated": -400.8048400878906, "logps/real": -284.01654052734375, "loss": 0.0044, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -30.020034790039062, "rewards/margins": 23.252304077148438, "rewards/real": -6.76772928237915, "step": 4190 }, { "epoch": 2.66, "learning_rate": 6.204465334900117e-08, "logits/generated": -1.7801281213760376, "logits/real": -1.757208228111267, "logps/generated": -437.983154296875, "logps/real": -300.23931884765625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -31.8565731048584, "rewards/margins": 25.158836364746094, "rewards/real": -6.697731018066406, "step": 4200 }, { "epoch": 2.67, "learning_rate": 6.086956521739131e-08, "logits/generated": -1.659287691116333, "logits/real": -1.7502975463867188, "logps/generated": -430.50958251953125, "logps/real": -319.3751525878906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -31.70566177368164, "rewards/margins": 25.877904891967773, "rewards/real": -5.827755928039551, "step": 4210 }, { "epoch": 2.68, "learning_rate": 5.969447708578144e-08, "logits/generated": -1.6172062158584595, "logits/real": -1.6674178838729858, "logps/generated": -425.08648681640625, "logps/real": -290.21990966796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -32.34017562866211, "rewards/margins": 26.0834903717041, "rewards/real": -6.256686210632324, "step": 4220 }, { "epoch": 2.68, "learning_rate": 5.851938895417156e-08, "logits/generated": -1.6710926294326782, "logits/real": -1.6580989360809326, "logps/generated": -428.62164306640625, "logps/real": -274.07098388671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.238311767578125, "rewards/margins": 25.74837303161621, "rewards/real": -6.489937782287598, "step": 4230 }, { "epoch": 2.69, "learning_rate": 5.734430082256169e-08, "logits/generated": -1.673111915588379, "logits/real": -1.718198537826538, "logps/generated": -420.06060791015625, "logps/real": -344.5765075683594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -30.972484588623047, "rewards/margins": 24.510467529296875, "rewards/real": -6.4620161056518555, "step": 4240 }, { "epoch": 2.7, "learning_rate": 5.616921269095182e-08, "logits/generated": -1.708108901977539, "logits/real": -1.7552868127822876, "logps/generated": -416.3793029785156, "logps/real": -286.037841796875, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.67416000366211, "rewards/margins": 25.777502059936523, "rewards/real": -5.896656513214111, "step": 4250 }, { "epoch": 2.7, "learning_rate": 5.499412455934195e-08, "logits/generated": -1.621158242225647, "logits/real": -1.6797363758087158, "logps/generated": -387.11407470703125, "logps/real": -277.28900146484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -29.1168155670166, "rewards/margins": 21.60858917236328, "rewards/real": -7.5082268714904785, "step": 4260 }, { "epoch": 2.71, "learning_rate": 5.3819036427732076e-08, "logits/generated": -1.6894304752349854, "logits/real": -1.7261028289794922, "logps/generated": -456.5179138183594, "logps/real": -290.7882995605469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.7430534362793, "rewards/margins": 27.435291290283203, "rewards/real": -7.307762145996094, "step": 4270 }, { "epoch": 2.72, "learning_rate": 5.2643948296122204e-08, "logits/generated": -1.7677587270736694, "logits/real": -1.7243268489837646, "logps/generated": -460.8067932128906, "logps/real": -278.5439758300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.61956024169922, "rewards/margins": 28.052993774414062, "rewards/real": -7.566567420959473, "step": 4280 }, { "epoch": 2.72, "learning_rate": 5.146886016451234e-08, "logits/generated": -1.6441065073013306, "logits/real": -1.6688801050186157, "logps/generated": -410.22320556640625, "logps/real": -287.65997314453125, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.20147705078125, "rewards/margins": 23.18656349182129, "rewards/real": -7.014913082122803, "step": 4290 }, { "epoch": 2.73, "learning_rate": 5.0293772032902466e-08, "logits/generated": -1.5864546298980713, "logits/real": -1.6448328495025635, "logps/generated": -457.1094665527344, "logps/real": -299.36285400390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -34.83979034423828, "rewards/margins": 27.62546157836914, "rewards/real": -7.214327335357666, "step": 4300 }, { "epoch": 2.73, "learning_rate": 4.91186839012926e-08, "logits/generated": -1.6366796493530273, "logits/real": -1.6697746515274048, "logps/generated": -467.0208435058594, "logps/real": -320.2940673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.921897888183594, "rewards/margins": 27.745059967041016, "rewards/real": -7.176837921142578, "step": 4310 }, { "epoch": 2.74, "learning_rate": 4.794359576968272e-08, "logits/generated": -1.6946895122528076, "logits/real": -1.711517095565796, "logps/generated": -453.2867126464844, "logps/real": -328.2344055175781, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -34.175537109375, "rewards/margins": 26.651561737060547, "rewards/real": -7.5239739418029785, "step": 4320 }, { "epoch": 2.75, "learning_rate": 4.676850763807285e-08, "logits/generated": -1.703352928161621, "logits/real": -1.7690811157226562, "logps/generated": -428.7987365722656, "logps/real": -362.03753662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -31.37459945678711, "rewards/margins": 24.011770248413086, "rewards/real": -7.362831115722656, "step": 4330 }, { "epoch": 2.75, "learning_rate": 4.5593419506462984e-08, "logits/generated": -1.661341667175293, "logits/real": -1.7016382217407227, "logps/generated": -447.7108459472656, "logps/real": -289.16058349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.20888137817383, "rewards/margins": 26.738143920898438, "rewards/real": -7.47074031829834, "step": 4340 }, { "epoch": 2.76, "learning_rate": 4.441833137485311e-08, "logits/generated": -1.672273874282837, "logits/real": -1.7239618301391602, "logps/generated": -412.23162841796875, "logps/real": -278.4026184082031, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -31.304784774780273, "rewards/margins": 23.998537063598633, "rewards/real": -7.306250095367432, "step": 4350 }, { "epoch": 2.77, "learning_rate": 4.3243243243243246e-08, "logits/generated": -1.6548233032226562, "logits/real": -1.6878429651260376, "logps/generated": -458.45648193359375, "logps/real": -306.944580078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.853546142578125, "rewards/margins": 26.48433494567871, "rewards/real": -7.369210720062256, "step": 4360 }, { "epoch": 2.77, "learning_rate": 4.206815511163337e-08, "logits/generated": -1.7220999002456665, "logits/real": -1.7197761535644531, "logps/generated": -457.8179626464844, "logps/real": -310.9056701660156, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.46787643432617, "rewards/margins": 27.519847869873047, "rewards/real": -6.948026180267334, "step": 4370 }, { "epoch": 2.78, "learning_rate": 4.0893066980023495e-08, "logits/generated": -1.6025826930999756, "logits/real": -1.7037376165390015, "logps/generated": -428.7861328125, "logps/real": -282.8119201660156, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.31279754638672, "rewards/margins": 25.544296264648438, "rewards/real": -6.768496513366699, "step": 4380 }, { "epoch": 2.79, "learning_rate": 3.971797884841363e-08, "logits/generated": -1.6418508291244507, "logits/real": -1.7036247253417969, "logps/generated": -451.6459045410156, "logps/real": -293.98883056640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -33.84459686279297, "rewards/margins": 26.4428653717041, "rewards/real": -7.401734352111816, "step": 4390 }, { "epoch": 2.79, "learning_rate": 3.854289071680376e-08, "logits/generated": -1.5944923162460327, "logits/real": -1.6075847148895264, "logps/generated": -410.13671875, "logps/real": -266.53302001953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -30.753042221069336, "rewards/margins": 23.652307510375977, "rewards/real": -7.100737571716309, "step": 4400 }, { "epoch": 2.8, "learning_rate": 3.736780258519389e-08, "logits/generated": -1.6141496896743774, "logits/real": -1.6279274225234985, "logps/generated": -458.05731201171875, "logps/real": -282.2770080566406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -35.077274322509766, "rewards/margins": 27.443347930908203, "rewards/real": -7.633930206298828, "step": 4410 }, { "epoch": 2.8, "learning_rate": 3.619271445358402e-08, "logits/generated": -1.6210002899169922, "logits/real": -1.6764341592788696, "logps/generated": -460.6913146972656, "logps/real": -327.74664306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.58207321166992, "rewards/margins": 27.273120880126953, "rewards/real": -7.3089494705200195, "step": 4420 }, { "epoch": 2.81, "learning_rate": 3.501762632197414e-08, "logits/generated": -1.5527939796447754, "logits/real": -1.6446359157562256, "logps/generated": -428.407470703125, "logps/real": -327.3465576171875, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.21887969970703, "rewards/margins": 25.55868148803711, "rewards/real": -6.6602020263671875, "step": 4430 }, { "epoch": 2.82, "learning_rate": 3.3842538190364275e-08, "logits/generated": -1.5835459232330322, "logits/real": -1.6207221746444702, "logps/generated": -438.98291015625, "logps/real": -280.99237060546875, "loss": 0.0044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.71384048461914, "rewards/margins": 26.74772071838379, "rewards/real": -6.966122627258301, "step": 4440 }, { "epoch": 2.82, "learning_rate": 3.26674500587544e-08, "logits/generated": -1.6002051830291748, "logits/real": -1.6329238414764404, "logps/generated": -430.971435546875, "logps/real": -304.34686279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -32.37575912475586, "rewards/margins": 25.36520004272461, "rewards/real": -7.010560035705566, "step": 4450 }, { "epoch": 2.83, "learning_rate": 3.149236192714454e-08, "logits/generated": -1.6009585857391357, "logits/real": -1.588710904121399, "logps/generated": -440.9615783691406, "logps/real": -266.1169128417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.529884338378906, "rewards/margins": 27.274459838867188, "rewards/real": -6.255424499511719, "step": 4460 }, { "epoch": 2.84, "learning_rate": 3.0317273795534665e-08, "logits/generated": -1.6368831396102905, "logits/real": -1.6338386535644531, "logps/generated": -451.52423095703125, "logps/real": -254.2528076171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.0713005065918, "rewards/margins": 27.269176483154297, "rewards/real": -7.802123069763184, "step": 4470 }, { "epoch": 2.84, "learning_rate": 2.9142185663924792e-08, "logits/generated": -1.5831745862960815, "logits/real": -1.6589330434799194, "logps/generated": -476.41705322265625, "logps/real": -304.2042541503906, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -36.50579071044922, "rewards/margins": 28.691112518310547, "rewards/real": -7.814681053161621, "step": 4480 }, { "epoch": 2.85, "learning_rate": 2.7967097532314924e-08, "logits/generated": -1.5155766010284424, "logits/real": -1.6084210872650146, "logps/generated": -444.40985107421875, "logps/real": -327.21868896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.392765045166016, "rewards/margins": 26.478784561157227, "rewards/real": -6.913978576660156, "step": 4490 }, { "epoch": 2.86, "learning_rate": 2.6792009400705055e-08, "logits/generated": -1.546156883239746, "logits/real": -1.6635850667953491, "logps/generated": -455.74920654296875, "logps/real": -290.7769470214844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.94561767578125, "rewards/margins": 27.562381744384766, "rewards/real": -7.383233070373535, "step": 4500 }, { "epoch": 2.86, "learning_rate": 2.561692126909518e-08, "logits/generated": -1.5961230993270874, "logits/real": -1.6761224269866943, "logps/generated": -474.77325439453125, "logps/real": -320.73260498046875, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.5091552734375, "rewards/margins": 28.40032386779785, "rewards/real": -8.108833312988281, "step": 4510 }, { "epoch": 2.87, "learning_rate": 2.444183313748531e-08, "logits/generated": -1.5484769344329834, "logits/real": -1.6109914779663086, "logps/generated": -466.61102294921875, "logps/real": -272.64215087890625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -36.228668212890625, "rewards/margins": 28.0214900970459, "rewards/real": -8.207174301147461, "step": 4520 }, { "epoch": 2.87, "learning_rate": 2.326674500587544e-08, "logits/generated": -1.6316810846328735, "logits/real": -1.692752480506897, "logps/generated": -464.38372802734375, "logps/real": -331.10406494140625, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.567840576171875, "rewards/margins": 26.617473602294922, "rewards/real": -7.950366973876953, "step": 4530 }, { "epoch": 2.88, "learning_rate": 2.209165687426557e-08, "logits/generated": -1.57527756690979, "logits/real": -1.682943344116211, "logps/generated": -459.98919677734375, "logps/real": -372.90106201171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -34.08959197998047, "rewards/margins": 26.82863426208496, "rewards/real": -7.2609543800354, "step": 4540 }, { "epoch": 2.89, "learning_rate": 2.09165687426557e-08, "logits/generated": -1.4877384901046753, "logits/real": -1.652479887008667, "logps/generated": -450.6048889160156, "logps/real": -311.8920593261719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.74338150024414, "rewards/margins": 26.67026710510254, "rewards/real": -7.07311487197876, "step": 4550 }, { "epoch": 2.89, "learning_rate": 1.9741480611045828e-08, "logits/generated": -1.5721471309661865, "logits/real": -1.7118234634399414, "logps/generated": -434.28472900390625, "logps/real": -302.3216247558594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -32.82756805419922, "rewards/margins": 25.282840728759766, "rewards/real": -7.544726371765137, "step": 4560 }, { "epoch": 2.9, "learning_rate": 1.8566392479435956e-08, "logits/generated": -1.6017907857894897, "logits/real": -1.6692975759506226, "logps/generated": -445.66387939453125, "logps/real": -283.04779052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -33.74781036376953, "rewards/margins": 26.4423770904541, "rewards/real": -7.3054304122924805, "step": 4570 }, { "epoch": 2.91, "learning_rate": 1.7391304347826087e-08, "logits/generated": -1.621559739112854, "logits/real": -1.7216978073120117, "logps/generated": -429.00128173828125, "logps/real": -390.5367431640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -30.85262680053711, "rewards/margins": 24.305044174194336, "rewards/real": -6.547584533691406, "step": 4580 }, { "epoch": 2.91, "learning_rate": 1.6216216216216218e-08, "logits/generated": -1.5349498987197876, "logits/real": -1.6045535802841187, "logps/generated": -446.84576416015625, "logps/real": -334.5065002441406, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -34.07716369628906, "rewards/margins": 26.082788467407227, "rewards/real": -7.994380950927734, "step": 4590 }, { "epoch": 2.92, "learning_rate": 1.5041128084606346e-08, "logits/generated": -1.5646604299545288, "logits/real": -1.6417030096054077, "logps/generated": -453.3228454589844, "logps/real": -315.86297607421875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -34.7583122253418, "rewards/margins": 26.6680965423584, "rewards/real": -8.090215682983398, "step": 4600 }, { "epoch": 2.93, "learning_rate": 1.3866039952996475e-08, "logits/generated": -1.4989112615585327, "logits/real": -1.601131796836853, "logps/generated": -504.98883056640625, "logps/real": -306.59600830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -39.9133186340332, "rewards/margins": 32.207984924316406, "rewards/real": -7.705336093902588, "step": 4610 }, { "epoch": 2.93, "learning_rate": 1.2690951821386603e-08, "logits/generated": -1.5204241275787354, "logits/real": -1.6375468969345093, "logps/generated": -443.3485412597656, "logps/real": -309.1793212890625, "loss": 0.0065, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -34.35383987426758, "rewards/margins": 25.77254295349121, "rewards/real": -8.581293106079102, "step": 4620 }, { "epoch": 2.94, "learning_rate": 1.1515863689776732e-08, "logits/generated": -1.5714848041534424, "logits/real": -1.5972023010253906, "logps/generated": -493.9441833496094, "logps/real": -307.8035583496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -37.91588592529297, "rewards/margins": 29.837596893310547, "rewards/real": -8.078287124633789, "step": 4630 }, { "epoch": 2.94, "learning_rate": 1.0340775558166862e-08, "logits/generated": -1.4898492097854614, "logits/real": -1.6078729629516602, "logps/generated": -496.19024658203125, "logps/real": -302.10528564453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -38.65169143676758, "rewards/margins": 32.12113952636719, "rewards/real": -6.530550956726074, "step": 4640 }, { "epoch": 2.95, "learning_rate": 9.165687426556991e-09, "logits/generated": -1.4833087921142578, "logits/real": -1.5793912410736084, "logps/generated": -436.9119567871094, "logps/real": -314.6705322265625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.653076171875, "rewards/margins": 26.469491958618164, "rewards/real": -7.183583736419678, "step": 4650 }, { "epoch": 2.96, "learning_rate": 7.99059929494712e-09, "logits/generated": -1.524432897567749, "logits/real": -1.5506082773208618, "logps/generated": -460.62335205078125, "logps/real": -286.51434326171875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -35.37180709838867, "rewards/margins": 28.64103126525879, "rewards/real": -6.730777740478516, "step": 4660 }, { "epoch": 2.96, "learning_rate": 6.81551116333725e-09, "logits/generated": -1.577246904373169, "logits/real": -1.6606992483139038, "logps/generated": -447.3589782714844, "logps/real": -306.1556701660156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -33.04829025268555, "rewards/margins": 25.612957000732422, "rewards/real": -7.435332298278809, "step": 4670 }, { "epoch": 2.97, "learning_rate": 5.64042303172738e-09, "logits/generated": -1.5192750692367554, "logits/real": -1.6280876398086548, "logps/generated": -471.4060974121094, "logps/real": -288.0888977050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.808692932128906, "rewards/margins": 29.04833984375, "rewards/real": -7.76035213470459, "step": 4680 }, { "epoch": 2.98, "learning_rate": 4.465334900117508e-09, "logits/generated": -1.558406949043274, "logits/real": -1.6380071640014648, "logps/generated": -475.8984375, "logps/real": -323.078369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -35.528656005859375, "rewards/margins": 27.93899154663086, "rewards/real": -7.589668273925781, "step": 4690 }, { "epoch": 2.98, "learning_rate": 3.2902467685076377e-09, "logits/generated": -1.557838797569275, "logits/real": -1.595538854598999, "logps/generated": -477.4278259277344, "logps/real": -297.52996826171875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -37.0074462890625, "rewards/margins": 28.823505401611328, "rewards/real": -8.183939933776855, "step": 4700 }, { "epoch": 2.99, "learning_rate": 2.115158636897767e-09, "logits/generated": -1.5562093257904053, "logits/real": -1.6695356369018555, "logps/generated": -472.9737243652344, "logps/real": -330.8188171386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -36.18653869628906, "rewards/margins": 28.221332550048828, "rewards/real": -7.965203285217285, "step": 4710 }, { "epoch": 2.99, "learning_rate": 9.400705052878966e-10, "logits/generated": -1.5382697582244873, "logits/real": -1.631152868270874, "logps/generated": -434.56109619140625, "logps/real": -307.39227294921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -33.543827056884766, "rewards/margins": 25.150760650634766, "rewards/real": -8.393064498901367, "step": 4720 }, { "epoch": 3.0, "step": 4728, "total_flos": 0.0, "train_loss": 0.022571272342312175, "train_runtime": 37239.7337, "train_samples_per_second": 4.061, "train_steps_per_second": 0.127 } ], "logging_steps": 10, "max_steps": 4728, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }