|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4728, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0570824524312896e-09, |
|
"logits/generated": -2.9680545330047607, |
|
"logits/real": -3.0339415073394775, |
|
"logps/generated": -179.1082763671875, |
|
"logps/real": -255.83349609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0570824524312896e-08, |
|
"logits/generated": -2.9810054302215576, |
|
"logits/real": -3.052731990814209, |
|
"logps/generated": -105.46451568603516, |
|
"logps/real": -221.724853515625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/generated": -0.0014893743209540844, |
|
"rewards/margins": 0.0027981153689324856, |
|
"rewards/real": 0.0013087405823171139, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.114164904862579e-08, |
|
"logits/generated": -2.968405246734619, |
|
"logits/real": -3.0509631633758545, |
|
"logps/generated": -104.47855377197266, |
|
"logps/real": -277.7776184082031, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.029584383592009544, |
|
"rewards/margins": 0.053206123411655426, |
|
"rewards/real": 0.02362174168229103, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.1712473572938685e-08, |
|
"logits/generated": -2.994403600692749, |
|
"logits/real": -3.063781499862671, |
|
"logps/generated": -98.96472930908203, |
|
"logps/real": -216.9051513671875, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.14395593106746674, |
|
"rewards/margins": 0.2154092788696289, |
|
"rewards/real": 0.07145334035158157, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.228329809725158e-08, |
|
"logits/generated": -2.9682469367980957, |
|
"logits/real": -3.0599234104156494, |
|
"logps/generated": -120.07786560058594, |
|
"logps/real": -250.1599578857422, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.3916197419166565, |
|
"rewards/margins": 0.5931448936462402, |
|
"rewards/real": 0.2015252411365509, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.285412262156448e-08, |
|
"logits/generated": -2.9771907329559326, |
|
"logits/real": -3.0221831798553467, |
|
"logps/generated": -100.34281158447266, |
|
"logps/real": -196.468505859375, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.585746169090271, |
|
"rewards/margins": 0.8785923719406128, |
|
"rewards/real": 0.2928462624549866, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.342494714587737e-08, |
|
"logits/generated": -2.9906773567199707, |
|
"logits/real": -3.0186338424682617, |
|
"logps/generated": -121.91976165771484, |
|
"logps/real": -218.0440673828125, |
|
"loss": 0.2915, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.8651537895202637, |
|
"rewards/margins": 1.1812090873718262, |
|
"rewards/real": 0.3160552680492401, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.399577167019028e-08, |
|
"logits/generated": -2.928558826446533, |
|
"logits/real": -3.018244743347168, |
|
"logps/generated": -128.43055725097656, |
|
"logps/real": -290.92919921875, |
|
"loss": 0.231, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.2124083042144775, |
|
"rewards/margins": 1.8828494548797607, |
|
"rewards/real": 0.6704407930374146, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.456659619450317e-08, |
|
"logits/generated": -2.8757901191711426, |
|
"logits/real": -2.999915599822998, |
|
"logps/generated": -117.48758697509766, |
|
"logps/real": -263.73193359375, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.505746841430664, |
|
"rewards/margins": 2.246025800704956, |
|
"rewards/real": 0.7402790188789368, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.513742071881606e-08, |
|
"logits/generated": -2.876457691192627, |
|
"logits/real": -2.995556116104126, |
|
"logps/generated": -131.91555786132812, |
|
"logps/real": -248.60205078125, |
|
"loss": 0.1751, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.616385817527771, |
|
"rewards/margins": 2.4295945167541504, |
|
"rewards/real": 0.8132089376449585, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0570824524312896e-07, |
|
"logits/generated": -2.8680336475372314, |
|
"logits/real": -2.983652114868164, |
|
"logps/generated": -135.1668701171875, |
|
"logps/real": -219.4280548095703, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.855940580368042, |
|
"rewards/margins": 2.530752420425415, |
|
"rewards/real": 0.6748121976852417, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1627906976744186e-07, |
|
"logits/generated": -2.908716917037964, |
|
"logits/real": -2.9902663230895996, |
|
"logps/generated": -116.56935119628906, |
|
"logps/real": -190.9403076171875, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.6852614879608154, |
|
"rewards/margins": 2.455672264099121, |
|
"rewards/real": 0.7704105973243713, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2684989429175474e-07, |
|
"logits/generated": -2.867933750152588, |
|
"logits/real": -2.9436843395233154, |
|
"logps/generated": -122.19034576416016, |
|
"logps/real": -200.4087371826172, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.132105827331543, |
|
"rewards/margins": 2.99078106880188, |
|
"rewards/real": 0.8586748838424683, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3742071881606765e-07, |
|
"logits/generated": -2.8560738563537598, |
|
"logits/real": -2.948706865310669, |
|
"logps/generated": -140.22650146484375, |
|
"logps/real": -238.3866729736328, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.533663272857666, |
|
"rewards/margins": 3.416466474533081, |
|
"rewards/real": 0.8828039169311523, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4799154334038056e-07, |
|
"logits/generated": -2.7741053104400635, |
|
"logits/real": -2.9004416465759277, |
|
"logps/generated": -137.82418823242188, |
|
"logps/real": -271.0147705078125, |
|
"loss": 0.1159, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.4761903285980225, |
|
"rewards/margins": 3.3843002319335938, |
|
"rewards/real": 0.9081098437309265, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5856236786469342e-07, |
|
"logits/generated": -2.813859701156616, |
|
"logits/real": -2.9562556743621826, |
|
"logps/generated": -141.76651000976562, |
|
"logps/real": -253.9093017578125, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.886215925216675, |
|
"rewards/margins": 3.9585909843444824, |
|
"rewards/real": 1.0723752975463867, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6913319238900633e-07, |
|
"logits/generated": -2.755221366882324, |
|
"logits/real": -2.9063618183135986, |
|
"logps/generated": -142.30709838867188, |
|
"logps/real": -174.6675567626953, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.7275359630584717, |
|
"rewards/margins": 4.270571231842041, |
|
"rewards/real": 0.5430347323417664, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7970401691331924e-07, |
|
"logits/generated": -2.762349843978882, |
|
"logits/real": -2.9174437522888184, |
|
"logps/generated": -134.84609985351562, |
|
"logps/real": -218.52487182617188, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.727612018585205, |
|
"rewards/margins": 4.559648513793945, |
|
"rewards/real": 0.8320371508598328, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9027484143763213e-07, |
|
"logits/generated": -2.7523839473724365, |
|
"logits/real": -2.8980984687805176, |
|
"logps/generated": -142.16477966308594, |
|
"logps/real": -214.64688110351562, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.7176735401153564, |
|
"rewards/margins": 4.531271457672119, |
|
"rewards/real": 0.8135976791381836, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.00845665961945e-07, |
|
"logits/generated": -2.7264347076416016, |
|
"logits/real": -2.9047653675079346, |
|
"logps/generated": -144.19976806640625, |
|
"logps/real": -226.42312622070312, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.002590179443359, |
|
"rewards/margins": 4.741377830505371, |
|
"rewards/real": 0.7387879490852356, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1141649048625792e-07, |
|
"logits/generated": -2.7650415897369385, |
|
"logits/real": -2.9020564556121826, |
|
"logps/generated": -150.42185974121094, |
|
"logps/real": -206.5177001953125, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.431978702545166, |
|
"rewards/margins": 5.01816463470459, |
|
"rewards/real": 0.5861854553222656, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.219873150105708e-07, |
|
"logits/generated": -2.7214510440826416, |
|
"logits/real": -2.90799617767334, |
|
"logps/generated": -151.5577850341797, |
|
"logps/real": -270.7734680175781, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.565618515014648, |
|
"rewards/margins": 5.258214473724365, |
|
"rewards/real": 0.6925961375236511, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3255813953488372e-07, |
|
"logits/generated": -2.695213556289673, |
|
"logits/real": -2.896963596343994, |
|
"logps/generated": -154.06976318359375, |
|
"logps/real": -225.1235809326172, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.846016883850098, |
|
"rewards/margins": 5.5229692459106445, |
|
"rewards/real": 0.6769517660140991, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.431289640591966e-07, |
|
"logits/generated": -2.684670925140381, |
|
"logits/real": -2.8483946323394775, |
|
"logps/generated": -158.3540496826172, |
|
"logps/real": -233.24765014648438, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.12518310546875, |
|
"rewards/margins": 5.8765363693237305, |
|
"rewards/real": 0.7513536810874939, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.536997885835095e-07, |
|
"logits/generated": -2.7210946083068848, |
|
"logits/real": -2.8621644973754883, |
|
"logps/generated": -155.71336364746094, |
|
"logps/real": -272.6509094238281, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.054671287536621, |
|
"rewards/margins": 5.770359039306641, |
|
"rewards/real": 0.7156881093978882, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.642706131078224e-07, |
|
"logits/generated": -2.684602737426758, |
|
"logits/real": -2.858766794204712, |
|
"logps/generated": -165.3872833251953, |
|
"logps/real": -220.634765625, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.2169084548950195, |
|
"rewards/margins": 6.802570343017578, |
|
"rewards/real": 0.585662305355072, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.748414376321353e-07, |
|
"logits/generated": -2.650390148162842, |
|
"logits/real": -2.83626389503479, |
|
"logps/generated": -169.95376586914062, |
|
"logps/real": -221.45565795898438, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.422726631164551, |
|
"rewards/margins": 6.729206085205078, |
|
"rewards/real": 0.3064800202846527, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.854122621564482e-07, |
|
"logits/generated": -2.6237170696258545, |
|
"logits/real": -2.831840753555298, |
|
"logps/generated": -179.83026123046875, |
|
"logps/real": -269.57489013671875, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.582816123962402, |
|
"rewards/margins": 6.789361476898193, |
|
"rewards/real": 0.20654550194740295, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.959830866807611e-07, |
|
"logits/generated": -2.6729588508605957, |
|
"logits/real": -2.7799630165100098, |
|
"logps/generated": -166.50579833984375, |
|
"logps/real": -209.8294219970703, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -6.923952579498291, |
|
"rewards/margins": 6.987476348876953, |
|
"rewards/real": 0.06352332979440689, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0655391120507393e-07, |
|
"logits/generated": -2.7076289653778076, |
|
"logits/real": -2.836456537246704, |
|
"logps/generated": -180.61297607421875, |
|
"logps/real": -224.1366424560547, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -7.590481758117676, |
|
"rewards/margins": 7.375975131988525, |
|
"rewards/real": -0.21450698375701904, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1712473572938684e-07, |
|
"logits/generated": -2.563586473464966, |
|
"logits/real": -2.80302357673645, |
|
"logps/generated": -202.00930786132812, |
|
"logps/real": -261.15875244140625, |
|
"loss": 0.044, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -8.740743637084961, |
|
"rewards/margins": 8.463761329650879, |
|
"rewards/real": -0.27698156237602234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2769556025369975e-07, |
|
"logits/generated": -2.6333580017089844, |
|
"logits/real": -2.798062562942505, |
|
"logps/generated": -204.22036743164062, |
|
"logps/real": -244.3586883544922, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -8.76515007019043, |
|
"rewards/margins": 7.9285407066345215, |
|
"rewards/real": -0.8366090059280396, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3826638477801266e-07, |
|
"logits/generated": -2.666229248046875, |
|
"logits/real": -2.777111291885376, |
|
"logps/generated": -206.2598876953125, |
|
"logps/real": -258.5753173828125, |
|
"loss": 0.0494, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.166308403015137, |
|
"rewards/margins": 8.505932807922363, |
|
"rewards/real": -0.6603760719299316, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4883720930232557e-07, |
|
"logits/generated": -2.61083984375, |
|
"logits/real": -2.7282021045684814, |
|
"logps/generated": -204.8091583251953, |
|
"logps/real": -260.3985595703125, |
|
"loss": 0.0304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.159486770629883, |
|
"rewards/margins": 8.657907485961914, |
|
"rewards/real": -0.5015801191329956, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.594080338266385e-07, |
|
"logits/generated": -2.6197121143341064, |
|
"logits/real": -2.7505860328674316, |
|
"logps/generated": -198.1029815673828, |
|
"logps/real": -274.4460144042969, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.936513900756836, |
|
"rewards/margins": 8.366409301757812, |
|
"rewards/real": -0.570103108882904, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.699788583509514e-07, |
|
"logits/generated": -2.5747499465942383, |
|
"logits/real": -2.700770854949951, |
|
"logps/generated": -195.65858459472656, |
|
"logps/real": -264.3505859375, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -9.730626106262207, |
|
"rewards/margins": 7.978546142578125, |
|
"rewards/real": -1.7520811557769775, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8054968287526425e-07, |
|
"logits/generated": -2.644111156463623, |
|
"logits/real": -2.7844269275665283, |
|
"logps/generated": -187.1933135986328, |
|
"logps/real": -274.1761779785156, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -8.151183128356934, |
|
"rewards/margins": 7.859720706939697, |
|
"rewards/real": -0.29146260023117065, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.9112050739957716e-07, |
|
"logits/generated": -2.5913429260253906, |
|
"logits/real": -2.7488276958465576, |
|
"logps/generated": -188.7386016845703, |
|
"logps/real": -247.2688446044922, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -8.474884033203125, |
|
"rewards/margins": 8.537331581115723, |
|
"rewards/real": 0.062447331845760345, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.0169133192389e-07, |
|
"logits/generated": -2.5858843326568604, |
|
"logits/real": -2.7015249729156494, |
|
"logps/generated": -195.01470947265625, |
|
"logps/real": -203.84634399414062, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.704763412475586, |
|
"rewards/margins": 8.604458808898926, |
|
"rewards/real": -1.1003044843673706, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1226215644820293e-07, |
|
"logits/generated": -2.5373871326446533, |
|
"logits/real": -2.662891387939453, |
|
"logps/generated": -196.2036590576172, |
|
"logps/real": -228.1417236328125, |
|
"loss": 0.0345, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -10.00887680053711, |
|
"rewards/margins": 8.654512405395508, |
|
"rewards/real": -1.3543639183044434, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.2283298097251584e-07, |
|
"logits/generated": -2.5022478103637695, |
|
"logits/real": -2.657803773880005, |
|
"logps/generated": -204.67507934570312, |
|
"logps/real": -220.3046875, |
|
"loss": 0.0373, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -10.569047927856445, |
|
"rewards/margins": 8.992898941040039, |
|
"rewards/real": -1.5761499404907227, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.3340380549682875e-07, |
|
"logits/generated": -2.590941905975342, |
|
"logits/real": -2.6890556812286377, |
|
"logps/generated": -225.4229736328125, |
|
"logps/real": -251.73074340820312, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -11.226091384887695, |
|
"rewards/margins": 9.168951034545898, |
|
"rewards/real": -2.0571413040161133, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.439746300211416e-07, |
|
"logits/generated": -2.5578768253326416, |
|
"logits/real": -2.7013614177703857, |
|
"logps/generated": -228.48886108398438, |
|
"logps/real": -288.42193603515625, |
|
"loss": 0.0305, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -12.415830612182617, |
|
"rewards/margins": 10.27761459350586, |
|
"rewards/real": -2.1382155418395996, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/generated": -2.5126521587371826, |
|
"logits/real": -2.6469998359680176, |
|
"logps/generated": -242.6312255859375, |
|
"logps/real": -259.1160888671875, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.763842582702637, |
|
"rewards/margins": 11.178007125854492, |
|
"rewards/real": -2.5858359336853027, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.6511627906976743e-07, |
|
"logits/generated": -2.511451482772827, |
|
"logits/real": -2.610137939453125, |
|
"logps/generated": -224.3479766845703, |
|
"logps/real": -240.7174530029297, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.783953666687012, |
|
"rewards/margins": 9.211058616638184, |
|
"rewards/real": -2.572895050048828, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7568710359408034e-07, |
|
"logits/generated": -2.508060932159424, |
|
"logits/real": -2.583813190460205, |
|
"logps/generated": -230.49850463867188, |
|
"logps/real": -259.2693786621094, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.978529930114746, |
|
"rewards/margins": 9.823976516723633, |
|
"rewards/real": -2.154552936553955, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.862579281183933e-07, |
|
"logits/generated": -2.4459714889526367, |
|
"logits/real": -2.5999927520751953, |
|
"logps/generated": -222.58071899414062, |
|
"logps/real": -281.602783203125, |
|
"loss": 0.0271, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -11.706878662109375, |
|
"rewards/margins": 9.985689163208008, |
|
"rewards/real": -1.7211902141571045, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.968287526427061e-07, |
|
"logits/generated": -2.5001561641693115, |
|
"logits/real": -2.614675998687744, |
|
"logps/generated": -231.21444702148438, |
|
"logps/real": -303.97711181640625, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.426594734191895, |
|
"rewards/margins": 10.142064094543457, |
|
"rewards/real": -2.2845306396484375, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.991774383078731e-07, |
|
"logits/generated": -2.4237747192382812, |
|
"logits/real": -2.5837903022766113, |
|
"logps/generated": -235.39791870117188, |
|
"logps/real": -256.73016357421875, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.370112419128418, |
|
"rewards/margins": 10.403009414672852, |
|
"rewards/real": -2.967103958129883, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.980023501762632e-07, |
|
"logits/generated": -2.475121259689331, |
|
"logits/real": -2.6039159297943115, |
|
"logps/generated": -255.3181915283203, |
|
"logps/real": -247.61093139648438, |
|
"loss": 0.0309, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -15.310647964477539, |
|
"rewards/margins": 11.72684383392334, |
|
"rewards/real": -3.583803176879883, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.968272620446533e-07, |
|
"logits/generated": -2.4924330711364746, |
|
"logits/real": -2.595700979232788, |
|
"logps/generated": -268.7317199707031, |
|
"logps/real": -251.86392211914062, |
|
"loss": 0.0354, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.943933486938477, |
|
"rewards/margins": 12.208364486694336, |
|
"rewards/real": -3.735567808151245, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.956521739130435e-07, |
|
"logits/generated": -2.5180201530456543, |
|
"logits/real": -2.5667471885681152, |
|
"logps/generated": -257.37322998046875, |
|
"logps/real": -284.225830078125, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.81371021270752, |
|
"rewards/margins": 11.491992950439453, |
|
"rewards/real": -3.3217170238494873, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944770857814336e-07, |
|
"logits/generated": -2.4635119438171387, |
|
"logits/real": -2.536935567855835, |
|
"logps/generated": -259.5522766113281, |
|
"logps/real": -244.10079956054688, |
|
"loss": 0.026, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -15.024828910827637, |
|
"rewards/margins": 12.193452835083008, |
|
"rewards/real": -2.831378221511841, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.933019976498237e-07, |
|
"logits/generated": -2.4268784523010254, |
|
"logits/real": -2.527644157409668, |
|
"logps/generated": -250.21347045898438, |
|
"logps/real": -240.0506591796875, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.466471672058105, |
|
"rewards/margins": 11.934263229370117, |
|
"rewards/real": -2.5322086811065674, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.921269095182138e-07, |
|
"logits/generated": -2.436392068862915, |
|
"logits/real": -2.5244240760803223, |
|
"logps/generated": -228.0118408203125, |
|
"logps/real": -242.95556640625, |
|
"loss": 0.034, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.751724243164062, |
|
"rewards/margins": 10.017329216003418, |
|
"rewards/real": -1.7343953847885132, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.909518213866039e-07, |
|
"logits/generated": -2.4732158184051514, |
|
"logits/real": -2.5478568077087402, |
|
"logps/generated": -229.0042266845703, |
|
"logps/real": -236.5896453857422, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -11.801587104797363, |
|
"rewards/margins": 10.913532257080078, |
|
"rewards/real": -0.8880546689033508, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.897767332549941e-07, |
|
"logits/generated": -2.5068588256835938, |
|
"logits/real": -2.603188991546631, |
|
"logps/generated": -220.4393310546875, |
|
"logps/real": -260.52032470703125, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.580656051635742, |
|
"rewards/margins": 10.65870475769043, |
|
"rewards/real": -0.9219503402709961, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.886016451233842e-07, |
|
"logits/generated": -2.5237538814544678, |
|
"logits/real": -2.5715441703796387, |
|
"logps/generated": -254.0616912841797, |
|
"logps/real": -278.89947509765625, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.214986801147461, |
|
"rewards/margins": 12.340234756469727, |
|
"rewards/real": -0.8747501373291016, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.874265569917743e-07, |
|
"logits/generated": -2.4976603984832764, |
|
"logits/real": -2.5703940391540527, |
|
"logps/generated": -235.39816284179688, |
|
"logps/real": -229.18832397460938, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -12.485773086547852, |
|
"rewards/margins": 10.787823677062988, |
|
"rewards/real": -1.6979477405548096, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.862514688601645e-07, |
|
"logits/generated": -2.4129319190979004, |
|
"logits/real": -2.5519614219665527, |
|
"logps/generated": -232.7602996826172, |
|
"logps/real": -230.69338989257812, |
|
"loss": 0.0194, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -12.507562637329102, |
|
"rewards/margins": 10.88932991027832, |
|
"rewards/real": -1.6182336807250977, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.850763807285546e-07, |
|
"logits/generated": -2.368098735809326, |
|
"logits/real": -2.512528896331787, |
|
"logps/generated": -256.1530456542969, |
|
"logps/real": -293.58050537109375, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.072511672973633, |
|
"rewards/margins": 12.025641441345215, |
|
"rewards/real": -2.046870231628418, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.839012925969447e-07, |
|
"logits/generated": -2.349189043045044, |
|
"logits/real": -2.4440290927886963, |
|
"logps/generated": -256.9201354980469, |
|
"logps/real": -251.1465606689453, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.40594482421875, |
|
"rewards/margins": 12.272120475769043, |
|
"rewards/real": -3.1338250637054443, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.827262044653348e-07, |
|
"logits/generated": -2.3456802368164062, |
|
"logits/real": -2.4477171897888184, |
|
"logps/generated": -243.4130401611328, |
|
"logps/real": -298.19415283203125, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.486459732055664, |
|
"rewards/margins": 11.11083984375, |
|
"rewards/real": -2.3756182193756104, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.81551116333725e-07, |
|
"logits/generated": -2.3130345344543457, |
|
"logits/real": -2.4230241775512695, |
|
"logps/generated": -234.4820556640625, |
|
"logps/real": -215.69741821289062, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -13.71208667755127, |
|
"rewards/margins": 12.096234321594238, |
|
"rewards/real": -1.6158527135849, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.803760282021151e-07, |
|
"logits/generated": -2.439054012298584, |
|
"logits/real": -2.489902973175049, |
|
"logps/generated": -234.609619140625, |
|
"logps/real": -227.06698608398438, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.273165702819824, |
|
"rewards/margins": 10.841035842895508, |
|
"rewards/real": -2.432131052017212, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.792009400705052e-07, |
|
"logits/generated": -2.3417305946350098, |
|
"logits/real": -2.4370510578155518, |
|
"logps/generated": -250.015380859375, |
|
"logps/real": -228.6704559326172, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -14.521652221679688, |
|
"rewards/margins": 12.285801887512207, |
|
"rewards/real": -2.2358508110046387, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.780258519388953e-07, |
|
"logits/generated": -2.3547120094299316, |
|
"logits/real": -2.4297335147857666, |
|
"logps/generated": -259.03955078125, |
|
"logps/real": -303.78106689453125, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -13.853286743164062, |
|
"rewards/margins": 12.204731941223145, |
|
"rewards/real": -1.6485567092895508, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.768507638072856e-07, |
|
"logits/generated": -2.316434383392334, |
|
"logits/real": -2.376094341278076, |
|
"logps/generated": -248.5943603515625, |
|
"logps/real": -269.3667297363281, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.337122917175293, |
|
"rewards/margins": 11.638051986694336, |
|
"rewards/real": -2.6990718841552734, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7567567567567566e-07, |
|
"logits/generated": -2.3710427284240723, |
|
"logits/real": -2.415874481201172, |
|
"logps/generated": -254.5215301513672, |
|
"logps/real": -308.25048828125, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.657424926757812, |
|
"rewards/margins": 11.751736640930176, |
|
"rewards/real": -1.9056885242462158, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.745005875440658e-07, |
|
"logits/generated": -2.224567413330078, |
|
"logits/real": -2.3295130729675293, |
|
"logps/generated": -244.08151245117188, |
|
"logps/real": -279.2975769042969, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.952921867370605, |
|
"rewards/margins": 11.643460273742676, |
|
"rewards/real": -2.3094632625579834, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.733254994124559e-07, |
|
"logits/generated": -2.297942638397217, |
|
"logits/real": -2.3956637382507324, |
|
"logps/generated": -259.2550354003906, |
|
"logps/real": -235.402587890625, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -15.151153564453125, |
|
"rewards/margins": 13.520169258117676, |
|
"rewards/real": -1.630985975265503, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.72150411280846e-07, |
|
"logits/generated": -2.262821674346924, |
|
"logits/real": -2.3863885402679443, |
|
"logps/generated": -257.7832336425781, |
|
"logps/real": -250.14306640625, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.589390754699707, |
|
"rewards/margins": 14.08702564239502, |
|
"rewards/real": -1.502366065979004, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7097532314923617e-07, |
|
"logits/generated": -2.29247784614563, |
|
"logits/real": -2.328556537628174, |
|
"logps/generated": -274.10650634765625, |
|
"logps/real": -247.0130615234375, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.48426628112793, |
|
"rewards/margins": 13.664430618286133, |
|
"rewards/real": -2.8198394775390625, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6980023501762627e-07, |
|
"logits/generated": -2.256700277328491, |
|
"logits/real": -2.3285982608795166, |
|
"logps/generated": -269.7905578613281, |
|
"logps/real": -279.09906005859375, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.461273193359375, |
|
"rewards/margins": 13.154187202453613, |
|
"rewards/real": -2.3070871829986572, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.686251468860165e-07, |
|
"logits/generated": -2.2194008827209473, |
|
"logits/real": -2.3112258911132812, |
|
"logps/generated": -287.9308776855469, |
|
"logps/real": -242.48416137695312, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.926103591918945, |
|
"rewards/margins": 14.351516723632812, |
|
"rewards/real": -3.5745856761932373, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.674500587544066e-07, |
|
"logits/generated": -2.3291237354278564, |
|
"logits/real": -2.4051151275634766, |
|
"logps/generated": -270.6801452636719, |
|
"logps/real": -284.98931884765625, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.692561149597168, |
|
"rewards/margins": 12.790186882019043, |
|
"rewards/real": -2.902374267578125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.662749706227967e-07, |
|
"logits/generated": -2.228729724884033, |
|
"logits/real": -2.342529773712158, |
|
"logps/generated": -279.6976318359375, |
|
"logps/real": -259.60150146484375, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.11913299560547, |
|
"rewards/margins": 13.92261028289795, |
|
"rewards/real": -3.1965222358703613, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6509988249118683e-07, |
|
"logits/generated": -2.215961217880249, |
|
"logits/real": -2.3359737396240234, |
|
"logps/generated": -292.3762512207031, |
|
"logps/real": -271.7994384765625, |
|
"loss": 0.0216, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.02857780456543, |
|
"rewards/margins": 14.50737190246582, |
|
"rewards/real": -3.521205425262451, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6392479435957693e-07, |
|
"logits/generated": -2.204655408859253, |
|
"logits/real": -2.357146739959717, |
|
"logps/generated": -287.6128845214844, |
|
"logps/real": -313.80096435546875, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.052814483642578, |
|
"rewards/margins": 14.257978439331055, |
|
"rewards/real": -3.7948365211486816, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6274970622796704e-07, |
|
"logits/generated": -2.2601847648620605, |
|
"logits/real": -2.416104555130005, |
|
"logps/generated": -297.5831604003906, |
|
"logps/real": -311.974609375, |
|
"loss": 0.0451, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.84690284729004, |
|
"rewards/margins": 14.774507522583008, |
|
"rewards/real": -4.072394371032715, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6157461809635724e-07, |
|
"logits/generated": -2.2359187602996826, |
|
"logits/real": -2.4105429649353027, |
|
"logps/generated": -263.5639953613281, |
|
"logps/real": -253.03262329101562, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.32426643371582, |
|
"rewards/margins": 13.009058952331543, |
|
"rewards/real": -3.315206527709961, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6039952996474734e-07, |
|
"logits/generated": -2.2089052200317383, |
|
"logits/real": -2.3267462253570557, |
|
"logps/generated": -292.5827331542969, |
|
"logps/real": -283.88092041015625, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.413471221923828, |
|
"rewards/margins": 14.490208625793457, |
|
"rewards/real": -3.92326283454895, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5922444183313745e-07, |
|
"logits/generated": -2.1738524436950684, |
|
"logits/real": -2.3648948669433594, |
|
"logps/generated": -283.103759765625, |
|
"logps/real": -276.14984130859375, |
|
"loss": 0.0277, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.33072280883789, |
|
"rewards/margins": 14.203720092773438, |
|
"rewards/real": -3.1270031929016113, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580493537015276e-07, |
|
"logits/generated": -2.140481948852539, |
|
"logits/real": -2.2898101806640625, |
|
"logps/generated": -279.39434814453125, |
|
"logps/real": -256.742431640625, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -17.80463218688965, |
|
"rewards/margins": 14.53624153137207, |
|
"rewards/real": -3.2683918476104736, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.568742655699177e-07, |
|
"logits/generated": -2.1728413105010986, |
|
"logits/real": -2.299516201019287, |
|
"logps/generated": -290.2254943847656, |
|
"logps/real": -296.55792236328125, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.140968322753906, |
|
"rewards/margins": 14.465237617492676, |
|
"rewards/real": -3.675732374191284, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5569917743830786e-07, |
|
"logits/generated": -2.078281879425049, |
|
"logits/real": -2.2794442176818848, |
|
"logps/generated": -303.69927978515625, |
|
"logps/real": -312.5650329589844, |
|
"loss": 0.029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.2463321685791, |
|
"rewards/margins": 15.750595092773438, |
|
"rewards/real": -3.4957356452941895, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.54524089306698e-07, |
|
"logits/generated": -2.10638689994812, |
|
"logits/real": -2.2108185291290283, |
|
"logps/generated": -272.5052185058594, |
|
"logps/real": -257.61053466796875, |
|
"loss": 0.0467, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -16.4874210357666, |
|
"rewards/margins": 13.006558418273926, |
|
"rewards/real": -3.4808602333068848, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.533490011750881e-07, |
|
"logits/generated": -2.015160083770752, |
|
"logits/real": -2.078425884246826, |
|
"logps/generated": -280.7167663574219, |
|
"logps/real": -296.03411865234375, |
|
"loss": 0.0448, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.296838760375977, |
|
"rewards/margins": 13.392329216003418, |
|
"rewards/real": -3.9045073986053467, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.521739130434782e-07, |
|
"logits/generated": -2.0493757724761963, |
|
"logits/real": -2.024940252304077, |
|
"logps/generated": -295.7134704589844, |
|
"logps/real": -240.38882446289062, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -19.699758529663086, |
|
"rewards/margins": 15.107478141784668, |
|
"rewards/real": -4.59227991104126, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5099882491186837e-07, |
|
"logits/generated": -2.033402681350708, |
|
"logits/real": -2.106812000274658, |
|
"logps/generated": -281.4131774902344, |
|
"logps/real": -348.5048522949219, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -17.562549591064453, |
|
"rewards/margins": 13.78009033203125, |
|
"rewards/real": -3.782458543777466, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4982373678025847e-07, |
|
"logits/generated": -1.9644243717193604, |
|
"logits/real": -1.9998550415039062, |
|
"logps/generated": -292.21514892578125, |
|
"logps/real": -269.4073181152344, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.54524803161621, |
|
"rewards/margins": 13.79992961883545, |
|
"rewards/real": -3.7453200817108154, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.486486486486487e-07, |
|
"logits/generated": -2.1318039894104004, |
|
"logits/real": -2.152733564376831, |
|
"logps/generated": -283.0270080566406, |
|
"logps/real": -276.83416748046875, |
|
"loss": 0.0394, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.132619857788086, |
|
"rewards/margins": 13.454730033874512, |
|
"rewards/real": -2.677889347076416, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.474735605170388e-07, |
|
"logits/generated": -2.255167245864868, |
|
"logits/real": -2.1642110347747803, |
|
"logps/generated": -267.664306640625, |
|
"logps/real": -241.0842742919922, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.635882377624512, |
|
"rewards/margins": 13.6681547164917, |
|
"rewards/real": -1.9677283763885498, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.462984723854289e-07, |
|
"logits/generated": -2.261536121368408, |
|
"logits/real": -2.163490056991577, |
|
"logps/generated": -273.19061279296875, |
|
"logps/real": -238.5223846435547, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -16.670108795166016, |
|
"rewards/margins": 14.045565605163574, |
|
"rewards/real": -2.624541759490967, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4512338425381903e-07, |
|
"logits/generated": -2.2644219398498535, |
|
"logits/real": -2.1735246181488037, |
|
"logps/generated": -272.95294189453125, |
|
"logps/real": -242.27676391601562, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.10274887084961, |
|
"rewards/margins": 13.76220989227295, |
|
"rewards/real": -2.340538740158081, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4394829612220913e-07, |
|
"logits/generated": -2.144902467727661, |
|
"logits/real": -2.092916250228882, |
|
"logps/generated": -276.24700927734375, |
|
"logps/real": -269.9608154296875, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.573253631591797, |
|
"rewards/margins": 15.12501049041748, |
|
"rewards/real": -2.44824481010437, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4277320799059924e-07, |
|
"logits/generated": -2.154425621032715, |
|
"logits/real": -2.0945446491241455, |
|
"logps/generated": -270.79022216796875, |
|
"logps/real": -268.6008605957031, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.448904037475586, |
|
"rewards/margins": 14.52673053741455, |
|
"rewards/real": -2.9221715927124023, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4159811985898944e-07, |
|
"logits/generated": -2.2013556957244873, |
|
"logits/real": -2.1177525520324707, |
|
"logps/generated": -279.6724548339844, |
|
"logps/real": -320.83197021484375, |
|
"loss": 0.0348, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.04720687866211, |
|
"rewards/margins": 14.274810791015625, |
|
"rewards/real": -2.7723937034606934, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4042303172737954e-07, |
|
"logits/generated": -2.160095453262329, |
|
"logits/real": -2.0706989765167236, |
|
"logps/generated": -312.0510559082031, |
|
"logps/real": -299.41876220703125, |
|
"loss": 0.034, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.43877410888672, |
|
"rewards/margins": 15.981279373168945, |
|
"rewards/real": -4.457496643066406, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3924794359576964e-07, |
|
"logits/generated": -2.0770351886749268, |
|
"logits/real": -2.0986149311065674, |
|
"logps/generated": -307.2867126464844, |
|
"logps/real": -285.21368408203125, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.678424835205078, |
|
"rewards/margins": 14.942266464233398, |
|
"rewards/real": -4.736159324645996, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.380728554641598e-07, |
|
"logits/generated": -2.0771639347076416, |
|
"logits/real": -2.0399820804595947, |
|
"logps/generated": -300.96954345703125, |
|
"logps/real": -260.22320556640625, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.797773361206055, |
|
"rewards/margins": 15.988889694213867, |
|
"rewards/real": -3.8088836669921875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.368977673325499e-07, |
|
"logits/generated": -2.097740650177002, |
|
"logits/real": -2.0250871181488037, |
|
"logps/generated": -314.9656677246094, |
|
"logps/real": -258.19146728515625, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -21.372987747192383, |
|
"rewards/margins": 15.312896728515625, |
|
"rewards/real": -6.060091495513916, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3572267920094e-07, |
|
"logits/generated": -2.2000434398651123, |
|
"logits/real": -2.0983645915985107, |
|
"logps/generated": -341.3284606933594, |
|
"logps/real": -298.4183044433594, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.781925201416016, |
|
"rewards/margins": 16.64480972290039, |
|
"rewards/real": -6.137114524841309, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.345475910693302e-07, |
|
"logits/generated": -2.0708365440368652, |
|
"logits/real": -2.0243687629699707, |
|
"logps/generated": -331.35162353515625, |
|
"logps/real": -302.08990478515625, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.080236434936523, |
|
"rewards/margins": 15.170089721679688, |
|
"rewards/real": -6.9101457595825195, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.333725029377203e-07, |
|
"logits/generated": -2.1169724464416504, |
|
"logits/real": -2.1490702629089355, |
|
"logps/generated": -294.83856201171875, |
|
"logps/real": -280.41326904296875, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -19.257888793945312, |
|
"rewards/margins": 13.788156509399414, |
|
"rewards/real": -5.469732761383057, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3219741480611046e-07, |
|
"logits/generated": -2.126469135284424, |
|
"logits/real": -2.1890926361083984, |
|
"logps/generated": -300.55047607421875, |
|
"logps/real": -277.67425537109375, |
|
"loss": 0.0353, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.871578216552734, |
|
"rewards/margins": 15.217971801757812, |
|
"rewards/real": -4.6536054611206055, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3102232667450057e-07, |
|
"logits/generated": -2.235631227493286, |
|
"logits/real": -2.2010443210601807, |
|
"logps/generated": -284.49578857421875, |
|
"logps/real": -236.76010131835938, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -17.960590362548828, |
|
"rewards/margins": 13.84907341003418, |
|
"rewards/real": -4.111515522003174, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2984723854289067e-07, |
|
"logits/generated": -2.219752073287964, |
|
"logits/real": -2.2477521896362305, |
|
"logps/generated": -294.33184814453125, |
|
"logps/real": -264.7086486816406, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -19.106578826904297, |
|
"rewards/margins": 14.367040634155273, |
|
"rewards/real": -4.73953914642334, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.286721504112809e-07, |
|
"logits/generated": -2.257949113845825, |
|
"logits/real": -2.2932074069976807, |
|
"logps/generated": -278.9601135253906, |
|
"logps/real": -228.11141967773438, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.96208953857422, |
|
"rewards/margins": 13.705500602722168, |
|
"rewards/real": -3.2565879821777344, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.27497062279671e-07, |
|
"logits/generated": -2.3263726234436035, |
|
"logits/real": -2.300952434539795, |
|
"logps/generated": -263.8218078613281, |
|
"logps/real": -272.5172119140625, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -15.995553970336914, |
|
"rewards/margins": 12.962613105773926, |
|
"rewards/real": -3.032942056655884, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.263219741480611e-07, |
|
"logits/generated": -2.301636219024658, |
|
"logits/real": -2.3038859367370605, |
|
"logps/generated": -280.7583923339844, |
|
"logps/real": -293.1648254394531, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.14383888244629, |
|
"rewards/margins": 13.275634765625, |
|
"rewards/real": -3.8682055473327637, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2514688601645123e-07, |
|
"logits/generated": -2.284412384033203, |
|
"logits/real": -2.300344944000244, |
|
"logps/generated": -271.6402893066406, |
|
"logps/real": -303.3867492675781, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.277772903442383, |
|
"rewards/margins": 12.891380310058594, |
|
"rewards/real": -3.386394500732422, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2397179788484133e-07, |
|
"logits/generated": -2.301110029220581, |
|
"logits/real": -2.283402681350708, |
|
"logps/generated": -274.2630615234375, |
|
"logps/real": -295.1095886230469, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -17.09488296508789, |
|
"rewards/margins": 13.71021842956543, |
|
"rewards/real": -3.3846638202667236, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2279670975323143e-07, |
|
"logits/generated": -2.281606912612915, |
|
"logits/real": -2.1891345977783203, |
|
"logps/generated": -287.2510070800781, |
|
"logps/real": -249.9571075439453, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.902332305908203, |
|
"rewards/margins": 13.83057689666748, |
|
"rewards/real": -4.071754455566406, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2162162162162164e-07, |
|
"logits/generated": -2.1958327293395996, |
|
"logits/real": -2.1260383129119873, |
|
"logps/generated": -294.9903869628906, |
|
"logps/real": -247.8839111328125, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.26121711730957, |
|
"rewards/margins": 15.105463027954102, |
|
"rewards/real": -4.155752658843994, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2044653349001174e-07, |
|
"logits/generated": -2.204456090927124, |
|
"logits/real": -2.0998289585113525, |
|
"logps/generated": -301.88909912109375, |
|
"logps/real": -258.03753662109375, |
|
"loss": 0.016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.723281860351562, |
|
"rewards/margins": 14.72815990447998, |
|
"rewards/real": -3.9951229095458984, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1927144535840184e-07, |
|
"logits/generated": -2.1646523475646973, |
|
"logits/real": -2.1553101539611816, |
|
"logps/generated": -297.005126953125, |
|
"logps/real": -272.0830078125, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.245548248291016, |
|
"rewards/margins": 15.432889938354492, |
|
"rewards/real": -2.8126583099365234, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.18096357226792e-07, |
|
"logits/generated": -2.1961166858673096, |
|
"logits/real": -2.1357979774475098, |
|
"logps/generated": -313.2271423339844, |
|
"logps/real": -236.79226684570312, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.204418182373047, |
|
"rewards/margins": 16.67409896850586, |
|
"rewards/real": -3.530319929122925, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.169212690951821e-07, |
|
"logits/generated": -2.21925687789917, |
|
"logits/real": -2.141052484512329, |
|
"logps/generated": -313.30389404296875, |
|
"logps/real": -259.11871337890625, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.646581649780273, |
|
"rewards/margins": 16.518220901489258, |
|
"rewards/real": -3.1283624172210693, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.157461809635722e-07, |
|
"logits/generated": -2.2323949337005615, |
|
"logits/real": -2.228459119796753, |
|
"logps/generated": -323.08868408203125, |
|
"logps/real": -308.6040344238281, |
|
"loss": 0.049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.510068893432617, |
|
"rewards/margins": 17.15546989440918, |
|
"rewards/real": -3.3545982837677, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.145710928319624e-07, |
|
"logits/generated": -2.214876890182495, |
|
"logits/real": -2.1600141525268555, |
|
"logps/generated": -309.6695556640625, |
|
"logps/real": -240.92129516601562, |
|
"loss": 0.02, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.384716033935547, |
|
"rewards/margins": 17.16757583618164, |
|
"rewards/real": -3.2171401977539062, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.133960047003525e-07, |
|
"logits/generated": -2.215092182159424, |
|
"logits/real": -2.1977219581604004, |
|
"logps/generated": -305.69342041015625, |
|
"logps/real": -273.934326171875, |
|
"loss": 0.0263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.206113815307617, |
|
"rewards/margins": 16.1297550201416, |
|
"rewards/real": -3.0763602256774902, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1222091656874266e-07, |
|
"logits/generated": -2.2863411903381348, |
|
"logits/real": -2.1990232467651367, |
|
"logps/generated": -325.60723876953125, |
|
"logps/real": -249.89712524414062, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.734128952026367, |
|
"rewards/margins": 17.138214111328125, |
|
"rewards/real": -4.595917224884033, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1104582843713276e-07, |
|
"logits/generated": -2.2235512733459473, |
|
"logits/real": -2.2082717418670654, |
|
"logps/generated": -331.63336181640625, |
|
"logps/real": -294.22552490234375, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.272472381591797, |
|
"rewards/margins": 16.936038970947266, |
|
"rewards/real": -5.336433410644531, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0987074030552287e-07, |
|
"logits/generated": -2.2295403480529785, |
|
"logits/real": -2.2075161933898926, |
|
"logps/generated": -303.949951171875, |
|
"logps/real": -290.83447265625, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.734106063842773, |
|
"rewards/margins": 16.062904357910156, |
|
"rewards/real": -3.6712021827697754, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0869565217391307e-07, |
|
"logits/generated": -2.329646110534668, |
|
"logits/real": -2.2754032611846924, |
|
"logps/generated": -284.7631530761719, |
|
"logps/real": -276.9230651855469, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.821731567382812, |
|
"rewards/margins": 14.999728202819824, |
|
"rewards/real": -2.82200288772583, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.075205640423032e-07, |
|
"logits/generated": -2.24617075920105, |
|
"logits/real": -2.208388090133667, |
|
"logps/generated": -290.4934997558594, |
|
"logps/real": -255.40194702148438, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.316036224365234, |
|
"rewards/margins": 14.29789924621582, |
|
"rewards/real": -4.018136024475098, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.063454759106933e-07, |
|
"logits/generated": -2.3056511878967285, |
|
"logits/real": -2.1358304023742676, |
|
"logps/generated": -316.7949523925781, |
|
"logps/real": -265.46905517578125, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.009098052978516, |
|
"rewards/margins": 17.545692443847656, |
|
"rewards/real": -3.4634087085723877, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0517038777908343e-07, |
|
"logits/generated": -2.2054314613342285, |
|
"logits/real": -2.018893003463745, |
|
"logps/generated": -298.08404541015625, |
|
"logps/real": -229.26156616210938, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.959003448486328, |
|
"rewards/margins": 16.036542892456055, |
|
"rewards/real": -3.9224586486816406, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0399529964747353e-07, |
|
"logits/generated": -2.284658908843994, |
|
"logits/real": -2.1389191150665283, |
|
"logps/generated": -315.28826904296875, |
|
"logps/real": -349.39276123046875, |
|
"loss": 0.026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.028697967529297, |
|
"rewards/margins": 16.19293212890625, |
|
"rewards/real": -3.8357670307159424, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0282021151586363e-07, |
|
"logits/generated": -2.1421546936035156, |
|
"logits/real": -2.045635223388672, |
|
"logps/generated": -334.4831237792969, |
|
"logps/real": -308.2193908691406, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -22.108678817749023, |
|
"rewards/margins": 16.95539093017578, |
|
"rewards/real": -5.153290748596191, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0164512338425384e-07, |
|
"logits/generated": -2.219916582107544, |
|
"logits/real": -2.0313782691955566, |
|
"logps/generated": -324.0411682128906, |
|
"logps/real": -280.9827575683594, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -21.63995361328125, |
|
"rewards/margins": 16.822376251220703, |
|
"rewards/real": -4.81757926940918, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0047003525264394e-07, |
|
"logits/generated": -2.0729808807373047, |
|
"logits/real": -1.9643628597259521, |
|
"logps/generated": -303.87921142578125, |
|
"logps/real": -236.41629028320312, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.983474731445312, |
|
"rewards/margins": 16.176816940307617, |
|
"rewards/real": -3.806657075881958, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9929494712103404e-07, |
|
"logits/generated": -2.189826488494873, |
|
"logits/real": -2.0320651531219482, |
|
"logps/generated": -296.17340087890625, |
|
"logps/real": -300.67254638671875, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.916046142578125, |
|
"rewards/margins": 15.765054702758789, |
|
"rewards/real": -3.150991439819336, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.981198589894242e-07, |
|
"logits/generated": -2.255155086517334, |
|
"logits/real": -2.0348448753356934, |
|
"logps/generated": -292.5631408691406, |
|
"logps/real": -286.8785705566406, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.44003677368164, |
|
"rewards/margins": 14.815027236938477, |
|
"rewards/real": -3.625012159347534, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.969447708578143e-07, |
|
"logits/generated": -2.151961088180542, |
|
"logits/real": -1.9714977741241455, |
|
"logps/generated": -295.259765625, |
|
"logps/real": -267.87994384765625, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.496400833129883, |
|
"rewards/margins": 14.87242317199707, |
|
"rewards/real": -3.6239781379699707, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.957696827262044e-07, |
|
"logits/generated": -2.140820026397705, |
|
"logits/real": -1.9414198398590088, |
|
"logps/generated": -283.1053161621094, |
|
"logps/real": -241.42709350585938, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -19.189056396484375, |
|
"rewards/margins": 14.983909606933594, |
|
"rewards/real": -4.2051472663879395, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.945945945945946e-07, |
|
"logits/generated": -2.200963258743286, |
|
"logits/real": -2.045172691345215, |
|
"logps/generated": -293.6392517089844, |
|
"logps/real": -296.48577880859375, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.247238159179688, |
|
"rewards/margins": 14.320365905761719, |
|
"rewards/real": -3.926875352859497, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.934195064629847e-07, |
|
"logits/generated": -2.1450352668762207, |
|
"logits/real": -2.019554376602173, |
|
"logps/generated": -287.7925720214844, |
|
"logps/real": -268.22845458984375, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.483325958251953, |
|
"rewards/margins": 14.118812561035156, |
|
"rewards/real": -4.364512920379639, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9224441833137486e-07, |
|
"logits/generated": -2.3144712448120117, |
|
"logits/real": -1.952749252319336, |
|
"logps/generated": -294.29168701171875, |
|
"logps/real": -282.1815490722656, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.526988983154297, |
|
"rewards/margins": 14.965646743774414, |
|
"rewards/real": -3.561342716217041, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9106933019976496e-07, |
|
"logits/generated": -2.3015847206115723, |
|
"logits/real": -2.000420093536377, |
|
"logps/generated": -287.97747802734375, |
|
"logps/real": -263.9286804199219, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -18.587892532348633, |
|
"rewards/margins": 13.602892875671387, |
|
"rewards/real": -4.984999656677246, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.8989424206815507e-07, |
|
"logits/generated": -2.424217462539673, |
|
"logits/real": -1.9993460178375244, |
|
"logps/generated": -296.1605529785156, |
|
"logps/real": -274.6358337402344, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.653118133544922, |
|
"rewards/margins": 16.015432357788086, |
|
"rewards/real": -3.637685775756836, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.887191539365452e-07, |
|
"logits/generated": -2.4005563259124756, |
|
"logits/real": -2.1184263229370117, |
|
"logps/generated": -280.97833251953125, |
|
"logps/real": -252.4258270263672, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -17.730958938598633, |
|
"rewards/margins": 13.906275749206543, |
|
"rewards/real": -3.824683666229248, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8754406580493537e-07, |
|
"logits/generated": -2.380354642868042, |
|
"logits/real": -2.1375575065612793, |
|
"logps/generated": -308.86285400390625, |
|
"logps/real": -265.4056701660156, |
|
"loss": 0.0189, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.611974716186523, |
|
"rewards/margins": 15.560144424438477, |
|
"rewards/real": -4.051828384399414, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.863689776733255e-07, |
|
"logits/generated": -2.4837889671325684, |
|
"logits/real": -2.1749095916748047, |
|
"logps/generated": -292.39959716796875, |
|
"logps/real": -292.68499755859375, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.81622886657715, |
|
"rewards/margins": 13.84477424621582, |
|
"rewards/real": -3.97145414352417, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8519388954171563e-07, |
|
"logits/generated": -2.411600351333618, |
|
"logits/real": -2.198923110961914, |
|
"logps/generated": -272.6438903808594, |
|
"logps/real": -268.46417236328125, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.41283416748047, |
|
"rewards/margins": 13.593576431274414, |
|
"rewards/real": -3.8192572593688965, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8401880141010573e-07, |
|
"logits/generated": -2.5035364627838135, |
|
"logits/real": -2.099947452545166, |
|
"logps/generated": -310.13330078125, |
|
"logps/real": -264.05078125, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.967391967773438, |
|
"rewards/margins": 15.722890853881836, |
|
"rewards/real": -5.244499206542969, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8284371327849583e-07, |
|
"logits/generated": -2.3421576023101807, |
|
"logits/real": -2.2294716835021973, |
|
"logps/generated": -308.58416748046875, |
|
"logps/real": -290.80450439453125, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.871484756469727, |
|
"rewards/margins": 14.30445384979248, |
|
"rewards/real": -5.567030906677246, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8166862514688604e-07, |
|
"logits/generated": -2.4322938919067383, |
|
"logits/real": -2.192739725112915, |
|
"logps/generated": -290.4706726074219, |
|
"logps/real": -280.22711181640625, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -18.550111770629883, |
|
"rewards/margins": 13.982562065124512, |
|
"rewards/real": -4.567549228668213, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8049353701527614e-07, |
|
"logits/generated": -2.4540488719940186, |
|
"logits/real": -2.1258809566497803, |
|
"logps/generated": -314.3543395996094, |
|
"logps/real": -242.69906616210938, |
|
"loss": 0.0197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.094104766845703, |
|
"rewards/margins": 17.1605167388916, |
|
"rewards/real": -3.933588743209839, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.7931844888366624e-07, |
|
"logits/generated": -2.429776668548584, |
|
"logits/real": -2.1748576164245605, |
|
"logps/generated": -290.11431884765625, |
|
"logps/real": -276.1110534667969, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -18.669795989990234, |
|
"rewards/margins": 14.223612785339355, |
|
"rewards/real": -4.446183204650879, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.781433607520564e-07, |
|
"logits/generated": -2.382094621658325, |
|
"logits/real": -2.138918399810791, |
|
"logps/generated": -302.57012939453125, |
|
"logps/real": -249.3428955078125, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.439178466796875, |
|
"rewards/margins": 16.179622650146484, |
|
"rewards/real": -4.259556770324707, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.769682726204465e-07, |
|
"logits/generated": -2.4343438148498535, |
|
"logits/real": -2.256923198699951, |
|
"logps/generated": -300.0426330566406, |
|
"logps/real": -285.530517578125, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.440828323364258, |
|
"rewards/margins": 14.015217781066895, |
|
"rewards/real": -4.425610542297363, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.757931844888366e-07, |
|
"logits/generated": -2.4278175830841064, |
|
"logits/real": -2.1669082641601562, |
|
"logps/generated": -304.4680480957031, |
|
"logps/real": -273.3132629394531, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.61075782775879, |
|
"rewards/margins": 15.039899826049805, |
|
"rewards/real": -4.570856094360352, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.746180963572268e-07, |
|
"logits/generated": -2.434357166290283, |
|
"logits/real": -2.2451558113098145, |
|
"logps/generated": -300.28118896484375, |
|
"logps/real": -304.6499328613281, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.55888557434082, |
|
"rewards/margins": 14.898167610168457, |
|
"rewards/real": -4.66071891784668, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.734430082256169e-07, |
|
"logits/generated": -2.454746961593628, |
|
"logits/real": -2.167454719543457, |
|
"logps/generated": -290.23675537109375, |
|
"logps/real": -286.0035400390625, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -17.961822509765625, |
|
"rewards/margins": 13.359112739562988, |
|
"rewards/real": -4.602707386016846, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7226792009400706e-07, |
|
"logits/generated": -2.4925215244293213, |
|
"logits/real": -2.185725688934326, |
|
"logps/generated": -306.9207763671875, |
|
"logps/real": -302.2175598144531, |
|
"loss": 0.0262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.345661163330078, |
|
"rewards/margins": 13.471136093139648, |
|
"rewards/real": -6.8745245933532715, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7109283196239716e-07, |
|
"logits/generated": -2.415444850921631, |
|
"logits/real": -2.0123257637023926, |
|
"logps/generated": -339.48565673828125, |
|
"logps/real": -237.3209991455078, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.1875, |
|
"rewards/margins": 17.469690322875977, |
|
"rewards/real": -6.717806816101074, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6991774383078726e-07, |
|
"logits/generated": -2.374286651611328, |
|
"logits/real": -2.0234928131103516, |
|
"logps/generated": -306.5343017578125, |
|
"logps/real": -287.781005859375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.14816665649414, |
|
"rewards/margins": 15.472940444946289, |
|
"rewards/real": -4.675227165222168, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.687426556991774e-07, |
|
"logits/generated": -2.4474167823791504, |
|
"logits/real": -2.071176052093506, |
|
"logps/generated": -317.44989013671875, |
|
"logps/real": -285.42474365234375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.355384826660156, |
|
"rewards/margins": 15.974954605102539, |
|
"rewards/real": -5.380428314208984, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.6756756756756757e-07, |
|
"logits/generated": -2.3674721717834473, |
|
"logits/real": -1.9707956314086914, |
|
"logps/generated": -306.5394592285156, |
|
"logps/real": -225.94546508789062, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.928173065185547, |
|
"rewards/margins": 14.604655265808105, |
|
"rewards/real": -6.323517799377441, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663924794359577e-07, |
|
"logits/generated": -2.42570424079895, |
|
"logits/real": -1.969363808631897, |
|
"logps/generated": -321.4077453613281, |
|
"logps/real": -271.72955322265625, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.87816619873047, |
|
"rewards/margins": 16.646554946899414, |
|
"rewards/real": -5.231610298156738, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6521739130434783e-07, |
|
"logits/generated": -2.327263593673706, |
|
"logits/real": -1.942073106765747, |
|
"logps/generated": -299.33856201171875, |
|
"logps/real": -233.4484100341797, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.722368240356445, |
|
"rewards/margins": 15.216009140014648, |
|
"rewards/real": -5.5063581466674805, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6404230317273793e-07, |
|
"logits/generated": -2.434544563293457, |
|
"logits/real": -1.9652414321899414, |
|
"logps/generated": -332.652587890625, |
|
"logps/real": -301.4515380859375, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.612462997436523, |
|
"rewards/margins": 15.830297470092773, |
|
"rewards/real": -4.782168388366699, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6286721504112803e-07, |
|
"logits/generated": -2.3795437812805176, |
|
"logits/real": -1.8615341186523438, |
|
"logps/generated": -334.30499267578125, |
|
"logps/real": -264.4176330566406, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.38288116455078, |
|
"rewards/margins": 17.554964065551758, |
|
"rewards/real": -4.827913761138916, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6169212690951824e-07, |
|
"logits/generated": -2.3490805625915527, |
|
"logits/real": -1.8895342350006104, |
|
"logps/generated": -323.92572021484375, |
|
"logps/real": -287.7126159667969, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.20674705505371, |
|
"rewards/margins": 16.43838882446289, |
|
"rewards/real": -4.76835823059082, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6051703877790834e-07, |
|
"logits/generated": -2.3346450328826904, |
|
"logits/real": -1.9004275798797607, |
|
"logps/generated": -321.3191833496094, |
|
"logps/real": -248.30148315429688, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.776212692260742, |
|
"rewards/margins": 15.979314804077148, |
|
"rewards/real": -5.796901702880859, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5934195064629844e-07, |
|
"logits/generated": -2.3258790969848633, |
|
"logits/real": -1.9253677129745483, |
|
"logps/generated": -332.10076904296875, |
|
"logps/real": -287.5823669433594, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.906774520874023, |
|
"rewards/margins": 17.70983123779297, |
|
"rewards/real": -5.196944236755371, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.581668625146886e-07, |
|
"logits/generated": -2.4416744709014893, |
|
"logits/real": -1.9654200077056885, |
|
"logps/generated": -338.466552734375, |
|
"logps/real": -289.2925720214844, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.339372634887695, |
|
"rewards/margins": 18.226560592651367, |
|
"rewards/real": -5.1128129959106445, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.569917743830787e-07, |
|
"logits/generated": -2.3743972778320312, |
|
"logits/real": -2.0106990337371826, |
|
"logps/generated": -321.8802795410156, |
|
"logps/real": -289.3930969238281, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.401901245117188, |
|
"rewards/margins": 16.5495662689209, |
|
"rewards/real": -4.852335453033447, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.558166862514688e-07, |
|
"logits/generated": -2.3224287033081055, |
|
"logits/real": -1.9159963130950928, |
|
"logps/generated": -354.5892333984375, |
|
"logps/real": -291.2265625, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.254032135009766, |
|
"rewards/margins": 19.105968475341797, |
|
"rewards/real": -5.148062229156494, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.54641598119859e-07, |
|
"logits/generated": -2.3815436363220215, |
|
"logits/real": -1.973968744277954, |
|
"logps/generated": -334.0134582519531, |
|
"logps/real": -333.6390686035156, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.35097312927246, |
|
"rewards/margins": 16.910682678222656, |
|
"rewards/real": -5.440291404724121, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.534665099882491e-07, |
|
"logits/generated": -2.2737233638763428, |
|
"logits/real": -1.969150185585022, |
|
"logps/generated": -332.0345764160156, |
|
"logps/real": -276.24163818359375, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.814863204956055, |
|
"rewards/margins": 17.44947052001953, |
|
"rewards/real": -5.36539363861084, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5229142185663926e-07, |
|
"logits/generated": -2.4095771312713623, |
|
"logits/real": -2.06756329536438, |
|
"logps/generated": -310.903076171875, |
|
"logps/real": -290.26885986328125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.025936126708984, |
|
"rewards/margins": 16.35614585876465, |
|
"rewards/real": -4.669791221618652, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5111633372502936e-07, |
|
"logits/generated": -2.394035577774048, |
|
"logits/real": -1.9228935241699219, |
|
"logps/generated": -344.68133544921875, |
|
"logps/real": -322.188232421875, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.82350730895996, |
|
"rewards/margins": 18.79680633544922, |
|
"rewards/real": -4.026702880859375, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4994124559341946e-07, |
|
"logits/generated": -2.3307666778564453, |
|
"logits/real": -2.019087314605713, |
|
"logps/generated": -348.3106689453125, |
|
"logps/real": -280.13470458984375, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.350341796875, |
|
"rewards/margins": 18.29078483581543, |
|
"rewards/real": -6.059556007385254, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.487661574618096e-07, |
|
"logits/generated": -2.2875423431396484, |
|
"logits/real": -1.8283237218856812, |
|
"logps/generated": -355.9295654296875, |
|
"logps/real": -286.44342041015625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.10162353515625, |
|
"rewards/margins": 19.49599838256836, |
|
"rewards/real": -5.605628490447998, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4759106933019977e-07, |
|
"logits/generated": -2.326918840408325, |
|
"logits/real": -2.001236915588379, |
|
"logps/generated": -351.1276550292969, |
|
"logps/real": -286.3841247558594, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.509958267211914, |
|
"rewards/margins": 18.328205108642578, |
|
"rewards/real": -6.181752681732178, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4641598119858987e-07, |
|
"logits/generated": -2.197887420654297, |
|
"logits/real": -1.8538007736206055, |
|
"logps/generated": -384.99749755859375, |
|
"logps/real": -274.5433044433594, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.36118507385254, |
|
"rewards/margins": 22.02522087097168, |
|
"rewards/real": -6.335963249206543, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4524089306698003e-07, |
|
"logits/generated": -2.345053195953369, |
|
"logits/real": -2.0094006061553955, |
|
"logps/generated": -370.4590759277344, |
|
"logps/real": -316.2205810546875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.606197357177734, |
|
"rewards/margins": 18.697460174560547, |
|
"rewards/real": -6.908738613128662, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4406580493537013e-07, |
|
"logits/generated": -2.285144329071045, |
|
"logits/real": -1.9462623596191406, |
|
"logps/generated": -373.7235412597656, |
|
"logps/real": -283.7071228027344, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -27.2155818939209, |
|
"rewards/margins": 19.813146591186523, |
|
"rewards/real": -7.4024338722229, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4289071680376023e-07, |
|
"logits/generated": -2.254729747772217, |
|
"logits/real": -2.026987075805664, |
|
"logps/generated": -378.70941162109375, |
|
"logps/real": -323.30426025390625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.278667449951172, |
|
"rewards/margins": 20.472742080688477, |
|
"rewards/real": -6.805926322937012, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.417156286721504e-07, |
|
"logits/generated": -2.235236644744873, |
|
"logits/real": -2.0165162086486816, |
|
"logps/generated": -397.2226867675781, |
|
"logps/real": -347.4537658691406, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.368255615234375, |
|
"rewards/margins": 20.647584915161133, |
|
"rewards/real": -7.720673561096191, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4054054054054054e-07, |
|
"logits/generated": -2.2384140491485596, |
|
"logits/real": -1.9251244068145752, |
|
"logps/generated": -350.0628356933594, |
|
"logps/real": -244.8567657470703, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.298246383666992, |
|
"rewards/margins": 20.636980056762695, |
|
"rewards/real": -4.661267280578613, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3936545240893064e-07, |
|
"logits/generated": -2.224591016769409, |
|
"logits/real": -1.7972519397735596, |
|
"logps/generated": -367.85107421875, |
|
"logps/real": -289.4563903808594, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.207212448120117, |
|
"rewards/margins": 21.348243713378906, |
|
"rewards/real": -3.85896372795105, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.381903642773208e-07, |
|
"logits/generated": -2.298875570297241, |
|
"logits/real": -1.9161163568496704, |
|
"logps/generated": -343.57244873046875, |
|
"logps/real": -288.15765380859375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.460208892822266, |
|
"rewards/margins": 18.997722625732422, |
|
"rewards/real": -4.462485313415527, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.370152761457109e-07, |
|
"logits/generated": -2.2626256942749023, |
|
"logits/real": -2.0237679481506348, |
|
"logps/generated": -328.1484375, |
|
"logps/real": -256.3814697265625, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.351253509521484, |
|
"rewards/margins": 17.940073013305664, |
|
"rewards/real": -4.411181449890137, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.35840188014101e-07, |
|
"logits/generated": -2.2844908237457275, |
|
"logits/real": -2.081831216812134, |
|
"logps/generated": -356.357177734375, |
|
"logps/real": -267.3996887207031, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.618431091308594, |
|
"rewards/margins": 20.21932029724121, |
|
"rewards/real": -4.399110317230225, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.346650998824912e-07, |
|
"logits/generated": -2.4111740589141846, |
|
"logits/real": -2.1501283645629883, |
|
"logps/generated": -320.0719299316406, |
|
"logps/real": -321.5926513671875, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.9744930267334, |
|
"rewards/margins": 18.086454391479492, |
|
"rewards/real": -3.888040065765381, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.334900117508813e-07, |
|
"logits/generated": -2.406731128692627, |
|
"logits/real": -2.109961748123169, |
|
"logps/generated": -331.38629150390625, |
|
"logps/real": -285.94610595703125, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.635053634643555, |
|
"rewards/margins": 17.40793228149414, |
|
"rewards/real": -4.227120399475098, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3231492361927146e-07, |
|
"logits/generated": -2.4639508724212646, |
|
"logits/real": -2.219212293624878, |
|
"logps/generated": -325.2287902832031, |
|
"logps/real": -297.9587097167969, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.667682647705078, |
|
"rewards/margins": 17.30364418029785, |
|
"rewards/real": -3.3640379905700684, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3113983548766156e-07, |
|
"logits/generated": -2.548128604888916, |
|
"logits/real": -2.028677463531494, |
|
"logps/generated": -308.02490234375, |
|
"logps/real": -280.04034423828125, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.040822982788086, |
|
"rewards/margins": 17.85296058654785, |
|
"rewards/real": -3.1878647804260254, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2996474735605166e-07, |
|
"logits/generated": -2.484825849533081, |
|
"logits/real": -2.0903892517089844, |
|
"logps/generated": -325.17352294921875, |
|
"logps/real": -269.5104675292969, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.7929744720459, |
|
"rewards/margins": 17.698253631591797, |
|
"rewards/real": -4.094720840454102, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.287896592244418e-07, |
|
"logits/generated": -2.398820400238037, |
|
"logits/real": -2.1054134368896484, |
|
"logps/generated": -337.5372009277344, |
|
"logps/real": -260.09661865234375, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.03034019470215, |
|
"rewards/margins": 18.337459564208984, |
|
"rewards/real": -4.6928815841674805, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2761457109283197e-07, |
|
"logits/generated": -2.3103232383728027, |
|
"logits/real": -2.0682365894317627, |
|
"logps/generated": -361.6377868652344, |
|
"logps/real": -288.559814453125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.030508041381836, |
|
"rewards/margins": 18.72498321533203, |
|
"rewards/real": -6.305525779724121, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2643948296122207e-07, |
|
"logits/generated": -2.2990386486053467, |
|
"logits/real": -1.9596431255340576, |
|
"logps/generated": -370.11602783203125, |
|
"logps/real": -297.407958984375, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.362558364868164, |
|
"rewards/margins": 19.419109344482422, |
|
"rewards/real": -6.943448066711426, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.252643948296122e-07, |
|
"logits/generated": -2.5760021209716797, |
|
"logits/real": -2.365118980407715, |
|
"logps/generated": -314.21612548828125, |
|
"logps/real": -329.44818115234375, |
|
"loss": 0.0337, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.608551025390625, |
|
"rewards/margins": 14.474385261535645, |
|
"rewards/real": -6.134167671203613, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2408930669800233e-07, |
|
"logits/generated": -2.529139995574951, |
|
"logits/real": -2.3194754123687744, |
|
"logps/generated": -322.12579345703125, |
|
"logps/real": -315.3408508300781, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -21.443510055541992, |
|
"rewards/margins": 16.046594619750977, |
|
"rewards/real": -5.396914958953857, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2291421856639243e-07, |
|
"logits/generated": -2.455733299255371, |
|
"logits/real": -2.1376829147338867, |
|
"logps/generated": -340.5116271972656, |
|
"logps/real": -303.23089599609375, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.338191986083984, |
|
"rewards/margins": 17.037410736083984, |
|
"rewards/real": -6.300785064697266, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.217391304347826e-07, |
|
"logits/generated": -2.468418598175049, |
|
"logits/real": -2.248774528503418, |
|
"logps/generated": -329.9716491699219, |
|
"logps/real": -286.23785400390625, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.26080322265625, |
|
"rewards/margins": 16.494421005249023, |
|
"rewards/real": -5.766382694244385, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.2056404230317274e-07, |
|
"logits/generated": -2.521620988845825, |
|
"logits/real": -2.2400965690612793, |
|
"logps/generated": -331.94195556640625, |
|
"logps/real": -314.6190490722656, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.115577697753906, |
|
"rewards/margins": 16.175901412963867, |
|
"rewards/real": -5.9396748542785645, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1938895417156284e-07, |
|
"logits/generated": -2.4937617778778076, |
|
"logits/real": -2.2404730319976807, |
|
"logps/generated": -320.19561767578125, |
|
"logps/real": -296.76220703125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.591772079467773, |
|
"rewards/margins": 15.425150871276855, |
|
"rewards/real": -6.166622161865234, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.18213866039953e-07, |
|
"logits/generated": -2.459237813949585, |
|
"logits/real": -2.1428635120391846, |
|
"logps/generated": -328.6792297363281, |
|
"logps/real": -273.9725036621094, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.400270462036133, |
|
"rewards/margins": 16.682750701904297, |
|
"rewards/real": -5.7175188064575195, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.170387779083431e-07, |
|
"logits/generated": -2.4984331130981445, |
|
"logits/real": -2.219433307647705, |
|
"logps/generated": -333.45123291015625, |
|
"logps/real": -290.7151794433594, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.205299377441406, |
|
"rewards/margins": 16.75266456604004, |
|
"rewards/real": -5.452635765075684, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1586368977673325e-07, |
|
"logits/generated": -2.4460034370422363, |
|
"logits/real": -2.1568732261657715, |
|
"logps/generated": -311.125244140625, |
|
"logps/real": -252.8068084716797, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.626285552978516, |
|
"rewards/margins": 16.041940689086914, |
|
"rewards/real": -5.584343910217285, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.146886016451234e-07, |
|
"logits/generated": -2.4558825492858887, |
|
"logits/real": -2.202268362045288, |
|
"logps/generated": -321.90008544921875, |
|
"logps/real": -299.6604309082031, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.283735275268555, |
|
"rewards/margins": 16.26697540283203, |
|
"rewards/real": -4.016759395599365, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.135135135135135e-07, |
|
"logits/generated": -2.520723581314087, |
|
"logits/real": -2.143216371536255, |
|
"logps/generated": -329.4117126464844, |
|
"logps/real": -260.57342529296875, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -22.833818435668945, |
|
"rewards/margins": 17.800403594970703, |
|
"rewards/real": -5.0334153175354, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1233842538190366e-07, |
|
"logits/generated": -2.4640583992004395, |
|
"logits/real": -2.2413322925567627, |
|
"logps/generated": -304.47332763671875, |
|
"logps/real": -314.59661865234375, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.477767944335938, |
|
"rewards/margins": 13.840822219848633, |
|
"rewards/real": -5.636946201324463, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1116333725029376e-07, |
|
"logits/generated": -2.416250228881836, |
|
"logits/real": -2.14082670211792, |
|
"logps/generated": -334.43267822265625, |
|
"logps/real": -253.3500213623047, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.195547103881836, |
|
"rewards/margins": 17.476577758789062, |
|
"rewards/real": -5.718966484069824, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0998824911868386e-07, |
|
"logits/generated": -2.4012951850891113, |
|
"logits/real": -2.0450942516326904, |
|
"logps/generated": -344.83856201171875, |
|
"logps/real": -263.33087158203125, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.860132217407227, |
|
"rewards/margins": 18.692014694213867, |
|
"rewards/real": -5.168116092681885, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.08813160987074e-07, |
|
"logits/generated": -2.4515933990478516, |
|
"logits/real": -2.1537697315216064, |
|
"logps/generated": -335.5617370605469, |
|
"logps/real": -284.315673828125, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.91217613220215, |
|
"rewards/margins": 17.602642059326172, |
|
"rewards/real": -5.30953311920166, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0763807285546417e-07, |
|
"logits/generated": -2.421034812927246, |
|
"logits/real": -2.1092395782470703, |
|
"logps/generated": -337.59063720703125, |
|
"logps/real": -293.2365417480469, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.965925216674805, |
|
"rewards/margins": 17.585596084594727, |
|
"rewards/real": -4.380329132080078, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0646298472385427e-07, |
|
"logits/generated": -2.4205338954925537, |
|
"logits/real": -2.0897164344787598, |
|
"logps/generated": -326.11993408203125, |
|
"logps/real": -333.59722900390625, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.24331283569336, |
|
"rewards/margins": 17.81305503845215, |
|
"rewards/real": -3.4302544593811035, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.052878965922444e-07, |
|
"logits/generated": -2.4457616806030273, |
|
"logits/real": -2.1007394790649414, |
|
"logps/generated": -354.89190673828125, |
|
"logps/real": -308.13482666015625, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.513587951660156, |
|
"rewards/margins": 19.220561981201172, |
|
"rewards/real": -4.293027400970459, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.041128084606345e-07, |
|
"logits/generated": -2.5040667057037354, |
|
"logits/real": -2.0916154384613037, |
|
"logps/generated": -297.73785400390625, |
|
"logps/real": -264.8382568359375, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.15200424194336, |
|
"rewards/margins": 15.560193061828613, |
|
"rewards/real": -4.591813087463379, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0293772032902463e-07, |
|
"logits/generated": -2.433756113052368, |
|
"logits/real": -2.0441482067108154, |
|
"logps/generated": -331.0630187988281, |
|
"logps/real": -275.1962890625, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.667695999145508, |
|
"rewards/margins": 18.11298370361328, |
|
"rewards/real": -3.5547127723693848, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.017626321974148e-07, |
|
"logits/generated": -2.4524292945861816, |
|
"logits/real": -2.1112122535705566, |
|
"logps/generated": -310.28179931640625, |
|
"logps/real": -256.42669677734375, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.27758026123047, |
|
"rewards/margins": 16.748332977294922, |
|
"rewards/real": -4.529247283935547, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.0058754406580494e-07, |
|
"logits/generated": -2.4680449962615967, |
|
"logits/real": -2.1266109943389893, |
|
"logps/generated": -323.96221923828125, |
|
"logps/real": -285.35064697265625, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.724130630493164, |
|
"rewards/margins": 17.49283218383789, |
|
"rewards/real": -4.231298923492432, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9941245593419504e-07, |
|
"logits/generated": -2.382336378097534, |
|
"logits/real": -1.9932057857513428, |
|
"logps/generated": -331.06640625, |
|
"logps/real": -238.3595428466797, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.42551040649414, |
|
"rewards/margins": 17.899635314941406, |
|
"rewards/real": -5.525876045227051, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.982373678025852e-07, |
|
"logits/generated": -2.5244622230529785, |
|
"logits/real": -2.1495394706726074, |
|
"logps/generated": -330.15972900390625, |
|
"logps/real": -266.6393127441406, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -22.55251693725586, |
|
"rewards/margins": 18.512014389038086, |
|
"rewards/real": -4.040503978729248, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.970622796709753e-07, |
|
"logits/generated": -2.536439895629883, |
|
"logits/real": -2.2411656379699707, |
|
"logps/generated": -324.23486328125, |
|
"logps/real": -313.6152038574219, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.34421157836914, |
|
"rewards/margins": 17.209888458251953, |
|
"rewards/real": -4.134321212768555, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9588719153936545e-07, |
|
"logits/generated": -2.4607491493225098, |
|
"logits/real": -2.160658597946167, |
|
"logps/generated": -324.77288818359375, |
|
"logps/real": -290.04840087890625, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.049968719482422, |
|
"rewards/margins": 16.71952247619629, |
|
"rewards/real": -4.330449104309082, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9471210340775555e-07, |
|
"logits/generated": -2.4545581340789795, |
|
"logits/real": -2.1188526153564453, |
|
"logps/generated": -321.65863037109375, |
|
"logps/real": -264.7879333496094, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.988567352294922, |
|
"rewards/margins": 17.094676971435547, |
|
"rewards/real": -4.893891334533691, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935370152761457e-07, |
|
"logits/generated": -2.501826763153076, |
|
"logits/real": -2.1853764057159424, |
|
"logps/generated": -314.2751770019531, |
|
"logps/real": -334.19024658203125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.757062911987305, |
|
"rewards/margins": 16.270082473754883, |
|
"rewards/real": -4.486981391906738, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9236192714453586e-07, |
|
"logits/generated": -2.4264206886291504, |
|
"logits/real": -2.102842330932617, |
|
"logps/generated": -314.3070373535156, |
|
"logps/real": -282.86907958984375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.95997428894043, |
|
"rewards/margins": 16.236602783203125, |
|
"rewards/real": -4.723372459411621, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9118683901292596e-07, |
|
"logits/generated": -2.4942514896392822, |
|
"logits/real": -2.1962990760803223, |
|
"logps/generated": -355.8966064453125, |
|
"logps/real": -321.4674377441406, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.816083908081055, |
|
"rewards/margins": 18.28668212890625, |
|
"rewards/real": -5.529400825500488, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9001175088131606e-07, |
|
"logits/generated": -2.455756425857544, |
|
"logits/real": -2.1011083126068115, |
|
"logps/generated": -342.75665283203125, |
|
"logps/real": -316.2607727050781, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.9534969329834, |
|
"rewards/margins": 18.188405990600586, |
|
"rewards/real": -4.7650885581970215, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.888366627497062e-07, |
|
"logits/generated": -2.4029483795166016, |
|
"logits/real": -1.9473835229873657, |
|
"logps/generated": -339.0224304199219, |
|
"logps/real": -281.7505798339844, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.03743553161621, |
|
"rewards/margins": 18.093334197998047, |
|
"rewards/real": -4.944101810455322, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8766157461809637e-07, |
|
"logits/generated": -2.419862747192383, |
|
"logits/real": -1.9635975360870361, |
|
"logps/generated": -363.16534423828125, |
|
"logps/real": -276.90020751953125, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.56096839904785, |
|
"rewards/margins": 21.17633628845215, |
|
"rewards/real": -4.3846282958984375, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8648648648648647e-07, |
|
"logits/generated": -2.450591564178467, |
|
"logits/real": -2.1148898601531982, |
|
"logps/generated": -319.2438659667969, |
|
"logps/real": -307.49114990234375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.591182708740234, |
|
"rewards/margins": 17.235889434814453, |
|
"rewards/real": -4.355295658111572, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.853113983548766e-07, |
|
"logits/generated": -2.463535785675049, |
|
"logits/real": -2.1628036499023438, |
|
"logps/generated": -348.6790771484375, |
|
"logps/real": -287.131103515625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.492891311645508, |
|
"rewards/margins": 18.62226104736328, |
|
"rewards/real": -4.870633125305176, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.841363102232667e-07, |
|
"logits/generated": -2.4891059398651123, |
|
"logits/real": -2.136472463607788, |
|
"logps/generated": -311.2068786621094, |
|
"logps/real": -282.12969970703125, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.66542625427246, |
|
"rewards/margins": 17.774410247802734, |
|
"rewards/real": -2.8910176753997803, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.829612220916568e-07, |
|
"logits/generated": -2.3496716022491455, |
|
"logits/real": -1.9166462421417236, |
|
"logps/generated": -323.8047790527344, |
|
"logps/real": -230.07870483398438, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.36273765563965, |
|
"rewards/margins": 18.25948715209961, |
|
"rewards/real": -4.103250503540039, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.81786133960047e-07, |
|
"logits/generated": -2.4094340801239014, |
|
"logits/real": -2.199237823486328, |
|
"logps/generated": -298.130615234375, |
|
"logps/real": -329.5294189453125, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.52131462097168, |
|
"rewards/margins": 15.532516479492188, |
|
"rewards/real": -2.9887986183166504, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8061104582843713e-07, |
|
"logits/generated": -2.443497896194458, |
|
"logits/real": -2.0504519939422607, |
|
"logps/generated": -318.1686096191406, |
|
"logps/real": -290.20123291015625, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -20.709726333618164, |
|
"rewards/margins": 16.9930362701416, |
|
"rewards/real": -3.7166907787323, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.794359576968273e-07, |
|
"logits/generated": -2.394792318344116, |
|
"logits/real": -1.9892990589141846, |
|
"logps/generated": -327.5433349609375, |
|
"logps/real": -269.41571044921875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.122539520263672, |
|
"rewards/margins": 18.496726989746094, |
|
"rewards/real": -3.6258106231689453, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.782608695652174e-07, |
|
"logits/generated": -2.366842746734619, |
|
"logits/real": -2.0693068504333496, |
|
"logps/generated": -334.1366882324219, |
|
"logps/real": -284.13916015625, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.300800323486328, |
|
"rewards/margins": 17.710512161254883, |
|
"rewards/real": -3.59028697013855, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.770857814336075e-07, |
|
"logits/generated": -2.446584701538086, |
|
"logits/real": -2.0087201595306396, |
|
"logps/generated": -345.32623291015625, |
|
"logps/real": -279.03729248046875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.770870208740234, |
|
"rewards/margins": 19.3250732421875, |
|
"rewards/real": -4.445800304412842, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.7591069330199765e-07, |
|
"logits/generated": -2.334024667739868, |
|
"logits/real": -2.0111477375030518, |
|
"logps/generated": -323.5222473144531, |
|
"logps/real": -255.93240356445312, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.07223129272461, |
|
"rewards/margins": 18.25441551208496, |
|
"rewards/real": -4.817814826965332, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7473560517038775e-07, |
|
"logits/generated": -2.2881593704223633, |
|
"logits/real": -1.9688920974731445, |
|
"logps/generated": -337.32891845703125, |
|
"logps/real": -274.38018798828125, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.056621551513672, |
|
"rewards/margins": 18.873544692993164, |
|
"rewards/real": -4.183079719543457, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.735605170387779e-07, |
|
"logits/generated": -2.383574962615967, |
|
"logits/real": -2.0922164916992188, |
|
"logps/generated": -316.5880432128906, |
|
"logps/real": -335.4088439941406, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.414888381958008, |
|
"rewards/margins": 17.6447811126709, |
|
"rewards/real": -2.7701072692871094, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7238542890716806e-07, |
|
"logits/generated": -2.3858695030212402, |
|
"logits/real": -2.084942102432251, |
|
"logps/generated": -297.20037841796875, |
|
"logps/real": -269.3205261230469, |
|
"loss": 0.012, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -19.70669937133789, |
|
"rewards/margins": 16.86494255065918, |
|
"rewards/real": -2.8417534828186035, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7121034077555816e-07, |
|
"logits/generated": -2.400209903717041, |
|
"logits/real": -2.1528279781341553, |
|
"logps/generated": -299.55523681640625, |
|
"logps/real": -293.73187255859375, |
|
"loss": 0.015, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -19.58156394958496, |
|
"rewards/margins": 16.313032150268555, |
|
"rewards/real": -3.268529176712036, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7003525264394826e-07, |
|
"logits/generated": -2.476315975189209, |
|
"logits/real": -2.0876708030700684, |
|
"logps/generated": -320.7831726074219, |
|
"logps/real": -248.1786651611328, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.875595092773438, |
|
"rewards/margins": 17.460813522338867, |
|
"rewards/real": -4.4147844314575195, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.688601645123384e-07, |
|
"logits/generated": -2.327359676361084, |
|
"logits/real": -2.1029770374298096, |
|
"logps/generated": -296.45855712890625, |
|
"logps/real": -255.8355255126953, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.623727798461914, |
|
"rewards/margins": 16.048782348632812, |
|
"rewards/real": -3.574944019317627, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6768507638072857e-07, |
|
"logits/generated": -2.342031240463257, |
|
"logits/real": -2.1226744651794434, |
|
"logps/generated": -310.0341796875, |
|
"logps/real": -291.40167236328125, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -20.308788299560547, |
|
"rewards/margins": 16.74529457092285, |
|
"rewards/real": -3.5634942054748535, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6650998824911867e-07, |
|
"logits/generated": -2.3393919467926025, |
|
"logits/real": -2.1731131076812744, |
|
"logps/generated": -310.68505859375, |
|
"logps/real": -282.8341064453125, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.373615264892578, |
|
"rewards/margins": 16.252016067504883, |
|
"rewards/real": -3.121600866317749, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.653349001175088e-07, |
|
"logits/generated": -2.316004753112793, |
|
"logits/real": -2.080775499343872, |
|
"logps/generated": -315.99456787109375, |
|
"logps/real": -242.0931396484375, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.821147918701172, |
|
"rewards/margins": 17.72539710998535, |
|
"rewards/real": -3.0957531929016113, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.641598119858989e-07, |
|
"logits/generated": -2.381744146347046, |
|
"logits/real": -2.0672965049743652, |
|
"logps/generated": -303.83160400390625, |
|
"logps/real": -283.36956787109375, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.453947067260742, |
|
"rewards/margins": 16.01351547241211, |
|
"rewards/real": -3.440433979034424, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.62984723854289e-07, |
|
"logits/generated": -2.3063855171203613, |
|
"logits/real": -2.1104612350463867, |
|
"logps/generated": -336.24542236328125, |
|
"logps/real": -223.45474243164062, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -22.51688003540039, |
|
"rewards/margins": 18.345937728881836, |
|
"rewards/real": -4.170942783355713, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.618096357226792e-07, |
|
"logits/generated": -2.3197531700134277, |
|
"logits/real": -2.018913984298706, |
|
"logps/generated": -336.23980712890625, |
|
"logps/real": -262.60992431640625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.693714141845703, |
|
"rewards/margins": 19.307472229003906, |
|
"rewards/real": -4.386241436004639, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6063454759106933e-07, |
|
"logits/generated": -2.2846219539642334, |
|
"logits/real": -2.0191433429718018, |
|
"logps/generated": -331.71783447265625, |
|
"logps/real": -268.02398681640625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.27705955505371, |
|
"rewards/margins": 19.076175689697266, |
|
"rewards/real": -4.2008867263793945, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.594594594594595e-07, |
|
"logits/generated": -2.3178963661193848, |
|
"logits/real": -2.035947561264038, |
|
"logps/generated": -323.0597229003906, |
|
"logps/real": -245.92855834960938, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.565059661865234, |
|
"rewards/margins": 18.452144622802734, |
|
"rewards/real": -4.112914085388184, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.582843713278496e-07, |
|
"logits/generated": -2.245072364807129, |
|
"logits/real": -1.9775609970092773, |
|
"logps/generated": -341.42755126953125, |
|
"logps/real": -241.91702270507812, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.08028221130371, |
|
"rewards/margins": 19.214641571044922, |
|
"rewards/real": -3.865643262863159, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571092831962397e-07, |
|
"logits/generated": -2.287692070007324, |
|
"logits/real": -1.9296283721923828, |
|
"logps/generated": -337.61688232421875, |
|
"logps/real": -272.8384704589844, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.93185043334961, |
|
"rewards/margins": 19.479442596435547, |
|
"rewards/real": -4.452404975891113, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5593419506462984e-07, |
|
"logits/generated": -2.231574535369873, |
|
"logits/real": -1.9272006750106812, |
|
"logps/generated": -347.8257141113281, |
|
"logps/real": -286.9981994628906, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.885425567626953, |
|
"rewards/margins": 19.772022247314453, |
|
"rewards/real": -4.113402843475342, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5475910693301995e-07, |
|
"logits/generated": -2.2364954948425293, |
|
"logits/real": -1.9917691946029663, |
|
"logps/generated": -348.7269592285156, |
|
"logps/real": -309.1827392578125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.005229949951172, |
|
"rewards/margins": 17.796672821044922, |
|
"rewards/real": -4.208555698394775, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.535840188014101e-07, |
|
"logits/generated": -2.2995412349700928, |
|
"logits/real": -2.10380482673645, |
|
"logps/generated": -352.90167236328125, |
|
"logps/real": -314.281982421875, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.920124053955078, |
|
"rewards/margins": 18.855754852294922, |
|
"rewards/real": -5.064369201660156, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5240893066980025e-07, |
|
"logits/generated": -2.256836414337158, |
|
"logits/real": -1.9991710186004639, |
|
"logps/generated": -344.7554016113281, |
|
"logps/real": -267.4646301269531, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.83219337463379, |
|
"rewards/margins": 19.01470375061035, |
|
"rewards/real": -4.817486763000488, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5123384253819036e-07, |
|
"logits/generated": -2.294147491455078, |
|
"logits/real": -2.00473952293396, |
|
"logps/generated": -336.1775817871094, |
|
"logps/real": -269.46502685546875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.781789779663086, |
|
"rewards/margins": 19.8565731048584, |
|
"rewards/real": -3.9252192974090576, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5005875440658046e-07, |
|
"logits/generated": -2.3603179454803467, |
|
"logits/real": -1.9096410274505615, |
|
"logps/generated": -358.67877197265625, |
|
"logps/real": -259.40484619140625, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.424976348876953, |
|
"rewards/margins": 21.086185455322266, |
|
"rewards/real": -3.338792324066162, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.488836662749706e-07, |
|
"logits/generated": -2.351179599761963, |
|
"logits/real": -2.0530447959899902, |
|
"logps/generated": -332.7562561035156, |
|
"logps/real": -271.2536926269531, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.84016227722168, |
|
"rewards/margins": 19.333303451538086, |
|
"rewards/real": -3.5068557262420654, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4770857814336077e-07, |
|
"logits/generated": -2.1961145401000977, |
|
"logits/real": -1.9656083583831787, |
|
"logps/generated": -326.59356689453125, |
|
"logps/real": -220.73312377929688, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.737462997436523, |
|
"rewards/margins": 19.191755294799805, |
|
"rewards/real": -3.5457072257995605, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4653349001175087e-07, |
|
"logits/generated": -2.2957189083099365, |
|
"logits/real": -2.001795768737793, |
|
"logps/generated": -318.9930725097656, |
|
"logps/real": -281.51959228515625, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.853168487548828, |
|
"rewards/margins": 18.380849838256836, |
|
"rewards/real": -3.4723198413848877, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.45358401880141e-07, |
|
"logits/generated": -2.263975143432617, |
|
"logits/real": -1.9058456420898438, |
|
"logps/generated": -349.5631103515625, |
|
"logps/real": -259.2726745605469, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.311424255371094, |
|
"rewards/margins": 20.01760482788086, |
|
"rewards/real": -4.29381799697876, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.441833137485311e-07, |
|
"logits/generated": -2.22051739692688, |
|
"logits/real": -2.042902708053589, |
|
"logps/generated": -346.29327392578125, |
|
"logps/real": -249.6488800048828, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.093563079833984, |
|
"rewards/margins": 19.136564254760742, |
|
"rewards/real": -3.956998825073242, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.430082256169212e-07, |
|
"logits/generated": -2.354282855987549, |
|
"logits/real": -1.9278910160064697, |
|
"logps/generated": -352.00714111328125, |
|
"logps/real": -265.89508056640625, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -24.244333267211914, |
|
"rewards/margins": 20.059101104736328, |
|
"rewards/real": -4.185232162475586, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418331374853114e-07, |
|
"logits/generated": -2.28190541267395, |
|
"logits/real": -2.0574002265930176, |
|
"logps/generated": -319.3107604980469, |
|
"logps/real": -277.8495788574219, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.27080726623535, |
|
"rewards/margins": 18.873634338378906, |
|
"rewards/real": -3.397172212600708, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4065804935370153e-07, |
|
"logits/generated": -2.241192102432251, |
|
"logits/real": -1.9571336507797241, |
|
"logps/generated": -327.8152770996094, |
|
"logps/real": -257.9501953125, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.218231201171875, |
|
"rewards/margins": 17.811767578125, |
|
"rewards/real": -4.40646505355835, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3948296122209163e-07, |
|
"logits/generated": -2.2373881340026855, |
|
"logits/real": -2.0191516876220703, |
|
"logps/generated": -345.68438720703125, |
|
"logps/real": -284.0022888183594, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.410137176513672, |
|
"rewards/margins": 19.024240493774414, |
|
"rewards/real": -4.385898113250732, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.383078730904818e-07, |
|
"logits/generated": -2.1850571632385254, |
|
"logits/real": -1.893228530883789, |
|
"logps/generated": -329.25799560546875, |
|
"logps/real": -266.69317626953125, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.37875747680664, |
|
"rewards/margins": 18.571931838989258, |
|
"rewards/real": -3.806826114654541, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.371327849588719e-07, |
|
"logits/generated": -2.1856093406677246, |
|
"logits/real": -1.9036449193954468, |
|
"logps/generated": -352.7874755859375, |
|
"logps/real": -255.8990936279297, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.45499610900879, |
|
"rewards/margins": 20.177873611450195, |
|
"rewards/real": -5.277122497558594, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3595769682726202e-07, |
|
"logits/generated": -2.1994104385375977, |
|
"logits/real": -2.0348970890045166, |
|
"logps/generated": -356.5615234375, |
|
"logps/real": -291.3761291503906, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.836057662963867, |
|
"rewards/margins": 19.16860580444336, |
|
"rewards/real": -5.667454719543457, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3478260869565217e-07, |
|
"logits/generated": -2.1490817070007324, |
|
"logits/real": -1.9687789678573608, |
|
"logps/generated": -370.4169006347656, |
|
"logps/real": -257.3158264160156, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.3896484375, |
|
"rewards/margins": 21.185771942138672, |
|
"rewards/real": -5.20387601852417, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.336075205640423e-07, |
|
"logits/generated": -2.1517624855041504, |
|
"logits/real": -1.960091233253479, |
|
"logps/generated": -379.77825927734375, |
|
"logps/real": -356.1867980957031, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.38129234313965, |
|
"rewards/margins": 19.99581527709961, |
|
"rewards/real": -6.385481357574463, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3243243243243243e-07, |
|
"logits/generated": -2.0364327430725098, |
|
"logits/real": -1.8880960941314697, |
|
"logps/generated": -373.9236755371094, |
|
"logps/real": -324.3224182128906, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.41314697265625, |
|
"rewards/margins": 20.459888458251953, |
|
"rewards/real": -5.953259468078613, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3125734430082255e-07, |
|
"logits/generated": -2.0131492614746094, |
|
"logits/real": -1.8854057788848877, |
|
"logps/generated": -390.64788818359375, |
|
"logps/real": -305.11285400390625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.03790283203125, |
|
"rewards/margins": 22.16775131225586, |
|
"rewards/real": -5.870150566101074, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3008225616921268e-07, |
|
"logits/generated": -1.956024408340454, |
|
"logits/real": -1.9189265966415405, |
|
"logps/generated": -396.72283935546875, |
|
"logps/real": -286.6882019042969, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.13210678100586, |
|
"rewards/margins": 21.822053909301758, |
|
"rewards/real": -7.310055732727051, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.289071680376028e-07, |
|
"logits/generated": -2.030214309692383, |
|
"logits/real": -1.8881438970565796, |
|
"logps/generated": -383.7499084472656, |
|
"logps/real": -250.5139617919922, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.844661712646484, |
|
"rewards/margins": 21.812536239624023, |
|
"rewards/real": -6.032127380371094, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2773207990599294e-07, |
|
"logits/generated": -2.08001708984375, |
|
"logits/real": -1.891178846359253, |
|
"logps/generated": -351.56988525390625, |
|
"logps/real": -296.24371337890625, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.752206802368164, |
|
"rewards/margins": 18.19687271118164, |
|
"rewards/real": -6.555333614349365, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2655699177438307e-07, |
|
"logits/generated": -2.0704877376556396, |
|
"logits/real": -1.90077805519104, |
|
"logps/generated": -355.61767578125, |
|
"logps/real": -274.2396545410156, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.99367904663086, |
|
"rewards/margins": 19.7630615234375, |
|
"rewards/real": -5.230617523193359, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2538190364277322e-07, |
|
"logits/generated": -2.140763282775879, |
|
"logits/real": -1.9581050872802734, |
|
"logps/generated": -362.32049560546875, |
|
"logps/real": -258.9219055175781, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.47673988342285, |
|
"rewards/margins": 20.179256439208984, |
|
"rewards/real": -5.2974853515625, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2420681551116332e-07, |
|
"logits/generated": -2.1344077587127686, |
|
"logits/real": -1.9745477437973022, |
|
"logps/generated": -365.5986022949219, |
|
"logps/real": -306.57513427734375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.825016021728516, |
|
"rewards/margins": 19.670211791992188, |
|
"rewards/real": -5.154806613922119, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2303172737955345e-07, |
|
"logits/generated": -2.124387264251709, |
|
"logits/real": -1.9389822483062744, |
|
"logps/generated": -334.8365173339844, |
|
"logps/real": -278.90850830078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.859643936157227, |
|
"rewards/margins": 17.987585067749023, |
|
"rewards/real": -4.8720574378967285, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.218566392479436e-07, |
|
"logits/generated": -2.079254627227783, |
|
"logits/real": -1.930846929550171, |
|
"logps/generated": -367.32586669921875, |
|
"logps/real": -301.3092346191406, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.252771377563477, |
|
"rewards/margins": 21.149532318115234, |
|
"rewards/real": -5.103243350982666, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.206815511163337e-07, |
|
"logits/generated": -2.1294503211975098, |
|
"logits/real": -1.9343706369400024, |
|
"logps/generated": -354.76513671875, |
|
"logps/real": -296.0406799316406, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.67839241027832, |
|
"rewards/margins": 18.78042221069336, |
|
"rewards/real": -5.897971153259277, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.1950646298472383e-07, |
|
"logits/generated": -2.1347403526306152, |
|
"logits/real": -1.9254436492919922, |
|
"logps/generated": -368.8210754394531, |
|
"logps/real": -288.74053955078125, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.642257690429688, |
|
"rewards/margins": 21.017126083374023, |
|
"rewards/real": -4.625129222869873, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.18331374853114e-07, |
|
"logits/generated": -2.1848769187927246, |
|
"logits/real": -2.0680902004241943, |
|
"logps/generated": -366.6266784667969, |
|
"logps/real": -328.2076416015625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.437532424926758, |
|
"rewards/margins": 19.320016860961914, |
|
"rewards/real": -5.117516040802002, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.171562867215041e-07, |
|
"logits/generated": -2.0878078937530518, |
|
"logits/real": -1.9517886638641357, |
|
"logps/generated": -348.5774230957031, |
|
"logps/real": -299.02374267578125, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.685237884521484, |
|
"rewards/margins": 19.952234268188477, |
|
"rewards/real": -4.733007431030273, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1598119858989422e-07, |
|
"logits/generated": -2.1011247634887695, |
|
"logits/real": -1.9725592136383057, |
|
"logps/generated": -357.71051025390625, |
|
"logps/real": -290.8995056152344, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.373262405395508, |
|
"rewards/margins": 20.132232666015625, |
|
"rewards/real": -5.241027355194092, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1480611045828437e-07, |
|
"logits/generated": -2.1049911975860596, |
|
"logits/real": -1.9649174213409424, |
|
"logps/generated": -385.41241455078125, |
|
"logps/real": -274.30303955078125, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.450220108032227, |
|
"rewards/margins": 21.493106842041016, |
|
"rewards/real": -5.957114219665527, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.136310223266745e-07, |
|
"logits/generated": -2.1464877128601074, |
|
"logits/real": -1.9520257711410522, |
|
"logps/generated": -358.3988342285156, |
|
"logps/real": -258.34478759765625, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.753520965576172, |
|
"rewards/margins": 20.76874351501465, |
|
"rewards/real": -4.984780311584473, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.124559341950646e-07, |
|
"logits/generated": -2.197218418121338, |
|
"logits/real": -2.0531296730041504, |
|
"logps/generated": -342.52374267578125, |
|
"logps/real": -299.6061096191406, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.946630477905273, |
|
"rewards/margins": 19.103984832763672, |
|
"rewards/real": -3.842647075653076, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1128084606345475e-07, |
|
"logits/generated": -2.251016139984131, |
|
"logits/real": -2.0284006595611572, |
|
"logps/generated": -350.19488525390625, |
|
"logps/real": -281.47698974609375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.785280227661133, |
|
"rewards/margins": 19.8306827545166, |
|
"rewards/real": -4.9545979499816895, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.1010575793184488e-07, |
|
"logits/generated": -2.2240631580352783, |
|
"logits/real": -2.088017702102661, |
|
"logps/generated": -347.8515319824219, |
|
"logps/real": -317.2982177734375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.338415145874023, |
|
"rewards/margins": 19.154300689697266, |
|
"rewards/real": -4.184115409851074, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.08930669800235e-07, |
|
"logits/generated": -2.1802897453308105, |
|
"logits/real": -2.0314040184020996, |
|
"logps/generated": -345.7802429199219, |
|
"logps/real": -295.5107421875, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.624263763427734, |
|
"rewards/margins": 18.748220443725586, |
|
"rewards/real": -4.876040458679199, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775558166862514e-07, |
|
"logits/generated": -2.257678270339966, |
|
"logits/real": -2.0878357887268066, |
|
"logps/generated": -353.88946533203125, |
|
"logps/real": -299.9291687011719, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.746679306030273, |
|
"rewards/margins": 19.866544723510742, |
|
"rewards/real": -4.880134582519531, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0658049353701526e-07, |
|
"logits/generated": -2.3007264137268066, |
|
"logits/real": -2.110032320022583, |
|
"logps/generated": -344.11639404296875, |
|
"logps/real": -284.4814453125, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.620412826538086, |
|
"rewards/margins": 18.702817916870117, |
|
"rewards/real": -3.917595386505127, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0540540540540542e-07, |
|
"logits/generated": -2.27628755569458, |
|
"logits/real": -2.1205649375915527, |
|
"logps/generated": -329.26556396484375, |
|
"logps/real": -263.96136474609375, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.701011657714844, |
|
"rewards/margins": 19.02513313293457, |
|
"rewards/real": -3.675877332687378, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0423031727379552e-07, |
|
"logits/generated": -2.3053290843963623, |
|
"logits/real": -2.0642311573028564, |
|
"logps/generated": -334.71331787109375, |
|
"logps/real": -276.95147705078125, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.66802215576172, |
|
"rewards/margins": 18.36069107055664, |
|
"rewards/real": -4.30733585357666, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0305522914218565e-07, |
|
"logits/generated": -2.26841402053833, |
|
"logits/real": -2.039511203765869, |
|
"logps/generated": -342.7603759765625, |
|
"logps/real": -269.6524963378906, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.502750396728516, |
|
"rewards/margins": 18.23208236694336, |
|
"rewards/real": -5.270669460296631, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.018801410105758e-07, |
|
"logits/generated": -2.2973265647888184, |
|
"logits/real": -2.1449685096740723, |
|
"logps/generated": -337.704345703125, |
|
"logps/real": -259.8895568847656, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -23.042863845825195, |
|
"rewards/margins": 18.184226989746094, |
|
"rewards/real": -4.858633041381836, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.007050528789659e-07, |
|
"logits/generated": -2.286045551300049, |
|
"logits/real": -2.046161413192749, |
|
"logps/generated": -351.5479736328125, |
|
"logps/real": -288.12640380859375, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.482677459716797, |
|
"rewards/margins": 19.257810592651367, |
|
"rewards/real": -4.2248687744140625, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9952996474735603e-07, |
|
"logits/generated": -2.2416133880615234, |
|
"logits/real": -2.0939717292785645, |
|
"logps/generated": -358.086669921875, |
|
"logps/real": -261.29815673828125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.18197250366211, |
|
"rewards/margins": 20.0886287689209, |
|
"rewards/real": -5.0933451652526855, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9835487661574619e-07, |
|
"logits/generated": -2.221487283706665, |
|
"logits/real": -2.0919318199157715, |
|
"logps/generated": -351.2422180175781, |
|
"logps/real": -277.42901611328125, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.407487869262695, |
|
"rewards/margins": 19.2531795501709, |
|
"rewards/real": -5.154306888580322, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.971797884841363e-07, |
|
"logits/generated": -2.1991019248962402, |
|
"logits/real": -2.0188393592834473, |
|
"logps/generated": -374.80938720703125, |
|
"logps/real": -256.5660705566406, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.856903076171875, |
|
"rewards/margins": 21.417938232421875, |
|
"rewards/real": -5.438962936401367, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9600470035252641e-07, |
|
"logits/generated": -2.1711456775665283, |
|
"logits/real": -2.041792392730713, |
|
"logps/generated": -346.64105224609375, |
|
"logps/real": -298.97564697265625, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.955524444580078, |
|
"rewards/margins": 19.128503799438477, |
|
"rewards/real": -4.827020645141602, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9482961222091657e-07, |
|
"logits/generated": -2.20910906791687, |
|
"logits/real": -2.0180556774139404, |
|
"logps/generated": -336.7091369628906, |
|
"logps/real": -261.6615295410156, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -24.166288375854492, |
|
"rewards/margins": 19.48313331604004, |
|
"rewards/real": -4.683154106140137, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.936545240893067e-07, |
|
"logits/generated": -2.190821409225464, |
|
"logits/real": -1.9756942987442017, |
|
"logps/generated": -337.36322021484375, |
|
"logps/real": -244.9541778564453, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -24.287092208862305, |
|
"rewards/margins": 19.412704467773438, |
|
"rewards/real": -4.874386787414551, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.924794359576968e-07, |
|
"logits/generated": -2.2208573818206787, |
|
"logits/real": -1.976784110069275, |
|
"logps/generated": -346.1979675292969, |
|
"logps/real": -275.9041442871094, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.10281753540039, |
|
"rewards/margins": 19.66716194152832, |
|
"rewards/real": -4.435654640197754, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9130434782608695e-07, |
|
"logits/generated": -2.166905403137207, |
|
"logits/real": -2.006438732147217, |
|
"logps/generated": -344.233154296875, |
|
"logps/real": -315.53936767578125, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.461597442626953, |
|
"rewards/margins": 19.2020263671875, |
|
"rewards/real": -4.259572505950928, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9012925969447708e-07, |
|
"logits/generated": -2.227118730545044, |
|
"logits/real": -2.0388541221618652, |
|
"logps/generated": -336.47052001953125, |
|
"logps/real": -268.1602783203125, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.303186416625977, |
|
"rewards/margins": 18.488948822021484, |
|
"rewards/real": -4.814234733581543, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8895417156286718e-07, |
|
"logits/generated": -2.1890883445739746, |
|
"logits/real": -2.04649019241333, |
|
"logps/generated": -353.79461669921875, |
|
"logps/real": -307.52423095703125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.001995086669922, |
|
"rewards/margins": 19.549617767333984, |
|
"rewards/real": -4.452378749847412, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8777908343125734e-07, |
|
"logits/generated": -2.1731953620910645, |
|
"logits/real": -1.982879638671875, |
|
"logps/generated": -369.5123596191406, |
|
"logps/real": -300.3362121582031, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.723047256469727, |
|
"rewards/margins": 22.299365997314453, |
|
"rewards/real": -4.423679828643799, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8660399529964746e-07, |
|
"logits/generated": -2.1560447216033936, |
|
"logits/real": -2.040362596511841, |
|
"logps/generated": -342.84783935546875, |
|
"logps/real": -278.237548828125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.324447631835938, |
|
"rewards/margins": 19.210416793823242, |
|
"rewards/real": -5.114028453826904, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8542890716803762e-07, |
|
"logits/generated": -2.146101474761963, |
|
"logits/real": -2.0181069374084473, |
|
"logps/generated": -369.8932189941406, |
|
"logps/real": -274.1222839355469, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.28493309020996, |
|
"rewards/margins": 20.953115463256836, |
|
"rewards/real": -5.331815719604492, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8425381903642772e-07, |
|
"logits/generated": -2.1590821743011475, |
|
"logits/real": -1.9754966497421265, |
|
"logps/generated": -386.6169738769531, |
|
"logps/real": -297.4996643066406, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.224761962890625, |
|
"rewards/margins": 21.103734970092773, |
|
"rewards/real": -5.121026039123535, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8307873090481785e-07, |
|
"logits/generated": -2.1612915992736816, |
|
"logits/real": -1.959288239479065, |
|
"logps/generated": -363.1042785644531, |
|
"logps/real": -258.10174560546875, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -25.38741111755371, |
|
"rewards/margins": 20.513093948364258, |
|
"rewards/real": -4.874313831329346, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.81903642773208e-07, |
|
"logits/generated": -2.098233461380005, |
|
"logits/real": -1.9145100116729736, |
|
"logps/generated": -371.4063720703125, |
|
"logps/real": -243.34945678710938, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -27.423503875732422, |
|
"rewards/margins": 22.34528923034668, |
|
"rewards/real": -5.078217506408691, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.807285546415981e-07, |
|
"logits/generated": -2.1498446464538574, |
|
"logits/real": -1.9860435724258423, |
|
"logps/generated": -361.0787658691406, |
|
"logps/real": -271.49951171875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.6298885345459, |
|
"rewards/margins": 20.82622718811035, |
|
"rewards/real": -4.803657531738281, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7955346650998823e-07, |
|
"logits/generated": -2.120079278945923, |
|
"logits/real": -2.00551700592041, |
|
"logps/generated": -360.60699462890625, |
|
"logps/real": -287.0360107421875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.6106014251709, |
|
"rewards/margins": 20.524362564086914, |
|
"rewards/real": -5.086238861083984, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7837837837837838e-07, |
|
"logits/generated": -2.1424782276153564, |
|
"logits/real": -1.9207490682601929, |
|
"logps/generated": -393.1436462402344, |
|
"logps/real": -282.522705078125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.278972625732422, |
|
"rewards/margins": 22.59919548034668, |
|
"rewards/real": -5.679778099060059, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.772032902467685e-07, |
|
"logits/generated": -2.0800764560699463, |
|
"logits/real": -2.001923084259033, |
|
"logps/generated": -348.87725830078125, |
|
"logps/real": -298.7556457519531, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.367755889892578, |
|
"rewards/margins": 18.799022674560547, |
|
"rewards/real": -5.568735599517822, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7602820211515861e-07, |
|
"logits/generated": -2.1074421405792236, |
|
"logits/real": -2.005859375, |
|
"logps/generated": -356.49200439453125, |
|
"logps/real": -305.5742492675781, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.63992691040039, |
|
"rewards/margins": 19.582958221435547, |
|
"rewards/real": -5.056967735290527, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7485311398354877e-07, |
|
"logits/generated": -2.0994040966033936, |
|
"logits/real": -2.007887601852417, |
|
"logps/generated": -365.8727111816406, |
|
"logps/real": -317.9673156738281, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -25.743017196655273, |
|
"rewards/margins": 20.144943237304688, |
|
"rewards/real": -5.598074913024902, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.736780258519389e-07, |
|
"logits/generated": -2.0684878826141357, |
|
"logits/real": -2.0286765098571777, |
|
"logps/generated": -356.78631591796875, |
|
"logps/real": -321.638916015625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.5567626953125, |
|
"rewards/margins": 20.853425979614258, |
|
"rewards/real": -4.703336715698242, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.72502937720329e-07, |
|
"logits/generated": -2.081726312637329, |
|
"logits/real": -1.862388014793396, |
|
"logps/generated": -380.224853515625, |
|
"logps/real": -282.75091552734375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.150707244873047, |
|
"rewards/margins": 22.10866928100586, |
|
"rewards/real": -5.042040824890137, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7132784958871915e-07, |
|
"logits/generated": -2.0935699939727783, |
|
"logits/real": -1.931623101234436, |
|
"logps/generated": -376.69622802734375, |
|
"logps/real": -274.62872314453125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.69793128967285, |
|
"rewards/margins": 22.377689361572266, |
|
"rewards/real": -5.3202433586120605, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7015276145710928e-07, |
|
"logits/generated": -2.098092794418335, |
|
"logits/real": -1.9440534114837646, |
|
"logps/generated": -383.3734436035156, |
|
"logps/real": -311.0740661621094, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.666534423828125, |
|
"rewards/margins": 22.305049896240234, |
|
"rewards/real": -5.361481666564941, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6897767332549938e-07, |
|
"logits/generated": -2.1007964611053467, |
|
"logits/real": -1.9998801946640015, |
|
"logps/generated": -395.33355712890625, |
|
"logps/real": -310.078857421875, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.96969985961914, |
|
"rewards/margins": 24.22079086303711, |
|
"rewards/real": -4.748910903930664, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6780258519388953e-07, |
|
"logits/generated": -2.099114179611206, |
|
"logits/real": -1.94070303440094, |
|
"logps/generated": -377.9015197753906, |
|
"logps/real": -274.0469970703125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.645259857177734, |
|
"rewards/margins": 22.4710750579834, |
|
"rewards/real": -5.174184322357178, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6662749706227966e-07, |
|
"logits/generated": -2.0564610958099365, |
|
"logits/real": -1.9704822301864624, |
|
"logps/generated": -345.58868408203125, |
|
"logps/real": -283.3597717285156, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -24.705806732177734, |
|
"rewards/margins": 20.07907485961914, |
|
"rewards/real": -4.626730918884277, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.654524089306698e-07, |
|
"logits/generated": -2.08727765083313, |
|
"logits/real": -1.8853626251220703, |
|
"logps/generated": -369.41058349609375, |
|
"logps/real": -255.86831665039062, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.35872459411621, |
|
"rewards/margins": 21.617361068725586, |
|
"rewards/real": -4.741362571716309, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6427732079905992e-07, |
|
"logits/generated": -1.9915502071380615, |
|
"logits/real": -1.8262687921524048, |
|
"logps/generated": -378.84918212890625, |
|
"logps/real": -285.84429931640625, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.37435531616211, |
|
"rewards/margins": 21.355920791625977, |
|
"rewards/real": -6.018433570861816, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6310223266745005e-07, |
|
"logits/generated": -2.0596718788146973, |
|
"logits/real": -1.914754867553711, |
|
"logps/generated": -378.89105224609375, |
|
"logps/real": -328.7146301269531, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.448780059814453, |
|
"rewards/margins": 21.816377639770508, |
|
"rewards/real": -5.6324052810668945, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.619271445358402e-07, |
|
"logits/generated": -2.040947198867798, |
|
"logits/real": -1.8854957818984985, |
|
"logps/generated": -413.3182067871094, |
|
"logps/real": -317.62567138671875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.338998794555664, |
|
"rewards/margins": 25.05344009399414, |
|
"rewards/real": -5.285560607910156, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.607520564042303e-07, |
|
"logits/generated": -2.012472152709961, |
|
"logits/real": -1.8490098714828491, |
|
"logps/generated": -397.6828918457031, |
|
"logps/real": -272.8224182128906, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.052814483642578, |
|
"rewards/margins": 23.327375411987305, |
|
"rewards/real": -5.72544002532959, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5957696827262043e-07, |
|
"logits/generated": -2.029719829559326, |
|
"logits/real": -1.8433088064193726, |
|
"logps/generated": -398.36297607421875, |
|
"logps/real": -265.16351318359375, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.465087890625, |
|
"rewards/margins": 23.546363830566406, |
|
"rewards/real": -5.918723106384277, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5840188014101058e-07, |
|
"logits/generated": -1.9572408199310303, |
|
"logits/real": -1.790989875793457, |
|
"logps/generated": -419.62109375, |
|
"logps/real": -249.32080078125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.099082946777344, |
|
"rewards/margins": 25.258312225341797, |
|
"rewards/real": -6.840768337249756, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.572267920094007e-07, |
|
"logits/generated": -1.9638580083847046, |
|
"logits/real": -1.9275802373886108, |
|
"logps/generated": -400.74395751953125, |
|
"logps/real": -334.06036376953125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.58945083618164, |
|
"rewards/margins": 23.080732345581055, |
|
"rewards/real": -6.508718967437744, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.560517038777908e-07, |
|
"logits/generated": -2.0270586013793945, |
|
"logits/real": -1.8505998849868774, |
|
"logps/generated": -407.79156494140625, |
|
"logps/real": -288.97918701171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.032482147216797, |
|
"rewards/margins": 25.01006507873535, |
|
"rewards/real": -5.0224175453186035, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5487661574618097e-07, |
|
"logits/generated": -2.016819477081299, |
|
"logits/real": -1.882765769958496, |
|
"logps/generated": -400.1058349609375, |
|
"logps/real": -288.342529296875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.87357521057129, |
|
"rewards/margins": 23.768878936767578, |
|
"rewards/real": -6.104691028594971, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.537015276145711e-07, |
|
"logits/generated": -1.9431253671646118, |
|
"logits/real": -1.842599868774414, |
|
"logps/generated": -386.32745361328125, |
|
"logps/real": -266.6620788574219, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.412429809570312, |
|
"rewards/margins": 22.158546447753906, |
|
"rewards/real": -6.253881454467773, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.525264394829612e-07, |
|
"logits/generated": -1.9972089529037476, |
|
"logits/real": -1.828897476196289, |
|
"logps/generated": -403.88665771484375, |
|
"logps/real": -264.700927734375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.998998641967773, |
|
"rewards/margins": 23.43665885925293, |
|
"rewards/real": -5.5623393058776855, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5135135135135135e-07, |
|
"logits/generated": -1.9987987279891968, |
|
"logits/real": -1.8746980428695679, |
|
"logps/generated": -408.3876037597656, |
|
"logps/real": -303.7450256347656, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.954736709594727, |
|
"rewards/margins": 23.751495361328125, |
|
"rewards/real": -6.203239440917969, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5017626321974148e-07, |
|
"logits/generated": -1.9847347736358643, |
|
"logits/real": -1.795657753944397, |
|
"logps/generated": -410.84539794921875, |
|
"logps/real": -282.5588684082031, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.85659408569336, |
|
"rewards/margins": 24.667091369628906, |
|
"rewards/real": -6.189502239227295, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4900117508813158e-07, |
|
"logits/generated": -1.9983108043670654, |
|
"logits/real": -1.780178427696228, |
|
"logps/generated": -420.0068359375, |
|
"logps/real": -287.4795227050781, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -30.99736976623535, |
|
"rewards/margins": 24.340909957885742, |
|
"rewards/real": -6.656461238861084, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4782608695652173e-07, |
|
"logits/generated": -2.0104641914367676, |
|
"logits/real": -1.8685280084609985, |
|
"logps/generated": -394.21917724609375, |
|
"logps/real": -291.6011962890625, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -29.079885482788086, |
|
"rewards/margins": 22.35186767578125, |
|
"rewards/real": -6.728022575378418, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4665099882491186e-07, |
|
"logits/generated": -1.9756362438201904, |
|
"logits/real": -1.8447771072387695, |
|
"logps/generated": -391.41680908203125, |
|
"logps/real": -349.69464111328125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.011831283569336, |
|
"rewards/margins": 22.25851058959961, |
|
"rewards/real": -5.753323554992676, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.45475910693302e-07, |
|
"logits/generated": -1.9767194986343384, |
|
"logits/real": -1.779309630393982, |
|
"logps/generated": -391.83380126953125, |
|
"logps/real": -278.58905029296875, |
|
"loss": 0.005, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.746475219726562, |
|
"rewards/margins": 22.14328384399414, |
|
"rewards/real": -6.603189945220947, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4430082256169212e-07, |
|
"logits/generated": -1.9377864599227905, |
|
"logits/real": -1.8163446187973022, |
|
"logps/generated": -418.55731201171875, |
|
"logps/real": -318.5293273925781, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.8940486907959, |
|
"rewards/margins": 25.708526611328125, |
|
"rewards/real": -5.18552303314209, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4312573443008224e-07, |
|
"logits/generated": -1.9508612155914307, |
|
"logits/real": -1.8122934103012085, |
|
"logps/generated": -394.3963928222656, |
|
"logps/real": -335.4398498535156, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.22671127319336, |
|
"rewards/margins": 22.93834114074707, |
|
"rewards/real": -5.288372993469238, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4195064629847237e-07, |
|
"logits/generated": -1.9361099004745483, |
|
"logits/real": -1.7594772577285767, |
|
"logps/generated": -423.53515625, |
|
"logps/real": -293.36834716796875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.2425479888916, |
|
"rewards/margins": 25.244579315185547, |
|
"rewards/real": -5.9979681968688965, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.407755581668625e-07, |
|
"logits/generated": -1.9259124994277954, |
|
"logits/real": -1.7739746570587158, |
|
"logps/generated": -394.9298095703125, |
|
"logps/real": -288.7347412109375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.674121856689453, |
|
"rewards/margins": 22.565471649169922, |
|
"rewards/real": -6.108652114868164, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3960047003525263e-07, |
|
"logits/generated": -1.945650339126587, |
|
"logits/real": -1.7504583597183228, |
|
"logps/generated": -425.6507263183594, |
|
"logps/real": -254.51651000976562, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.8286190032959, |
|
"rewards/margins": 24.612564086914062, |
|
"rewards/real": -6.216059684753418, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3842538190364278e-07, |
|
"logits/generated": -1.948203444480896, |
|
"logits/real": -1.8336362838745117, |
|
"logps/generated": -414.30035400390625, |
|
"logps/real": -326.8367614746094, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.730371475219727, |
|
"rewards/margins": 23.398197174072266, |
|
"rewards/real": -6.332175254821777, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.372502937720329e-07, |
|
"logits/generated": -2.027785539627075, |
|
"logits/real": -1.9192225933074951, |
|
"logps/generated": -383.18121337890625, |
|
"logps/real": -313.5802307128906, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.630611419677734, |
|
"rewards/margins": 22.166791915893555, |
|
"rewards/real": -4.463819980621338, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.36075205640423e-07, |
|
"logits/generated": -1.9468467235565186, |
|
"logits/real": -1.8661794662475586, |
|
"logps/generated": -399.8686828613281, |
|
"logps/real": -270.86627197265625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.764408111572266, |
|
"rewards/margins": 23.469295501708984, |
|
"rewards/real": -5.295111179351807, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3490011750881317e-07, |
|
"logits/generated": -2.049177408218384, |
|
"logits/real": -1.8789154291152954, |
|
"logps/generated": -396.53472900390625, |
|
"logps/real": -308.8271484375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.53740882873535, |
|
"rewards/margins": 24.08509063720703, |
|
"rewards/real": -4.4523186683654785, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.337250293772033e-07, |
|
"logits/generated": -2.0029730796813965, |
|
"logits/real": -1.934480905532837, |
|
"logps/generated": -374.8182067871094, |
|
"logps/real": -322.31915283203125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.162517547607422, |
|
"rewards/margins": 20.880863189697266, |
|
"rewards/real": -5.281655788421631, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.325499412455934e-07, |
|
"logits/generated": -1.9719984531402588, |
|
"logits/real": -1.801230788230896, |
|
"logps/generated": -394.8279724121094, |
|
"logps/real": -306.5439453125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -28.897329330444336, |
|
"rewards/margins": 23.881816864013672, |
|
"rewards/real": -5.015511512756348, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3137485311398355e-07, |
|
"logits/generated": -1.9651342630386353, |
|
"logits/real": -1.8825359344482422, |
|
"logps/generated": -364.9615478515625, |
|
"logps/real": -305.80230712890625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -25.72686195373535, |
|
"rewards/margins": 20.991674423217773, |
|
"rewards/real": -4.735187530517578, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.3019976498237368e-07, |
|
"logits/generated": -2.0056333541870117, |
|
"logits/real": -1.8786531686782837, |
|
"logps/generated": -405.5434265136719, |
|
"logps/real": -294.9462585449219, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.38471031188965, |
|
"rewards/margins": 24.157129287719727, |
|
"rewards/real": -5.2275800704956055, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.290246768507638e-07, |
|
"logits/generated": -1.947729468345642, |
|
"logits/real": -1.8916778564453125, |
|
"logps/generated": -375.58026123046875, |
|
"logps/real": -303.40283203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -26.317413330078125, |
|
"rewards/margins": 21.212438583374023, |
|
"rewards/real": -5.104973793029785, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2784958871915393e-07, |
|
"logits/generated": -1.9016625881195068, |
|
"logits/real": -1.7552967071533203, |
|
"logps/generated": -396.3576965332031, |
|
"logps/real": -253.3724822998047, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.431873321533203, |
|
"rewards/margins": 23.697856903076172, |
|
"rewards/real": -5.734013080596924, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2667450058754406e-07, |
|
"logits/generated": -1.9619117975234985, |
|
"logits/real": -1.8128383159637451, |
|
"logps/generated": -364.29071044921875, |
|
"logps/real": -247.4691619873047, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.770038604736328, |
|
"rewards/margins": 21.507999420166016, |
|
"rewards/real": -5.262039661407471, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.254994124559342e-07, |
|
"logits/generated": -1.9781490564346313, |
|
"logits/real": -1.8369346857070923, |
|
"logps/generated": -394.2939453125, |
|
"logps/real": -295.5604553222656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.346759796142578, |
|
"rewards/margins": 23.18613624572754, |
|
"rewards/real": -5.160625457763672, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2432432432432432e-07, |
|
"logits/generated": -1.9349294900894165, |
|
"logits/real": -1.8810224533081055, |
|
"logps/generated": -378.1846008300781, |
|
"logps/real": -301.382080078125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -26.532123565673828, |
|
"rewards/margins": 20.59892463684082, |
|
"rewards/real": -5.933199882507324, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2314923619271444e-07, |
|
"logits/generated": -1.957461953163147, |
|
"logits/real": -1.8917419910430908, |
|
"logps/generated": -388.87432861328125, |
|
"logps/real": -350.3866271972656, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.713695526123047, |
|
"rewards/margins": 22.36217498779297, |
|
"rewards/real": -5.3515214920043945, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2197414806110457e-07, |
|
"logits/generated": -1.8699737787246704, |
|
"logits/real": -1.782846212387085, |
|
"logps/generated": -400.0876770019531, |
|
"logps/real": -277.35076904296875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.43486976623535, |
|
"rewards/margins": 23.6584529876709, |
|
"rewards/real": -5.776413917541504, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.207990599294947e-07, |
|
"logits/generated": -1.9608417749404907, |
|
"logits/real": -1.8951002359390259, |
|
"logps/generated": -411.26190185546875, |
|
"logps/real": -317.6622009277344, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.566543579101562, |
|
"rewards/margins": 23.481670379638672, |
|
"rewards/real": -6.084873676300049, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1962397179788483e-07, |
|
"logits/generated": -1.9031436443328857, |
|
"logits/real": -1.8433917760849, |
|
"logps/generated": -397.88836669921875, |
|
"logps/real": -258.14495849609375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.519550323486328, |
|
"rewards/margins": 22.443387985229492, |
|
"rewards/real": -7.076161861419678, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1844888366627497e-07, |
|
"logits/generated": -1.9236103296279907, |
|
"logits/real": -1.829207181930542, |
|
"logps/generated": -396.1493225097656, |
|
"logps/real": -270.80133056640625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -29.64163589477539, |
|
"rewards/margins": 23.829097747802734, |
|
"rewards/real": -5.8125386238098145, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.172737955346651e-07, |
|
"logits/generated": -1.953190565109253, |
|
"logits/real": -1.8170249462127686, |
|
"logps/generated": -422.35650634765625, |
|
"logps/real": -285.5227966308594, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.606027603149414, |
|
"rewards/margins": 25.138118743896484, |
|
"rewards/real": -6.467909336090088, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1609870740305522e-07, |
|
"logits/generated": -1.9567396640777588, |
|
"logits/real": -1.8386499881744385, |
|
"logps/generated": -411.81378173828125, |
|
"logps/real": -301.4488830566406, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.37715721130371, |
|
"rewards/margins": 24.879119873046875, |
|
"rewards/real": -5.498034477233887, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1492361927144535e-07, |
|
"logits/generated": -1.8939313888549805, |
|
"logits/real": -1.7057090997695923, |
|
"logps/generated": -398.8463439941406, |
|
"logps/real": -330.5086364746094, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.181045532226562, |
|
"rewards/margins": 23.68384552001953, |
|
"rewards/real": -5.497200965881348, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1374853113983548e-07, |
|
"logits/generated": -1.9329249858856201, |
|
"logits/real": -1.820165991783142, |
|
"logps/generated": -422.8020935058594, |
|
"logps/real": -280.1080627441406, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -30.731647491455078, |
|
"rewards/margins": 24.493200302124023, |
|
"rewards/real": -6.238447666168213, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1257344300822562e-07, |
|
"logits/generated": -1.8722028732299805, |
|
"logits/real": -1.7556512355804443, |
|
"logps/generated": -412.02276611328125, |
|
"logps/real": -309.0514831542969, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.253271102905273, |
|
"rewards/margins": 24.02978515625, |
|
"rewards/real": -6.223486423492432, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1139835487661573e-07, |
|
"logits/generated": -1.808394193649292, |
|
"logits/real": -1.8020694255828857, |
|
"logps/generated": -403.3237609863281, |
|
"logps/real": -283.5267333984375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.11172866821289, |
|
"rewards/margins": 23.824481964111328, |
|
"rewards/real": -6.287243843078613, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1022326674500588e-07, |
|
"logits/generated": -1.7356446981430054, |
|
"logits/real": -1.7262321710586548, |
|
"logps/generated": -414.942626953125, |
|
"logps/real": -302.17474365234375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.184499740600586, |
|
"rewards/margins": 25.247180938720703, |
|
"rewards/real": -5.937318801879883, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.09048178613396e-07, |
|
"logits/generated": -1.8033393621444702, |
|
"logits/real": -1.7795436382293701, |
|
"logps/generated": -420.39971923828125, |
|
"logps/real": -293.73248291015625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.607690811157227, |
|
"rewards/margins": 24.23845672607422, |
|
"rewards/real": -6.369235992431641, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0787309048178613e-07, |
|
"logits/generated": -1.8363456726074219, |
|
"logits/real": -1.8129303455352783, |
|
"logps/generated": -397.9834289550781, |
|
"logps/real": -297.1370544433594, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -28.858022689819336, |
|
"rewards/margins": 22.88436508178711, |
|
"rewards/real": -5.973654747009277, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0669800235017626e-07, |
|
"logits/generated": -1.8176523447036743, |
|
"logits/real": -1.774566888809204, |
|
"logps/generated": -408.4759826660156, |
|
"logps/real": -288.6650390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.875314712524414, |
|
"rewards/margins": 23.64436912536621, |
|
"rewards/real": -6.230945110321045, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0552291421856639e-07, |
|
"logits/generated": -1.7467482089996338, |
|
"logits/real": -1.7439444065093994, |
|
"logps/generated": -422.25213623046875, |
|
"logps/real": -269.0238037109375, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -31.126392364501953, |
|
"rewards/margins": 24.541561126708984, |
|
"rewards/real": -6.584831237792969, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0434782608695651e-07, |
|
"logits/generated": -1.732426404953003, |
|
"logits/real": -1.7286033630371094, |
|
"logps/generated": -398.12841796875, |
|
"logps/real": -310.280029296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.06282615661621, |
|
"rewards/margins": 22.96421241760254, |
|
"rewards/real": -6.098611354827881, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0317273795534664e-07, |
|
"logits/generated": -1.806668996810913, |
|
"logits/real": -1.7185096740722656, |
|
"logps/generated": -387.23248291015625, |
|
"logps/real": -307.28485107421875, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -28.445404052734375, |
|
"rewards/margins": 22.187774658203125, |
|
"rewards/real": -6.25762939453125, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0199764982373678e-07, |
|
"logits/generated": -1.764177680015564, |
|
"logits/real": -1.779284119606018, |
|
"logps/generated": -427.274658203125, |
|
"logps/real": -308.47650146484375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -31.830997467041016, |
|
"rewards/margins": 24.836002349853516, |
|
"rewards/real": -6.994994163513184, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0082256169212691e-07, |
|
"logits/generated": -1.8138262033462524, |
|
"logits/real": -1.734763741493225, |
|
"logps/generated": -422.82421875, |
|
"logps/real": -307.7763671875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.90139389038086, |
|
"rewards/margins": 25.225627899169922, |
|
"rewards/real": -6.675767421722412, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.964747356051703e-08, |
|
"logits/generated": -1.687376618385315, |
|
"logits/real": -1.636805534362793, |
|
"logps/generated": -409.3494567871094, |
|
"logps/real": -281.611328125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.65591049194336, |
|
"rewards/margins": 24.733543395996094, |
|
"rewards/real": -6.922367095947266, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.847238542890717e-08, |
|
"logits/generated": -1.6962316036224365, |
|
"logits/real": -1.7696120738983154, |
|
"logps/generated": -443.807373046875, |
|
"logps/real": -335.1094970703125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.494110107421875, |
|
"rewards/margins": 26.92751693725586, |
|
"rewards/real": -6.566593170166016, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.72972972972973e-08, |
|
"logits/generated": -1.7020161151885986, |
|
"logits/real": -1.681674599647522, |
|
"logps/generated": -434.5680236816406, |
|
"logps/real": -280.8212585449219, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.23873519897461, |
|
"rewards/margins": 26.639150619506836, |
|
"rewards/real": -6.599585056304932, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.612220916568742e-08, |
|
"logits/generated": -1.7404295206069946, |
|
"logits/real": -1.73974609375, |
|
"logps/generated": -434.4542541503906, |
|
"logps/real": -315.81671142578125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.91022300720215, |
|
"rewards/margins": 25.870519638061523, |
|
"rewards/real": -6.039700508117676, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.494712103407755e-08, |
|
"logits/generated": -1.7896816730499268, |
|
"logits/real": -1.7941644191741943, |
|
"logps/generated": -401.29730224609375, |
|
"logps/real": -345.435302734375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.499197006225586, |
|
"rewards/margins": 22.433208465576172, |
|
"rewards/real": -7.065989017486572, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.377203290246769e-08, |
|
"logits/generated": -1.8484700918197632, |
|
"logits/real": -1.916161298751831, |
|
"logps/generated": -363.75433349609375, |
|
"logps/real": -331.8744201660156, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -25.15157699584961, |
|
"rewards/margins": 19.572826385498047, |
|
"rewards/real": -5.578749656677246, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.25969447708578e-08, |
|
"logits/generated": -1.7649660110473633, |
|
"logits/real": -1.7312488555908203, |
|
"logps/generated": -420.1351623535156, |
|
"logps/real": -292.35882568359375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.093624114990234, |
|
"rewards/margins": 24.103261947631836, |
|
"rewards/real": -6.990364074707031, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.142185663924793e-08, |
|
"logits/generated": -1.7428464889526367, |
|
"logits/real": -1.7745319604873657, |
|
"logps/generated": -418.396484375, |
|
"logps/real": -293.8400573730469, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.799205780029297, |
|
"rewards/margins": 24.954181671142578, |
|
"rewards/real": -6.845028877258301, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.024676850763807e-08, |
|
"logits/generated": -1.7720359563827515, |
|
"logits/real": -1.7069841623306274, |
|
"logps/generated": -431.8590393066406, |
|
"logps/real": -297.2078552246094, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.12538528442383, |
|
"rewards/margins": 26.22249984741211, |
|
"rewards/real": -5.902883529663086, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.90716803760282e-08, |
|
"logits/generated": -1.7318195104599, |
|
"logits/real": -1.752964735031128, |
|
"logps/generated": -418.92169189453125, |
|
"logps/real": -314.60302734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.43752670288086, |
|
"rewards/margins": 25.329748153686523, |
|
"rewards/real": -6.1077775955200195, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.789659224441833e-08, |
|
"logits/generated": -1.7232677936553955, |
|
"logits/real": -1.6864397525787354, |
|
"logps/generated": -420.5328674316406, |
|
"logps/real": -290.80902099609375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -32.34931564331055, |
|
"rewards/margins": 26.023815155029297, |
|
"rewards/real": -6.325500011444092, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.672150411280846e-08, |
|
"logits/generated": -1.7739341259002686, |
|
"logits/real": -1.8019405603408813, |
|
"logps/generated": -415.83062744140625, |
|
"logps/real": -326.75469970703125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.948598861694336, |
|
"rewards/margins": 25.07623291015625, |
|
"rewards/real": -5.872367858886719, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.554641598119859e-08, |
|
"logits/generated": -1.8021968603134155, |
|
"logits/real": -1.7830060720443726, |
|
"logps/generated": -425.39697265625, |
|
"logps/real": -320.0240478515625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.33600425720215, |
|
"rewards/margins": 25.411022186279297, |
|
"rewards/real": -5.924983024597168, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.437132784958871e-08, |
|
"logits/generated": -1.7438474893569946, |
|
"logits/real": -1.7602970600128174, |
|
"logps/generated": -406.46856689453125, |
|
"logps/real": -296.0639343261719, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.566293716430664, |
|
"rewards/margins": 23.20583152770996, |
|
"rewards/real": -7.360462188720703, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.319623971797884e-08, |
|
"logits/generated": -1.8322248458862305, |
|
"logits/real": -1.86432683467865, |
|
"logps/generated": -406.3897705078125, |
|
"logps/real": -310.910400390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.910282135009766, |
|
"rewards/margins": 23.635074615478516, |
|
"rewards/real": -6.275205612182617, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.202115158636898e-08, |
|
"logits/generated": -1.7475402355194092, |
|
"logits/real": -1.744633674621582, |
|
"logps/generated": -435.2589416503906, |
|
"logps/real": -313.73150634765625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.4385986328125, |
|
"rewards/margins": 25.712047576904297, |
|
"rewards/real": -6.7265496253967285, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.08460634547591e-08, |
|
"logits/generated": -1.7663837671279907, |
|
"logits/real": -1.74166738986969, |
|
"logps/generated": -437.02069091796875, |
|
"logps/real": -333.59344482421875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.60531997680664, |
|
"rewards/margins": 25.910425186157227, |
|
"rewards/real": -6.6948981285095215, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.967097532314922e-08, |
|
"logits/generated": -1.7162328958511353, |
|
"logits/real": -1.7188522815704346, |
|
"logps/generated": -453.78546142578125, |
|
"logps/real": -318.1512145996094, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -34.322872161865234, |
|
"rewards/margins": 27.458202362060547, |
|
"rewards/real": -6.8646674156188965, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.849588719153937e-08, |
|
"logits/generated": -1.7017135620117188, |
|
"logits/real": -1.7324234247207642, |
|
"logps/generated": -450.5570373535156, |
|
"logps/real": -289.6382141113281, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.93928527832031, |
|
"rewards/margins": 27.395299911499023, |
|
"rewards/real": -6.54398250579834, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73207990599295e-08, |
|
"logits/generated": -1.6585533618927002, |
|
"logits/real": -1.634086012840271, |
|
"logps/generated": -448.7232360839844, |
|
"logps/real": -277.66802978515625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.147247314453125, |
|
"rewards/margins": 26.556921005249023, |
|
"rewards/real": -7.590325355529785, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.614571092831962e-08, |
|
"logits/generated": -1.621957778930664, |
|
"logits/real": -1.6922699213027954, |
|
"logps/generated": -462.8789978027344, |
|
"logps/real": -305.51947021484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.89826202392578, |
|
"rewards/margins": 28.3109188079834, |
|
"rewards/real": -7.587339878082275, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.497062279670975e-08, |
|
"logits/generated": -1.6756019592285156, |
|
"logits/real": -1.705483078956604, |
|
"logps/generated": -430.5467224121094, |
|
"logps/real": -286.50982666015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.59349060058594, |
|
"rewards/margins": 24.432825088500977, |
|
"rewards/real": -8.160660743713379, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.379553466509989e-08, |
|
"logits/generated": -1.6760860681533813, |
|
"logits/real": -1.7032572031021118, |
|
"logps/generated": -439.35498046875, |
|
"logps/real": -293.1243591308594, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.49091339111328, |
|
"rewards/margins": 26.223369598388672, |
|
"rewards/real": -7.267544746398926, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.262044653349e-08, |
|
"logits/generated": -1.6767972707748413, |
|
"logits/real": -1.6770479679107666, |
|
"logps/generated": -453.43463134765625, |
|
"logps/real": -309.2659912109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.969093322753906, |
|
"rewards/margins": 27.015491485595703, |
|
"rewards/real": -7.953605651855469, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.144535840188013e-08, |
|
"logits/generated": -1.6518837213516235, |
|
"logits/real": -1.661682367324829, |
|
"logps/generated": -444.19732666015625, |
|
"logps/real": -248.5184783935547, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.29254150390625, |
|
"rewards/margins": 27.253955841064453, |
|
"rewards/real": -7.0385894775390625, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.027027027027027e-08, |
|
"logits/generated": -1.6208521127700806, |
|
"logits/real": -1.684266448020935, |
|
"logps/generated": -450.17913818359375, |
|
"logps/real": -303.62945556640625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.04570388793945, |
|
"rewards/margins": 27.82986831665039, |
|
"rewards/real": -6.215835094451904, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.909518213866039e-08, |
|
"logits/generated": -1.7041343450546265, |
|
"logits/real": -1.7098076343536377, |
|
"logps/generated": -449.44219970703125, |
|
"logps/real": -314.94830322265625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.25445556640625, |
|
"rewards/margins": 28.510257720947266, |
|
"rewards/real": -5.74419641494751, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.792009400705053e-08, |
|
"logits/generated": -1.6410897970199585, |
|
"logits/real": -1.6358200311660767, |
|
"logps/generated": -433.7411193847656, |
|
"logps/real": -287.76861572265625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.50247573852539, |
|
"rewards/margins": 26.161731719970703, |
|
"rewards/real": -7.340744972229004, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.674500587544066e-08, |
|
"logits/generated": -1.6944376230239868, |
|
"logits/real": -1.8017114400863647, |
|
"logps/generated": -453.3190002441406, |
|
"logps/real": -327.5594787597656, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.635292053222656, |
|
"rewards/margins": 28.087554931640625, |
|
"rewards/real": -5.547739505767822, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.556991774383078e-08, |
|
"logits/generated": -1.7940804958343506, |
|
"logits/real": -1.7683172225952148, |
|
"logps/generated": -417.69122314453125, |
|
"logps/real": -273.3849792480469, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.897634506225586, |
|
"rewards/margins": 23.836702346801758, |
|
"rewards/real": -7.0609331130981445, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.439482961222091e-08, |
|
"logits/generated": -1.7380746603012085, |
|
"logits/real": -1.753003716468811, |
|
"logps/generated": -404.82989501953125, |
|
"logps/real": -300.6372985839844, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.634296417236328, |
|
"rewards/margins": 23.29750633239746, |
|
"rewards/real": -6.336789131164551, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.321974148061104e-08, |
|
"logits/generated": -1.6851911544799805, |
|
"logits/real": -1.7241060733795166, |
|
"logps/generated": -400.8048400878906, |
|
"logps/real": -284.01654052734375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -30.020034790039062, |
|
"rewards/margins": 23.252304077148438, |
|
"rewards/real": -6.76772928237915, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.204465334900117e-08, |
|
"logits/generated": -1.7801281213760376, |
|
"logits/real": -1.757208228111267, |
|
"logps/generated": -437.983154296875, |
|
"logps/real": -300.23931884765625, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.8565731048584, |
|
"rewards/margins": 25.158836364746094, |
|
"rewards/real": -6.697731018066406, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.086956521739131e-08, |
|
"logits/generated": -1.659287691116333, |
|
"logits/real": -1.7502975463867188, |
|
"logps/generated": -430.50958251953125, |
|
"logps/real": -319.3751525878906, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.70566177368164, |
|
"rewards/margins": 25.877904891967773, |
|
"rewards/real": -5.827755928039551, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.969447708578144e-08, |
|
"logits/generated": -1.6172062158584595, |
|
"logits/real": -1.6674178838729858, |
|
"logps/generated": -425.08648681640625, |
|
"logps/real": -290.21990966796875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.34017562866211, |
|
"rewards/margins": 26.0834903717041, |
|
"rewards/real": -6.256686210632324, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.851938895417156e-08, |
|
"logits/generated": -1.6710926294326782, |
|
"logits/real": -1.6580989360809326, |
|
"logps/generated": -428.62164306640625, |
|
"logps/real": -274.07098388671875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.238311767578125, |
|
"rewards/margins": 25.74837303161621, |
|
"rewards/real": -6.489937782287598, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.734430082256169e-08, |
|
"logits/generated": -1.673111915588379, |
|
"logits/real": -1.718198537826538, |
|
"logps/generated": -420.06060791015625, |
|
"logps/real": -344.5765075683594, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.972484588623047, |
|
"rewards/margins": 24.510467529296875, |
|
"rewards/real": -6.4620161056518555, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.616921269095182e-08, |
|
"logits/generated": -1.708108901977539, |
|
"logits/real": -1.7552868127822876, |
|
"logps/generated": -416.3793029785156, |
|
"logps/real": -286.037841796875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -31.67416000366211, |
|
"rewards/margins": 25.777502059936523, |
|
"rewards/real": -5.896656513214111, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.499412455934195e-08, |
|
"logits/generated": -1.621158242225647, |
|
"logits/real": -1.6797363758087158, |
|
"logps/generated": -387.11407470703125, |
|
"logps/real": -277.28900146484375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -29.1168155670166, |
|
"rewards/margins": 21.60858917236328, |
|
"rewards/real": -7.5082268714904785, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.3819036427732076e-08, |
|
"logits/generated": -1.6894304752349854, |
|
"logits/real": -1.7261028289794922, |
|
"logps/generated": -456.5179138183594, |
|
"logps/real": -290.7882995605469, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.7430534362793, |
|
"rewards/margins": 27.435291290283203, |
|
"rewards/real": -7.307762145996094, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.2643948296122204e-08, |
|
"logits/generated": -1.7677587270736694, |
|
"logits/real": -1.7243268489837646, |
|
"logps/generated": -460.8067932128906, |
|
"logps/real": -278.5439758300781, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.61956024169922, |
|
"rewards/margins": 28.052993774414062, |
|
"rewards/real": -7.566567420959473, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.146886016451234e-08, |
|
"logits/generated": -1.6441065073013306, |
|
"logits/real": -1.6688801050186157, |
|
"logps/generated": -410.22320556640625, |
|
"logps/real": -287.65997314453125, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -30.20147705078125, |
|
"rewards/margins": 23.18656349182129, |
|
"rewards/real": -7.014913082122803, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.0293772032902466e-08, |
|
"logits/generated": -1.5864546298980713, |
|
"logits/real": -1.6448328495025635, |
|
"logps/generated": -457.1094665527344, |
|
"logps/real": -299.36285400390625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.83979034423828, |
|
"rewards/margins": 27.62546157836914, |
|
"rewards/real": -7.214327335357666, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.91186839012926e-08, |
|
"logits/generated": -1.6366796493530273, |
|
"logits/real": -1.6697746515274048, |
|
"logps/generated": -467.0208435058594, |
|
"logps/real": -320.2940673828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.921897888183594, |
|
"rewards/margins": 27.745059967041016, |
|
"rewards/real": -7.176837921142578, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.794359576968272e-08, |
|
"logits/generated": -1.6946895122528076, |
|
"logits/real": -1.711517095565796, |
|
"logps/generated": -453.2867126464844, |
|
"logps/real": -328.2344055175781, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.175537109375, |
|
"rewards/margins": 26.651561737060547, |
|
"rewards/real": -7.5239739418029785, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.676850763807285e-08, |
|
"logits/generated": -1.703352928161621, |
|
"logits/real": -1.7690811157226562, |
|
"logps/generated": -428.7987365722656, |
|
"logps/real": -362.03753662109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.37459945678711, |
|
"rewards/margins": 24.011770248413086, |
|
"rewards/real": -7.362831115722656, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.5593419506462984e-08, |
|
"logits/generated": -1.661341667175293, |
|
"logits/real": -1.7016382217407227, |
|
"logps/generated": -447.7108459472656, |
|
"logps/real": -289.16058349609375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.20888137817383, |
|
"rewards/margins": 26.738143920898438, |
|
"rewards/real": -7.47074031829834, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.441833137485311e-08, |
|
"logits/generated": -1.672273874282837, |
|
"logits/real": -1.7239618301391602, |
|
"logps/generated": -412.23162841796875, |
|
"logps/real": -278.4026184082031, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.304784774780273, |
|
"rewards/margins": 23.998537063598633, |
|
"rewards/real": -7.306250095367432, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.3243243243243246e-08, |
|
"logits/generated": -1.6548233032226562, |
|
"logits/real": -1.6878429651260376, |
|
"logps/generated": -458.45648193359375, |
|
"logps/real": -306.944580078125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.853546142578125, |
|
"rewards/margins": 26.48433494567871, |
|
"rewards/real": -7.369210720062256, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.206815511163337e-08, |
|
"logits/generated": -1.7220999002456665, |
|
"logits/real": -1.7197761535644531, |
|
"logps/generated": -457.8179626464844, |
|
"logps/real": -310.9056701660156, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -34.46787643432617, |
|
"rewards/margins": 27.519847869873047, |
|
"rewards/real": -6.948026180267334, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0893066980023495e-08, |
|
"logits/generated": -1.6025826930999756, |
|
"logits/real": -1.7037376165390015, |
|
"logps/generated": -428.7861328125, |
|
"logps/real": -282.8119201660156, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -32.31279754638672, |
|
"rewards/margins": 25.544296264648438, |
|
"rewards/real": -6.768496513366699, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.971797884841363e-08, |
|
"logits/generated": -1.6418508291244507, |
|
"logits/real": -1.7036247253417969, |
|
"logps/generated": -451.6459045410156, |
|
"logps/real": -293.98883056640625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.84459686279297, |
|
"rewards/margins": 26.4428653717041, |
|
"rewards/real": -7.401734352111816, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.854289071680376e-08, |
|
"logits/generated": -1.5944923162460327, |
|
"logits/real": -1.6075847148895264, |
|
"logps/generated": -410.13671875, |
|
"logps/real": -266.53302001953125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.753042221069336, |
|
"rewards/margins": 23.652307510375977, |
|
"rewards/real": -7.100737571716309, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.736780258519389e-08, |
|
"logits/generated": -1.6141496896743774, |
|
"logits/real": -1.6279274225234985, |
|
"logps/generated": -458.05731201171875, |
|
"logps/real": -282.2770080566406, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.077274322509766, |
|
"rewards/margins": 27.443347930908203, |
|
"rewards/real": -7.633930206298828, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.619271445358402e-08, |
|
"logits/generated": -1.6210002899169922, |
|
"logits/real": -1.6764341592788696, |
|
"logps/generated": -460.6913146972656, |
|
"logps/real": -327.74664306640625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.58207321166992, |
|
"rewards/margins": 27.273120880126953, |
|
"rewards/real": -7.3089494705200195, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.501762632197414e-08, |
|
"logits/generated": -1.5527939796447754, |
|
"logits/real": -1.6446359157562256, |
|
"logps/generated": -428.407470703125, |
|
"logps/real": -327.3465576171875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -32.21887969970703, |
|
"rewards/margins": 25.55868148803711, |
|
"rewards/real": -6.6602020263671875, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3842538190364275e-08, |
|
"logits/generated": -1.5835459232330322, |
|
"logits/real": -1.6207221746444702, |
|
"logps/generated": -438.98291015625, |
|
"logps/real": -280.99237060546875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -33.71384048461914, |
|
"rewards/margins": 26.74772071838379, |
|
"rewards/real": -6.966122627258301, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.26674500587544e-08, |
|
"logits/generated": -1.6002051830291748, |
|
"logits/real": -1.6329238414764404, |
|
"logps/generated": -430.971435546875, |
|
"logps/real": -304.34686279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.37575912475586, |
|
"rewards/margins": 25.36520004272461, |
|
"rewards/real": -7.010560035705566, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.149236192714454e-08, |
|
"logits/generated": -1.6009585857391357, |
|
"logits/real": -1.588710904121399, |
|
"logps/generated": -440.9615783691406, |
|
"logps/real": -266.1169128417969, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.529884338378906, |
|
"rewards/margins": 27.274459838867188, |
|
"rewards/real": -6.255424499511719, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0317273795534665e-08, |
|
"logits/generated": -1.6368831396102905, |
|
"logits/real": -1.6338386535644531, |
|
"logps/generated": -451.52423095703125, |
|
"logps/real": -254.2528076171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.0713005065918, |
|
"rewards/margins": 27.269176483154297, |
|
"rewards/real": -7.802123069763184, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9142185663924792e-08, |
|
"logits/generated": -1.5831745862960815, |
|
"logits/real": -1.6589330434799194, |
|
"logps/generated": -476.41705322265625, |
|
"logps/real": -304.2042541503906, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.50579071044922, |
|
"rewards/margins": 28.691112518310547, |
|
"rewards/real": -7.814681053161621, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7967097532314924e-08, |
|
"logits/generated": -1.5155766010284424, |
|
"logits/real": -1.6084210872650146, |
|
"logps/generated": -444.40985107421875, |
|
"logps/real": -327.21868896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.392765045166016, |
|
"rewards/margins": 26.478784561157227, |
|
"rewards/real": -6.913978576660156, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6792009400705055e-08, |
|
"logits/generated": -1.546156883239746, |
|
"logits/real": -1.6635850667953491, |
|
"logps/generated": -455.74920654296875, |
|
"logps/real": -290.7769470214844, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.94561767578125, |
|
"rewards/margins": 27.562381744384766, |
|
"rewards/real": -7.383233070373535, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.561692126909518e-08, |
|
"logits/generated": -1.5961230993270874, |
|
"logits/real": -1.6761224269866943, |
|
"logps/generated": -474.77325439453125, |
|
"logps/real": -320.73260498046875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -36.5091552734375, |
|
"rewards/margins": 28.40032386779785, |
|
"rewards/real": -8.108833312988281, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.444183313748531e-08, |
|
"logits/generated": -1.5484769344329834, |
|
"logits/real": -1.6109914779663086, |
|
"logps/generated": -466.61102294921875, |
|
"logps/real": -272.64215087890625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.228668212890625, |
|
"rewards/margins": 28.0214900970459, |
|
"rewards/real": -8.207174301147461, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.326674500587544e-08, |
|
"logits/generated": -1.6316810846328735, |
|
"logits/real": -1.692752480506897, |
|
"logps/generated": -464.38372802734375, |
|
"logps/real": -331.10406494140625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -34.567840576171875, |
|
"rewards/margins": 26.617473602294922, |
|
"rewards/real": -7.950366973876953, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.209165687426557e-08, |
|
"logits/generated": -1.57527756690979, |
|
"logits/real": -1.682943344116211, |
|
"logps/generated": -459.98919677734375, |
|
"logps/real": -372.90106201171875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.08959197998047, |
|
"rewards/margins": 26.82863426208496, |
|
"rewards/real": -7.2609543800354, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.09165687426557e-08, |
|
"logits/generated": -1.4877384901046753, |
|
"logits/real": -1.652479887008667, |
|
"logps/generated": -450.6048889160156, |
|
"logps/real": -311.8920593261719, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.74338150024414, |
|
"rewards/margins": 26.67026710510254, |
|
"rewards/real": -7.07311487197876, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9741480611045828e-08, |
|
"logits/generated": -1.5721471309661865, |
|
"logits/real": -1.7118234634399414, |
|
"logps/generated": -434.28472900390625, |
|
"logps/real": -302.3216247558594, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -32.82756805419922, |
|
"rewards/margins": 25.282840728759766, |
|
"rewards/real": -7.544726371765137, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8566392479435956e-08, |
|
"logits/generated": -1.6017907857894897, |
|
"logits/real": -1.6692975759506226, |
|
"logps/generated": -445.66387939453125, |
|
"logps/real": -283.04779052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.74781036376953, |
|
"rewards/margins": 26.4423770904541, |
|
"rewards/real": -7.3054304122924805, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7391304347826087e-08, |
|
"logits/generated": -1.621559739112854, |
|
"logits/real": -1.7216978073120117, |
|
"logps/generated": -429.00128173828125, |
|
"logps/real": -390.5367431640625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -30.85262680053711, |
|
"rewards/margins": 24.305044174194336, |
|
"rewards/real": -6.547584533691406, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6216216216216218e-08, |
|
"logits/generated": -1.5349498987197876, |
|
"logits/real": -1.6045535802841187, |
|
"logps/generated": -446.84576416015625, |
|
"logps/real": -334.5065002441406, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -34.07716369628906, |
|
"rewards/margins": 26.082788467407227, |
|
"rewards/real": -7.994380950927734, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5041128084606346e-08, |
|
"logits/generated": -1.5646604299545288, |
|
"logits/real": -1.6417030096054077, |
|
"logps/generated": -453.3228454589844, |
|
"logps/real": -315.86297607421875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -34.7583122253418, |
|
"rewards/margins": 26.6680965423584, |
|
"rewards/real": -8.090215682983398, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.3866039952996475e-08, |
|
"logits/generated": -1.4989112615585327, |
|
"logits/real": -1.601131796836853, |
|
"logps/generated": -504.98883056640625, |
|
"logps/real": -306.59600830078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -39.9133186340332, |
|
"rewards/margins": 32.207984924316406, |
|
"rewards/real": -7.705336093902588, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2690951821386603e-08, |
|
"logits/generated": -1.5204241275787354, |
|
"logits/real": -1.6375468969345093, |
|
"logps/generated": -443.3485412597656, |
|
"logps/real": -309.1793212890625, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -34.35383987426758, |
|
"rewards/margins": 25.77254295349121, |
|
"rewards/real": -8.581293106079102, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1515863689776732e-08, |
|
"logits/generated": -1.5714848041534424, |
|
"logits/real": -1.5972023010253906, |
|
"logps/generated": -493.9441833496094, |
|
"logps/real": -307.8035583496094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.91588592529297, |
|
"rewards/margins": 29.837596893310547, |
|
"rewards/real": -8.078287124633789, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0340775558166862e-08, |
|
"logits/generated": -1.4898492097854614, |
|
"logits/real": -1.6078729629516602, |
|
"logps/generated": -496.19024658203125, |
|
"logps/real": -302.10528564453125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -38.65169143676758, |
|
"rewards/margins": 32.12113952636719, |
|
"rewards/real": -6.530550956726074, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.165687426556991e-09, |
|
"logits/generated": -1.4833087921142578, |
|
"logits/real": -1.5793912410736084, |
|
"logps/generated": -436.9119567871094, |
|
"logps/real": -314.6705322265625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.653076171875, |
|
"rewards/margins": 26.469491958618164, |
|
"rewards/real": -7.183583736419678, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.99059929494712e-09, |
|
"logits/generated": -1.524432897567749, |
|
"logits/real": -1.5506082773208618, |
|
"logps/generated": -460.62335205078125, |
|
"logps/real": -286.51434326171875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -35.37180709838867, |
|
"rewards/margins": 28.64103126525879, |
|
"rewards/real": -6.730777740478516, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.81551116333725e-09, |
|
"logits/generated": -1.577246904373169, |
|
"logits/real": -1.6606992483139038, |
|
"logps/generated": -447.3589782714844, |
|
"logps/real": -306.1556701660156, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.04829025268555, |
|
"rewards/margins": 25.612957000732422, |
|
"rewards/real": -7.435332298278809, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.64042303172738e-09, |
|
"logits/generated": -1.5192750692367554, |
|
"logits/real": -1.6280876398086548, |
|
"logps/generated": -471.4060974121094, |
|
"logps/real": -288.0888977050781, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.808692932128906, |
|
"rewards/margins": 29.04833984375, |
|
"rewards/real": -7.76035213470459, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.465334900117508e-09, |
|
"logits/generated": -1.558406949043274, |
|
"logits/real": -1.6380071640014648, |
|
"logps/generated": -475.8984375, |
|
"logps/real": -323.078369140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -35.528656005859375, |
|
"rewards/margins": 27.93899154663086, |
|
"rewards/real": -7.589668273925781, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.2902467685076377e-09, |
|
"logits/generated": -1.557838797569275, |
|
"logits/real": -1.595538854598999, |
|
"logps/generated": -477.4278259277344, |
|
"logps/real": -297.52996826171875, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.0074462890625, |
|
"rewards/margins": 28.823505401611328, |
|
"rewards/real": -8.183939933776855, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.115158636897767e-09, |
|
"logits/generated": -1.5562093257904053, |
|
"logits/real": -1.6695356369018555, |
|
"logps/generated": -472.9737243652344, |
|
"logps/real": -330.8188171386719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -36.18653869628906, |
|
"rewards/margins": 28.221332550048828, |
|
"rewards/real": -7.965203285217285, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.400705052878966e-10, |
|
"logits/generated": -1.5382697582244873, |
|
"logits/real": -1.631152868270874, |
|
"logps/generated": -434.56109619140625, |
|
"logps/real": -307.39227294921875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -33.543827056884766, |
|
"rewards/margins": 25.150760650634766, |
|
"rewards/real": -8.393064498901367, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4728, |
|
"total_flos": 0.0, |
|
"train_loss": 0.022571272342312175, |
|
"train_runtime": 37239.7337, |
|
"train_samples_per_second": 4.061, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4728, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|