{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.994962216624685, "eval_steps": 50, "global_step": 528, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.003778337531486146, "grad_norm": 49.44846097246243, "learning_rate": 1.8867924528301883e-09, "logits": -1.1757584810256958, "logps": -94.8752670288086, "loss": 1.0913, "objective": 1.048144817352295, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.3549971580505371, "step": 1 }, { "dpo_loss": 0.6931453347206116, "epoch": 0.018891687657430732, "grad_norm": 56.77495215138669, "learning_rate": 9.433962264150943e-09, "logits": -1.3608341217041016, "logps": -92.11934661865234, "loss": 1.1069, "objective": 1.06926691532135, "ranking_idealized": 0.5729166865348816, "ranking_idealized_expo": 0.4895833432674408, "ranking_simple": 0.4895833432674408, "regularize": 0.37612107396125793, "step": 5 }, { "dpo_loss": 0.693118155002594, "epoch": 0.037783375314861464, "grad_norm": 50.79305715348446, "learning_rate": 1.8867924528301887e-08, "logits": -1.3560354709625244, "logps": -90.93302917480469, "loss": 1.1118, "objective": 1.1139355897903442, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.4208172559738159, "step": 10 }, { "dpo_loss": 0.6927705407142639, "epoch": 0.05667506297229219, "grad_norm": 48.78735409468553, "learning_rate": 2.830188679245283e-08, "logits": -1.3467658758163452, "logps": -92.52275848388672, "loss": 1.1086, "objective": 1.1240794658660889, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5291666388511658, "regularize": 0.4313090443611145, "step": 15 }, { "dpo_loss": 0.6927476525306702, "epoch": 0.07556675062972293, "grad_norm": 48.89099638416194, "learning_rate": 3.7735849056603774e-08, "logits": -1.286866545677185, "logps": -91.98667907714844, "loss": 1.1147, "objective": 1.127905011177063, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5583333373069763, "regularize": 0.43515732884407043, "step": 20 }, { "dpo_loss": 0.6929070353507996, "epoch": 0.09445843828715365, "grad_norm": 51.44180006710924, "learning_rate": 4.7169811320754715e-08, "logits": -1.3749586343765259, "logps": -92.29708862304688, "loss": 1.1069, "objective": 1.1106172800064087, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5458333492279053, "regularize": 0.4177102744579315, "step": 25 }, { "dpo_loss": 0.6915452480316162, "epoch": 0.11335012594458438, "grad_norm": 52.52518235565533, "learning_rate": 5.660377358490566e-08, "logits": -1.4094866514205933, "logps": -92.03213500976562, "loss": 1.1138, "objective": 1.1041828393936157, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.4126375913619995, "step": 30 }, { "dpo_loss": 0.6903693675994873, "epoch": 0.13224181360201512, "grad_norm": 55.88065446639471, "learning_rate": 6.603773584905659e-08, "logits": -1.3921390771865845, "logps": -93.27505493164062, "loss": 1.0984, "objective": 1.1129175424575806, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5083333253860474, "regularize": 0.4225481450557709, "step": 35 }, { "dpo_loss": 0.6901296377182007, "epoch": 0.15113350125944586, "grad_norm": 48.11830818999703, "learning_rate": 7.547169811320755e-08, "logits": -1.4104276895523071, "logps": -92.22798156738281, "loss": 1.1021, "objective": 1.083837628364563, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5708333253860474, "regularize": 0.39370766282081604, "step": 40 }, { "dpo_loss": 0.6862795352935791, "epoch": 0.17002518891687657, "grad_norm": 45.66068468167867, "learning_rate": 8.490566037735849e-08, "logits": -1.3663307428359985, "logps": -91.8579330444336, "loss": 1.1032, "objective": 1.1153314113616943, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5083333253860474, "regularize": 0.4290517270565033, "step": 45 }, { "dpo_loss": 0.6879932284355164, "epoch": 0.1889168765743073, "grad_norm": 51.12091223625438, "learning_rate": 9.433962264150943e-08, "logits": -1.3217811584472656, "logps": -90.97481536865234, "loss": 1.0911, "objective": 1.0859743356704712, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.3979811370372772, "step": 50 }, { "epoch": 0.1889168765743073, "eval_dpo_loss": 0.690547525882721, "eval_logits": -1.3092193603515625, "eval_logps": -97.7940902709961, "eval_loss": 1.104289174079895, "eval_objective": 1.1041598320007324, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5165745615959167, "eval_regularize": 0.4136121869087219, "eval_runtime": 235.6732, "eval_samples_per_second": 24.568, "eval_steps_per_second": 1.536, "step": 50 }, { "dpo_loss": 0.6884905695915222, "epoch": 0.20780856423173805, "grad_norm": 54.01101403233421, "learning_rate": 9.99956257238817e-08, "logits": -1.4181103706359863, "logps": -89.9561538696289, "loss": 1.1009, "objective": 1.094197154045105, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5291666388511658, "regularize": 0.40570658445358276, "step": 55 }, { "dpo_loss": 0.6880350708961487, "epoch": 0.22670025188916876, "grad_norm": 54.996673931606395, "learning_rate": 9.994642390694308e-08, "logits": -1.3805103302001953, "logps": -90.7548828125, "loss": 1.1093, "objective": 1.114397406578064, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 0.4263623058795929, "step": 60 }, { "dpo_loss": 0.6825106143951416, "epoch": 0.2455919395465995, "grad_norm": 56.45685371856582, "learning_rate": 9.98426064087682e-08, "logits": -1.4092483520507812, "logps": -92.43305969238281, "loss": 1.1012, "objective": 1.104454755783081, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 0.4219440817832947, "step": 65 }, { "dpo_loss": 0.6836772561073303, "epoch": 0.26448362720403024, "grad_norm": 55.79791061619149, "learning_rate": 9.968428675226714e-08, "logits": -1.372382640838623, "logps": -92.17337036132812, "loss": 1.1123, "objective": 1.1599425077438354, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.476265013217926, "step": 70 }, { "dpo_loss": 0.6837269067764282, "epoch": 0.28337531486146095, "grad_norm": 47.47237853468278, "learning_rate": 9.947163805765979e-08, "logits": -1.3373608589172363, "logps": -90.7457275390625, "loss": 1.0793, "objective": 1.0441726446151733, "ranking_idealized": 0.5083333253860474, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4333333373069763, "regularize": 0.3604455888271332, "step": 75 }, { "dpo_loss": 0.6796848773956299, "epoch": 0.3022670025188917, "grad_norm": 47.94506148186036, "learning_rate": 9.920489285317169e-08, "logits": -1.4042983055114746, "logps": -91.38846588134766, "loss": 1.0778, "objective": 1.0585055351257324, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5041666626930237, "regularize": 0.3788203001022339, "step": 80 }, { "dpo_loss": 0.6791465282440186, "epoch": 0.3211586901763224, "grad_norm": 47.95858131470083, "learning_rate": 9.888434282076758e-08, "logits": -1.3911387920379639, "logps": -92.44884490966797, "loss": 1.0925, "objective": 1.0910677909851074, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.4119212031364441, "step": 85 }, { "dpo_loss": 0.6837694644927979, "epoch": 0.34005037783375314, "grad_norm": 55.1730704885422, "learning_rate": 9.851033847720166e-08, "logits": -1.3752093315124512, "logps": -91.67774200439453, "loss": 1.0847, "objective": 1.085326075553894, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.512499988079071, "regularize": 0.4015560448169708, "step": 90 }, { "dpo_loss": 0.6765946745872498, "epoch": 0.3589420654911839, "grad_norm": 48.33215476425452, "learning_rate": 9.808328879073251e-08, "logits": -1.3342726230621338, "logps": -92.56273651123047, "loss": 1.0612, "objective": 1.060593605041504, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 0.3839987814426422, "step": 95 }, { "dpo_loss": 0.6816121935844421, "epoch": 0.3778337531486146, "grad_norm": 49.179351832428715, "learning_rate": 9.760366073392245e-08, "logits": -1.3950403928756714, "logps": -92.40387725830078, "loss": 1.0629, "objective": 1.0402370691299438, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5041666626930237, "regularize": 0.35862475633621216, "step": 100 }, { "epoch": 0.3778337531486146, "eval_dpo_loss": 0.6871094703674316, "eval_logits": -1.3221344947814941, "eval_logps": -98.14198303222656, "eval_loss": 1.093752384185791, "eval_objective": 1.0948374271392822, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5179557800292969, "eval_regularize": 0.4077278971672058, "eval_runtime": 235.7453, "eval_samples_per_second": 24.56, "eval_steps_per_second": 1.536, "step": 100 }, { "dpo_loss": 0.6785728931427002, "epoch": 0.3967254408060453, "grad_norm": 51.47899227122413, "learning_rate": 9.707197877300973e-08, "logits": -1.4077008962631226, "logps": -91.70052337646484, "loss": 1.0696, "objective": 1.086281180381775, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5041666626930237, "regularize": 0.407708078622818, "step": 105 }, { "dpo_loss": 0.6762667298316956, "epoch": 0.4156171284634761, "grad_norm": 50.28143364432592, "learning_rate": 9.648882429441257e-08, "logits": -1.3699510097503662, "logps": -93.08635711669922, "loss": 1.0782, "objective": 1.1022285223007202, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5708333253860474, "regularize": 0.4259616434574127, "step": 110 }, { "dpo_loss": 0.6772736310958862, "epoch": 0.4345088161209068, "grad_norm": 49.35211256173949, "learning_rate": 9.585483496899149e-08, "logits": -1.4434921741485596, "logps": -91.60735321044922, "loss": 1.049, "objective": 1.0325186252593994, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4958333373069763, "regularize": 0.3552449345588684, "step": 115 }, { "dpo_loss": 0.67376708984375, "epoch": 0.4534005037783375, "grad_norm": 49.769392715448596, "learning_rate": 9.517070405476573e-08, "logits": -1.3977102041244507, "logps": -91.86192321777344, "loss": 1.0722, "objective": 1.0750477313995361, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5249999761581421, "regularize": 0.40128055214881897, "step": 120 }, { "dpo_loss": 0.6729015111923218, "epoch": 0.4722921914357683, "grad_norm": 48.20763298222792, "learning_rate": 9.443717963884567e-08, "logits": -1.4215410947799683, "logps": -91.61782836914062, "loss": 1.0433, "objective": 1.0424840450286865, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5625, "regularize": 0.36958202719688416, "step": 125 }, { "dpo_loss": 0.6743431687355042, "epoch": 0.491183879093199, "grad_norm": 48.45479288893814, "learning_rate": 9.365506381941065e-08, "logits": -1.3215110301971436, "logps": -92.12106323242188, "loss": 1.0515, "objective": 1.097717046737671, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5625, "regularize": 0.4233737289905548, "step": 130 }, { "dpo_loss": 0.6695905327796936, "epoch": 0.5100755667506297, "grad_norm": 43.646349799244106, "learning_rate": 9.282521182862628e-08, "logits": -1.3272545337677002, "logps": -92.60189056396484, "loss": 1.057, "objective": 1.0827622413635254, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5166666507720947, "regularize": 0.41317158937454224, "step": 135 }, { "dpo_loss": 0.6709803342819214, "epoch": 0.5289672544080605, "grad_norm": 47.3960543467576, "learning_rate": 9.194853109746073e-08, "logits": -1.4292913675308228, "logps": -92.06829071044922, "loss": 1.0462, "objective": 1.046062707901001, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5, "regularize": 0.3750823736190796, "step": 140 }, { "dpo_loss": 0.6690364480018616, "epoch": 0.5478589420654912, "grad_norm": 51.65656404072737, "learning_rate": 9.102598026342222e-08, "logits": -1.4306373596191406, "logps": -91.35098266601562, "loss": 1.0533, "objective": 1.0885671377182007, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5625, "regularize": 0.41953060030937195, "step": 145 }, { "dpo_loss": 0.6675416231155396, "epoch": 0.5667506297229219, "grad_norm": 47.60377632937882, "learning_rate": 9.005856812230304e-08, "logits": -1.4119346141815186, "logps": -90.73808288574219, "loss": 1.0597, "objective": 1.0471808910369873, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5375000238418579, "regularize": 0.379639208316803, "step": 150 }, { "epoch": 0.5667506297229219, "eval_dpo_loss": 0.6842523813247681, "eval_logits": -1.3387939929962158, "eval_logps": -97.57598114013672, "eval_loss": 1.0886038541793823, "eval_objective": 1.089935064315796, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.4056825637817383, "eval_runtime": 236.1326, "eval_samples_per_second": 24.52, "eval_steps_per_second": 1.533, "step": 150 }, { "dpo_loss": 0.6675500273704529, "epoch": 0.5856423173803527, "grad_norm": 48.130868373709134, "learning_rate": 8.904735252507609e-08, "logits": -1.3894909620285034, "logps": -91.1141128540039, "loss": 1.0454, "objective": 1.0189038515090942, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5541666746139526, "regularize": 0.3513537645339966, "step": 155 }, { "dpo_loss": 0.6642627716064453, "epoch": 0.6045340050377834, "grad_norm": 48.76010002249353, "learning_rate": 8.799343922115043e-08, "logits": -1.3890469074249268, "logps": -91.98741149902344, "loss": 1.0636, "objective": 1.0629956722259521, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5166666507720947, "regularize": 0.39873284101486206, "step": 160 }, { "dpo_loss": 0.6647918224334717, "epoch": 0.6234256926952141, "grad_norm": 51.709247719469836, "learning_rate": 8.689798064925048e-08, "logits": -1.430177927017212, "logps": -92.05915832519531, "loss": 1.0499, "objective": 1.05256187915802, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4791666567325592, "regularize": 0.3877698481082916, "step": 165 }, { "dpo_loss": 0.6606873273849487, "epoch": 0.6423173803526449, "grad_norm": 46.459328825125596, "learning_rate": 8.576217467724127e-08, "logits": -1.3984259366989136, "logps": -91.71315002441406, "loss": 1.029, "objective": 1.0234616994857788, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.44583332538604736, "regularize": 0.36277416348457336, "step": 170 }, { "dpo_loss": 0.6623333096504211, "epoch": 0.6612090680100756, "grad_norm": 51.1586426932559, "learning_rate": 8.458726329227747e-08, "logits": -1.4344308376312256, "logps": -92.77267456054688, "loss": 1.0316, "objective": 1.0282338857650757, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.36590057611465454, "step": 175 }, { "dpo_loss": 0.6674898862838745, "epoch": 0.6801007556675063, "grad_norm": 48.61836865824933, "learning_rate": 8.337453124270862e-08, "logits": -1.3953392505645752, "logps": -90.3520736694336, "loss": 1.0387, "objective": 1.0503102540969849, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 0.3828202784061432, "step": 180 }, { "dpo_loss": 0.6639277935028076, "epoch": 0.698992443324937, "grad_norm": 56.3868937423093, "learning_rate": 8.212530463322582e-08, "logits": -1.376102089881897, "logps": -90.30011749267578, "loss": 1.0192, "objective": 1.0141814947128296, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5791666507720947, "regularize": 0.3502536714076996, "step": 185 }, { "dpo_loss": 0.6700864434242249, "epoch": 0.7178841309823678, "grad_norm": 51.6918769313309, "learning_rate": 8.084094947478554e-08, "logits": -1.4650901556015015, "logps": -92.19945526123047, "loss": 1.0536, "objective": 1.0666062831878662, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.3965199291706085, "step": 190 }, { "dpo_loss": 0.6677178740501404, "epoch": 0.7367758186397985, "grad_norm": 51.07369943909954, "learning_rate": 7.952287019089686e-08, "logits": -1.384783387184143, "logps": -92.98999786376953, "loss": 1.0332, "objective": 1.0433076620101929, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5416666865348816, "regularize": 0.3755895793437958, "step": 195 }, { "dpo_loss": 0.6689361333847046, "epoch": 0.7556675062972292, "grad_norm": 50.59449995506128, "learning_rate": 7.817250808190483e-08, "logits": -1.4448987245559692, "logps": -92.8608169555664, "loss": 1.0201, "objective": 0.9872502684593201, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.31831392645835876, "step": 200 }, { "epoch": 0.7556675062972292, "eval_dpo_loss": 0.6845039129257202, "eval_logits": -1.3518892526626587, "eval_logps": -98.55247497558594, "eval_loss": 1.0877532958984375, "eval_objective": 1.0928053855895996, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.40830138325691223, "eval_runtime": 236.1466, "eval_samples_per_second": 24.519, "eval_steps_per_second": 1.533, "step": 200 }, { "dpo_loss": 0.6665075421333313, "epoch": 0.77455919395466, "grad_norm": 44.9309987154925, "learning_rate": 7.679133974894983e-08, "logits": -1.377119779586792, "logps": -91.88671112060547, "loss": 1.0103, "objective": 1.0105198621749878, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.49166667461395264, "regularize": 0.34401220083236694, "step": 205 }, { "dpo_loss": 0.6583543419837952, "epoch": 0.7934508816120907, "grad_norm": 51.30313684013195, "learning_rate": 7.538087547932585e-08, "logits": -1.3690478801727295, "logps": -93.26469421386719, "loss": 1.0206, "objective": 1.019605040550232, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.3612505793571472, "step": 210 }, { "dpo_loss": 0.6641567349433899, "epoch": 0.8123425692695214, "grad_norm": 50.968568724158025, "learning_rate": 7.394265759500347e-08, "logits": -1.3541967868804932, "logps": -92.59414672851562, "loss": 1.0263, "objective": 1.0428659915924072, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.3787091076374054, "step": 215 }, { "dpo_loss": 0.6569988131523132, "epoch": 0.8312342569269522, "grad_norm": 53.082634877219036, "learning_rate": 7.247825876612352e-08, "logits": -1.4405725002288818, "logps": -93.02523803710938, "loss": 1.0183, "objective": 1.0499581098556519, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.3929591476917267, "step": 220 }, { "dpo_loss": 0.6647858023643494, "epoch": 0.8501259445843828, "grad_norm": 51.335148967482006, "learning_rate": 7.098928029130528e-08, "logits": -1.4285895824432373, "logps": -92.1469955444336, "loss": 1.0123, "objective": 1.034002423286438, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5416666865348816, "regularize": 0.3692165017127991, "step": 225 }, { "dpo_loss": 0.6575040221214294, "epoch": 0.8690176322418136, "grad_norm": 48.651529158644536, "learning_rate": 6.947735034665001e-08, "logits": -1.4194341897964478, "logps": -92.3643798828125, "loss": 1.0129, "objective": 1.019044041633606, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5791666507720947, "regularize": 0.3615398705005646, "step": 230 }, { "dpo_loss": 0.6606828570365906, "epoch": 0.8879093198992444, "grad_norm": 48.73605551669952, "learning_rate": 6.794412220535425e-08, "logits": -1.4252243041992188, "logps": -91.66600799560547, "loss": 1.0227, "objective": 1.0363646745681763, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5958333611488342, "regularize": 0.375681608915329, "step": 235 }, { "dpo_loss": 0.661056399345398, "epoch": 0.906801007556675, "grad_norm": 48.248478909206035, "learning_rate": 6.639127242987987e-08, "logits": -1.4544317722320557, "logps": -92.43217468261719, "loss": 1.0019, "objective": 1.0119422674179077, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5083333253860474, "regularize": 0.3508855998516083, "step": 240 }, { "dpo_loss": 0.6545276045799255, "epoch": 0.9256926952141058, "grad_norm": 55.62927449157849, "learning_rate": 6.482049903865769e-08, "logits": -1.3181027173995972, "logps": -92.43726348876953, "loss": 1.0099, "objective": 0.9958193302154541, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5208333134651184, "regularize": 0.341291606426239, "step": 245 }, { "dpo_loss": 0.6624430418014526, "epoch": 0.9445843828715366, "grad_norm": 45.52279742153591, "learning_rate": 6.323351964932908e-08, "logits": -1.363103985786438, "logps": -91.6707992553711, "loss": 1.0173, "objective": 1.0481780767440796, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5416666865348816, "regularize": 0.3857349753379822, "step": 250 }, { "epoch": 0.9445843828715366, "eval_dpo_loss": 0.6845124959945679, "eval_logits": -1.3585822582244873, "eval_logps": -99.33744812011719, "eval_loss": 1.0862815380096436, "eval_objective": 1.0919297933578491, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.519336998462677, "eval_regularize": 0.40741726756095886, "eval_runtime": 235.789, "eval_samples_per_second": 24.556, "eval_steps_per_second": 1.535, "step": 250 }, { "dpo_loss": 0.6532657742500305, "epoch": 0.9634760705289672, "grad_norm": 51.00987164026873, "learning_rate": 6.163206960055652e-08, "logits": -1.460981011390686, "logps": -94.54005432128906, "loss": 1.0046, "objective": 1.0130220651626587, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.359756201505661, "step": 255 }, { "dpo_loss": 0.6577326059341431, "epoch": 0.982367758186398, "grad_norm": 50.576016219867064, "learning_rate": 6.001790005445606e-08, "logits": -1.362568974494934, "logps": -93.19829559326172, "loss": 1.01, "objective": 0.9904889464378357, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5625, "regularize": 0.33275625109672546, "step": 260 }, { "dpo_loss": 0.6527519226074219, "epoch": 1.0012594458438286, "grad_norm": 50.78293400345161, "learning_rate": 5.839277608172738e-08, "logits": -1.3711644411087036, "logps": -93.6723403930664, "loss": 1.0071, "objective": 1.026816487312317, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5333333611488342, "regularize": 0.3740644156932831, "step": 265 }, { "dpo_loss": 0.6553524136543274, "epoch": 1.0201511335012594, "grad_norm": 52.08673841750398, "learning_rate": 5.675847473157485e-08, "logits": -1.4852278232574463, "logps": -94.36783599853516, "loss": 0.9818, "objective": 0.9954887628555298, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.34013625979423523, "step": 270 }, { "dpo_loss": 0.6415101289749146, "epoch": 1.0390428211586902, "grad_norm": 44.617567180749866, "learning_rate": 5.511678308853025e-08, "logits": -1.4626718759536743, "logps": -93.63671112060547, "loss": 0.9878, "objective": 0.9779664278030396, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5333333611488342, "regularize": 0.33645620942115784, "step": 275 }, { "dpo_loss": 0.6500685811042786, "epoch": 1.057934508816121, "grad_norm": 47.25187912639425, "learning_rate": 5.3469496318302197e-08, "logits": -1.40652334690094, "logps": -92.5146255493164, "loss": 0.9659, "objective": 0.9707435369491577, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5208333134651184, "regularize": 0.3206748366355896, "step": 280 }, { "dpo_loss": 0.6382409930229187, "epoch": 1.0768261964735517, "grad_norm": 47.30764627956772, "learning_rate": 5.1818415704788724e-08, "logits": -1.4587684869766235, "logps": -94.23441314697266, "loss": 0.964, "objective": 0.9715672135353088, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6083333492279053, "regularize": 0.3333263099193573, "step": 285 }, { "dpo_loss": 0.6455419063568115, "epoch": 1.0957178841309823, "grad_norm": 45.41530383871643, "learning_rate": 5.016534668039975e-08, "logits": -1.3359150886535645, "logps": -91.27457427978516, "loss": 0.9783, "objective": 0.9912244081497192, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5625, "regularize": 0.3456825315952301, "step": 290 }, { "dpo_loss": 0.6454907655715942, "epoch": 1.114609571788413, "grad_norm": 52.660264254351745, "learning_rate": 4.8512096851843375e-08, "logits": -1.414397120475769, "logps": -92.17057037353516, "loss": 0.9657, "objective": 0.964141309261322, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5708333253860474, "regularize": 0.31865036487579346, "step": 295 }, { "dpo_loss": 0.6478288173675537, "epoch": 1.1335012594458438, "grad_norm": 55.18319202572926, "learning_rate": 4.686047402353433e-08, "logits": -1.482143521308899, "logps": -93.26191711425781, "loss": 0.9755, "objective": 0.9665765762329102, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5583333373069763, "regularize": 0.31874755024909973, "step": 300 }, { "epoch": 1.1335012594458438, "eval_dpo_loss": 0.6826499700546265, "eval_logits": -1.3663146495819092, "eval_logps": -99.1143798828125, "eval_loss": 1.0829436779022217, "eval_objective": 1.08933687210083, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5214088559150696, "eval_regularize": 0.40668678283691406, "eval_runtime": 235.9585, "eval_samples_per_second": 24.538, "eval_steps_per_second": 1.534, "step": 300 }, { "dpo_loss": 0.6493499875068665, "epoch": 1.1523929471032746, "grad_norm": 45.82300619637504, "learning_rate": 4.521228422078649e-08, "logits": -1.386905550956726, "logps": -92.7632064819336, "loss": 0.9773, "objective": 0.9832318425178528, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.33388179540634155, "step": 305 }, { "dpo_loss": 0.643381655216217, "epoch": 1.1712846347607053, "grad_norm": 43.431490164469025, "learning_rate": 4.3569329714950706e-08, "logits": -1.4345782995224, "logps": -92.81407928466797, "loss": 0.9734, "objective": 0.9835326671600342, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.574999988079071, "regularize": 0.3401508927345276, "step": 310 }, { "dpo_loss": 0.6410504579544067, "epoch": 1.190176322418136, "grad_norm": 47.92691613928351, "learning_rate": 4.1933407052657454e-08, "logits": -1.406593918800354, "logps": -94.20066833496094, "loss": 0.9734, "objective": 0.9850514531135559, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4791666567325592, "regularize": 0.3440004587173462, "step": 315 }, { "dpo_loss": 0.6493880748748779, "epoch": 1.2090680100755669, "grad_norm": 49.39982876479276, "learning_rate": 4.030630509131959e-08, "logits": -1.4532923698425293, "logps": -92.71504974365234, "loss": 0.9679, "objective": 0.961419939994812, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5375000238418579, "regularize": 0.31203168630599976, "step": 320 }, { "dpo_loss": 0.6389760971069336, "epoch": 1.2279596977329974, "grad_norm": 50.23647074169398, "learning_rate": 3.8689803043042996e-08, "logits": -1.3649462461471558, "logps": -94.10885620117188, "loss": 0.9416, "objective": 0.9290289282798767, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5625, "regularize": 0.29005277156829834, "step": 325 }, { "dpo_loss": 0.6403206586837769, "epoch": 1.2468513853904282, "grad_norm": 48.23351344804798, "learning_rate": 3.708566852908418e-08, "logits": -1.4762166738510132, "logps": -92.44864654541016, "loss": 0.9562, "objective": 0.942139744758606, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.30181899666786194, "step": 330 }, { "dpo_loss": 0.6417604088783264, "epoch": 1.265743073047859, "grad_norm": 54.445785967306286, "learning_rate": 3.54956556469825e-08, "logits": -1.446532130241394, "logps": -92.52754974365234, "loss": 0.9746, "objective": 0.9908974766731262, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5041666626930237, "regularize": 0.3491368889808655, "step": 335 }, { "dpo_loss": 0.6481935977935791, "epoch": 1.2846347607052897, "grad_norm": 43.30323866491239, "learning_rate": 3.392150305248024e-08, "logits": -1.416210651397705, "logps": -92.2144775390625, "loss": 0.9605, "objective": 0.9179424047470093, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5249999761581421, "regularize": 0.2697486877441406, "step": 340 }, { "dpo_loss": 0.6473791599273682, "epoch": 1.3035264483627205, "grad_norm": 46.63436114142443, "learning_rate": 3.236493205832794e-08, "logits": -1.4570471048355103, "logps": -93.01604461669922, "loss": 0.9614, "objective": 0.9653963446617126, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4958333373069763, "regularize": 0.3180171847343445, "step": 345 }, { "dpo_loss": 0.6448249816894531, "epoch": 1.322418136020151, "grad_norm": 48.68063353112031, "learning_rate": 3.082764475205442e-08, "logits": -1.4262375831604004, "logps": -92.78704071044922, "loss": 0.9708, "objective": 0.9741830229759216, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.550000011920929, "regularize": 0.32935774326324463, "step": 350 }, { "epoch": 1.322418136020151, "eval_dpo_loss": 0.6832794547080994, "eval_logits": -1.3642104864120483, "eval_logps": -99.26692962646484, "eval_loss": 1.082851767539978, "eval_objective": 1.0900707244873047, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5220994353294373, "eval_regularize": 0.4067910611629486, "eval_runtime": 235.8313, "eval_samples_per_second": 24.551, "eval_steps_per_second": 1.535, "step": 350 }, { "dpo_loss": 0.6446579098701477, "epoch": 1.341309823677582, "grad_norm": 44.218673755410634, "learning_rate": 2.9311322134758836e-08, "logits": -1.4359955787658691, "logps": -91.50603485107422, "loss": 0.9673, "objective": 0.991865873336792, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5416666865348816, "regularize": 0.3472079634666443, "step": 355 }, { "dpo_loss": 0.6347137689590454, "epoch": 1.3602015113350125, "grad_norm": 46.29947549071951, "learning_rate": 2.7817622282960812e-08, "logits": -1.430161714553833, "logps": -94.7589340209961, "loss": 0.9565, "objective": 0.9400501251220703, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5291666388511658, "regularize": 0.3053361773490906, "step": 360 }, { "dpo_loss": 0.6405820250511169, "epoch": 1.3790931989924433, "grad_norm": 47.287766896204, "learning_rate": 2.6348178535517962e-08, "logits": -1.4314912557601929, "logps": -93.3971939086914, "loss": 0.9446, "objective": 0.9586126208305359, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.550000011920929, "regularize": 0.31803005933761597, "step": 365 }, { "dpo_loss": 0.6399816274642944, "epoch": 1.397984886649874, "grad_norm": 47.51527080377652, "learning_rate": 2.4904597707593977e-08, "logits": -1.4040166139602661, "logps": -93.57247924804688, "loss": 0.9651, "objective": 0.9659475088119507, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5208333134651184, "regularize": 0.32596567273139954, "step": 370 }, { "dpo_loss": 0.6420871615409851, "epoch": 1.4168765743073048, "grad_norm": 45.177183298812, "learning_rate": 2.3488458333629773e-08, "logits": -1.362334966659546, "logps": -94.35677337646484, "loss": 0.9574, "objective": 0.9662178754806519, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5166666507720947, "regularize": 0.3241305351257324, "step": 375 }, { "dpo_loss": 0.6476179957389832, "epoch": 1.4357682619647356, "grad_norm": 42.51629065900929, "learning_rate": 2.21013089412392e-08, "logits": -1.4462828636169434, "logps": -93.31849670410156, "loss": 0.9686, "objective": 0.9625572562217712, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.6000000238418579, "regularize": 0.31493937969207764, "step": 380 }, { "dpo_loss": 0.6483107209205627, "epoch": 1.4546599496221662, "grad_norm": 50.494940681761314, "learning_rate": 2.0744666357916923e-08, "logits": -1.3696460723876953, "logps": -93.16974639892578, "loss": 0.9674, "objective": 0.9437763690948486, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4791666567325592, "regularize": 0.2954654395580292, "step": 385 }, { "dpo_loss": 0.6387519836425781, "epoch": 1.473551637279597, "grad_norm": 48.0772224935248, "learning_rate": 1.942001405240979e-08, "logits": -1.4375274181365967, "logps": -93.4893798828125, "loss": 0.951, "objective": 0.9498422145843506, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5666666626930237, "regularize": 0.3110901117324829, "step": 390 }, { "dpo_loss": 0.6425871253013611, "epoch": 1.4924433249370277, "grad_norm": 50.00168420726503, "learning_rate": 1.8128800512565513e-08, "logits": -1.4277968406677246, "logps": -92.17271423339844, "loss": 0.9714, "objective": 0.9966804385185242, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5041666626930237, "regularize": 0.3540932834148407, "step": 395 }, { "dpo_loss": 0.6415828466415405, "epoch": 1.5113350125944585, "grad_norm": 47.59483679412131, "learning_rate": 1.6872437661432515e-08, "logits": -1.4341318607330322, "logps": -94.33455657958984, "loss": 0.968, "objective": 0.9913274049758911, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5458333492279053, "regularize": 0.3497445285320282, "step": 400 }, { "epoch": 1.5113350125944585, "eval_dpo_loss": 0.6832301616668701, "eval_logits": -1.3681507110595703, "eval_logps": -99.251953125, "eval_loss": 1.0828720331192017, "eval_objective": 1.090120792388916, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5214088559150696, "eval_regularize": 0.4068906307220459, "eval_runtime": 235.2584, "eval_samples_per_second": 24.611, "eval_steps_per_second": 1.539, "step": 400 }, { "dpo_loss": 0.645163357257843, "epoch": 1.5302267002518892, "grad_norm": 47.34605640737913, "learning_rate": 1.5652299313342772e-08, "logits": -1.4498835802078247, "logps": -94.09959411621094, "loss": 0.9622, "objective": 0.9742183089256287, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5166666507720947, "regularize": 0.3290548026561737, "step": 405 }, { "dpo_loss": 0.6364944577217102, "epoch": 1.5491183879093198, "grad_norm": 49.26429643097485, "learning_rate": 1.4469719671666041e-08, "logits": -1.3392701148986816, "logps": -93.40227508544922, "loss": 0.9596, "objective": 0.970222532749176, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5625, "regularize": 0.33372804522514343, "step": 410 }, { "dpo_loss": 0.6261176466941833, "epoch": 1.5680100755667508, "grad_norm": 47.529393740735564, "learning_rate": 1.3325991869878012e-08, "logits": -1.398348093032837, "logps": -93.3853530883789, "loss": 0.9724, "objective": 0.9390360116958618, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.48750001192092896, "regularize": 0.3129182457923889, "step": 415 }, { "dpo_loss": 0.6377049088478088, "epoch": 1.5869017632241813, "grad_norm": 57.43753774126503, "learning_rate": 1.222236655753791e-08, "logits": -1.4465179443359375, "logps": -92.30918884277344, "loss": 0.9625, "objective": 0.9549995064735413, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5166666507720947, "regularize": 0.31729447841644287, "step": 420 }, { "dpo_loss": 0.6409914493560791, "epoch": 1.605793450881612, "grad_norm": 48.69171129272466, "learning_rate": 1.1160050532721526e-08, "logits": -1.4412765502929688, "logps": -92.86591339111328, "loss": 0.9543, "objective": 0.9196628928184509, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.48750001192092896, "regularize": 0.27867138385772705, "step": 425 }, { "dpo_loss": 0.6387853622436523, "epoch": 1.6246851385390428, "grad_norm": 45.275230591779184, "learning_rate": 1.0140205422405212e-08, "logits": -1.4535937309265137, "logps": -92.8780517578125, "loss": 0.9509, "objective": 0.9649083018302917, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5791666507720947, "regularize": 0.3261227011680603, "step": 430 }, { "dpo_loss": 0.6288203001022339, "epoch": 1.6435768261964736, "grad_norm": 44.1758284644859, "learning_rate": 9.163946412243894e-09, "logits": -1.4540935754776, "logps": -93.5007553100586, "loss": 0.9616, "objective": 0.9557725191116333, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5791666507720947, "regularize": 0.32695215940475464, "step": 435 }, { "dpo_loss": 0.6319211721420288, "epoch": 1.6624685138539044, "grad_norm": 47.357355663292644, "learning_rate": 8.232341027131883e-09, "logits": -1.4391558170318604, "logps": -92.56800079345703, "loss": 0.9484, "objective": 0.9231205582618713, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5583333373069763, "regularize": 0.2911991775035858, "step": 440 }, { "dpo_loss": 0.6425955295562744, "epoch": 1.681360201511335, "grad_norm": 46.443814611627765, "learning_rate": 7.346407963880136e-09, "logits": -1.4678118228912354, "logps": -92.27074432373047, "loss": 0.9664, "objective": 0.949626624584198, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5541666746139526, "regularize": 0.3070310652256012, "step": 445 }, { "dpo_loss": 0.6390999555587769, "epoch": 1.700251889168766, "grad_norm": 49.0414360352724, "learning_rate": 6.507115977286143e-09, "logits": -1.393315076828003, "logps": -93.02438354492188, "loss": 0.9495, "objective": 0.9575169682502747, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5541666746139526, "regularize": 0.31841692328453064, "step": 450 }, { "epoch": 1.700251889168766, "eval_dpo_loss": 0.6830143928527832, "eval_logits": -1.3688236474990845, "eval_logps": -99.32157135009766, "eval_loss": 1.0825797319412231, "eval_objective": 1.0900212526321411, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5214088559150696, "eval_regularize": 0.4070066511631012, "eval_runtime": 235.5467, "eval_samples_per_second": 24.581, "eval_steps_per_second": 1.537, "step": 450 }, { "dpo_loss": 0.6333620548248291, "epoch": 1.7191435768261965, "grad_norm": 51.27723544337059, "learning_rate": 5.715382820814885e-09, "logits": -1.463624358177185, "logps": -95.06629180908203, "loss": 0.9525, "objective": 0.981126070022583, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5541666746139526, "regularize": 0.34776392579078674, "step": 455 }, { "dpo_loss": 0.6484085321426392, "epoch": 1.7380352644836272, "grad_norm": 46.95616030148672, "learning_rate": 4.972074243048896e-09, "logits": -1.4587913751602173, "logps": -91.8313217163086, "loss": 0.9569, "objective": 0.9492828845977783, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5541666746139526, "regularize": 0.30087435245513916, "step": 460 }, { "dpo_loss": 0.6343809962272644, "epoch": 1.756926952141058, "grad_norm": 47.70310058715592, "learning_rate": 4.278003041004779e-09, "logits": -1.3960585594177246, "logps": -92.12299346923828, "loss": 0.9624, "objective": 0.9703618884086609, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5458333492279053, "regularize": 0.33598068356513977, "step": 465 }, { "dpo_loss": 0.6355936527252197, "epoch": 1.7758186397984885, "grad_norm": 48.36448652264405, "learning_rate": 3.63392817135173e-09, "logits": -1.4002528190612793, "logps": -92.8111343383789, "loss": 0.9635, "objective": 0.9694804549217224, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5416666865348816, "regularize": 0.3338867127895355, "step": 470 }, { "dpo_loss": 0.6456637978553772, "epoch": 1.7947103274559195, "grad_norm": 48.611216917206725, "learning_rate": 3.0405539205035023e-09, "logits": -1.4299951791763306, "logps": -92.8174819946289, "loss": 0.9564, "objective": 0.9828375577926636, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5458333492279053, "regularize": 0.3371736407279968, "step": 475 }, { "dpo_loss": 0.6420384645462036, "epoch": 1.81360201511335, "grad_norm": 46.76089409325827, "learning_rate": 2.4985291344915673e-09, "logits": -1.4936844110488892, "logps": -93.58541870117188, "loss": 0.967, "objective": 0.9691051244735718, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.46666666865348816, "regularize": 0.3270666301250458, "step": 480 }, { "dpo_loss": 0.6282381415367126, "epoch": 1.8324937027707808, "grad_norm": 52.789969586082115, "learning_rate": 2.0084465094614976e-09, "logits": -1.3527694940567017, "logps": -94.94033813476562, "loss": 0.9721, "objective": 0.9742304682731628, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5333333611488342, "regularize": 0.34599223732948303, "step": 485 }, { "dpo_loss": 0.6374282836914062, "epoch": 1.8513853904282116, "grad_norm": 47.2028496146643, "learning_rate": 1.570841943568446e-09, "logits": -1.430949091911316, "logps": -93.16008758544922, "loss": 0.9587, "objective": 0.9563023447990417, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5416666865348816, "regularize": 0.3188740313053131, "step": 490 }, { "dpo_loss": 0.6446228623390198, "epoch": 1.8702770780856424, "grad_norm": 47.63126102742745, "learning_rate": 1.1861939509803687e-09, "logits": -1.4252774715423584, "logps": -93.47933959960938, "loss": 0.9498, "objective": 0.9525411128997803, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.512499988079071, "regularize": 0.30791813135147095, "step": 495 }, { "dpo_loss": 0.6408995985984802, "epoch": 1.8891687657430731, "grad_norm": 47.540323720333014, "learning_rate": 8.54923138629815e-10, "logits": -1.4415290355682373, "logps": -92.98784637451172, "loss": 0.9463, "objective": 0.9464013576507568, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5541666746139526, "regularize": 0.3055017292499542, "step": 500 }, { "epoch": 1.8891687657430731, "eval_dpo_loss": 0.6829814314842224, "eval_logits": -1.3685972690582275, "eval_logps": -99.3604736328125, "eval_loss": 1.0826866626739502, "eval_objective": 1.0901830196380615, "eval_ranking_idealized": 0.5925414562225342, "eval_ranking_idealized_expo": 0.5165745615959167, "eval_ranking_simple": 0.5220994353294373, "eval_regularize": 0.40720152854919434, "eval_runtime": 235.7418, "eval_samples_per_second": 24.561, "eval_steps_per_second": 1.536, "step": 500 }, { "dpo_loss": 0.6330133080482483, "epoch": 1.9080604534005037, "grad_norm": 44.27516380619689, "learning_rate": 5.773917462864264e-10, "logits": -1.3644249439239502, "logps": -91.88590240478516, "loss": 0.954, "objective": 0.951960027217865, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.3189466595649719, "step": 505 }, { "dpo_loss": 0.6388721466064453, "epoch": 1.9269521410579347, "grad_norm": 51.31132661100108, "learning_rate": 3.53903250453047e-10, "logits": -1.4425408840179443, "logps": -95.0220718383789, "loss": 0.9495, "objective": 0.9448812007904053, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6000000238418579, "regularize": 0.30600884556770325, "step": 510 }, { "dpo_loss": 0.638128936290741, "epoch": 1.9458438287153652, "grad_norm": 49.79971218067957, "learning_rate": 1.8470203251865768e-10, "logits": -1.4008522033691406, "logps": -95.15757751464844, "loss": 0.9567, "objective": 0.9624292850494385, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.512499988079071, "regularize": 0.324300080537796, "step": 515 }, { "dpo_loss": 0.6457895636558533, "epoch": 1.964735516372796, "grad_norm": 55.67667071934356, "learning_rate": 6.997311153086882e-11, "logits": -1.464538335800171, "logps": -93.64386749267578, "loss": 0.9495, "objective": 0.9239124059677124, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5708333253860474, "regularize": 0.2781226336956024, "step": 520 }, { "dpo_loss": 0.6334381103515625, "epoch": 1.9836272040302267, "grad_norm": 50.356520924506064, "learning_rate": 9.841941880361915e-12, "logits": -1.4272739887237549, "logps": -93.79329681396484, "loss": 0.966, "objective": 0.9610148072242737, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.3275766670703888, "step": 525 }, { "epoch": 1.994962216624685, "step": 528, "total_flos": 0.0, "train_loss": 1.010300681672313, "train_runtime": 13850.696, "train_samples_per_second": 7.336, "train_steps_per_second": 0.038 } ], "logging_steps": 5, "max_steps": 528, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }