|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.994962216624685, |
|
"eval_steps": 50, |
|
"global_step": 528, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.003778337531486146, |
|
"grad_norm": 49.44846097246243, |
|
"learning_rate": 1.8867924528301883e-09, |
|
"logits": -1.1757584810256958, |
|
"logps": -94.8752670288086, |
|
"loss": 1.0913, |
|
"objective": 1.048144817352295, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4375, |
|
"ranking_simple": 0.4375, |
|
"regularize": 0.3549971580505371, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931453347206116, |
|
"epoch": 0.018891687657430732, |
|
"grad_norm": 56.77495215138669, |
|
"learning_rate": 9.433962264150943e-09, |
|
"logits": -1.3608341217041016, |
|
"logps": -92.11934661865234, |
|
"loss": 1.1069, |
|
"objective": 1.06926691532135, |
|
"ranking_idealized": 0.5729166865348816, |
|
"ranking_idealized_expo": 0.4895833432674408, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.37612107396125793, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.693118155002594, |
|
"epoch": 0.037783375314861464, |
|
"grad_norm": 50.79305715348446, |
|
"learning_rate": 1.8867924528301887e-08, |
|
"logits": -1.3560354709625244, |
|
"logps": -90.93302917480469, |
|
"loss": 1.1118, |
|
"objective": 1.1139355897903442, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.4208172559738159, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927705407142639, |
|
"epoch": 0.05667506297229219, |
|
"grad_norm": 48.78735409468553, |
|
"learning_rate": 2.830188679245283e-08, |
|
"logits": -1.3467658758163452, |
|
"logps": -92.52275848388672, |
|
"loss": 1.1086, |
|
"objective": 1.1240794658660889, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.4313090443611145, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927476525306702, |
|
"epoch": 0.07556675062972293, |
|
"grad_norm": 48.89099638416194, |
|
"learning_rate": 3.7735849056603774e-08, |
|
"logits": -1.286866545677185, |
|
"logps": -91.98667907714844, |
|
"loss": 1.1147, |
|
"objective": 1.127905011177063, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.43515732884407043, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6929070353507996, |
|
"epoch": 0.09445843828715365, |
|
"grad_norm": 51.44180006710924, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits": -1.3749586343765259, |
|
"logps": -92.29708862304688, |
|
"loss": 1.1069, |
|
"objective": 1.1106172800064087, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4177102744579315, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6915452480316162, |
|
"epoch": 0.11335012594458438, |
|
"grad_norm": 52.52518235565533, |
|
"learning_rate": 5.660377358490566e-08, |
|
"logits": -1.4094866514205933, |
|
"logps": -92.03213500976562, |
|
"loss": 1.1138, |
|
"objective": 1.1041828393936157, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4126375913619995, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6903693675994873, |
|
"epoch": 0.13224181360201512, |
|
"grad_norm": 55.88065446639471, |
|
"learning_rate": 6.603773584905659e-08, |
|
"logits": -1.3921390771865845, |
|
"logps": -93.27505493164062, |
|
"loss": 1.0984, |
|
"objective": 1.1129175424575806, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.4225481450557709, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6901296377182007, |
|
"epoch": 0.15113350125944586, |
|
"grad_norm": 48.11830818999703, |
|
"learning_rate": 7.547169811320755e-08, |
|
"logits": -1.4104276895523071, |
|
"logps": -92.22798156738281, |
|
"loss": 1.1021, |
|
"objective": 1.083837628364563, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.39370766282081604, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6862795352935791, |
|
"epoch": 0.17002518891687657, |
|
"grad_norm": 45.66068468167867, |
|
"learning_rate": 8.490566037735849e-08, |
|
"logits": -1.3663307428359985, |
|
"logps": -91.8579330444336, |
|
"loss": 1.1032, |
|
"objective": 1.1153314113616943, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.4290517270565033, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6879932284355164, |
|
"epoch": 0.1889168765743073, |
|
"grad_norm": 51.12091223625438, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits": -1.3217811584472656, |
|
"logps": -90.97481536865234, |
|
"loss": 1.0911, |
|
"objective": 1.0859743356704712, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.3979811370372772, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1889168765743073, |
|
"eval_dpo_loss": 0.690547525882721, |
|
"eval_logits": -1.3092193603515625, |
|
"eval_logps": -97.7940902709961, |
|
"eval_loss": 1.104289174079895, |
|
"eval_objective": 1.1041598320007324, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5165745615959167, |
|
"eval_regularize": 0.4136121869087219, |
|
"eval_runtime": 235.6732, |
|
"eval_samples_per_second": 24.568, |
|
"eval_steps_per_second": 1.536, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6884905695915222, |
|
"epoch": 0.20780856423173805, |
|
"grad_norm": 54.01101403233421, |
|
"learning_rate": 9.99956257238817e-08, |
|
"logits": -1.4181103706359863, |
|
"logps": -89.9561538696289, |
|
"loss": 1.1009, |
|
"objective": 1.094197154045105, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.40570658445358276, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6880350708961487, |
|
"epoch": 0.22670025188916876, |
|
"grad_norm": 54.996673931606395, |
|
"learning_rate": 9.994642390694308e-08, |
|
"logits": -1.3805103302001953, |
|
"logps": -90.7548828125, |
|
"loss": 1.1093, |
|
"objective": 1.114397406578064, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.4263623058795929, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6825106143951416, |
|
"epoch": 0.2455919395465995, |
|
"grad_norm": 56.45685371856582, |
|
"learning_rate": 9.98426064087682e-08, |
|
"logits": -1.4092483520507812, |
|
"logps": -92.43305969238281, |
|
"loss": 1.1012, |
|
"objective": 1.104454755783081, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.4219440817832947, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6836772561073303, |
|
"epoch": 0.26448362720403024, |
|
"grad_norm": 55.79791061619149, |
|
"learning_rate": 9.968428675226714e-08, |
|
"logits": -1.372382640838623, |
|
"logps": -92.17337036132812, |
|
"loss": 1.1123, |
|
"objective": 1.1599425077438354, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.476265013217926, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.6837269067764282, |
|
"epoch": 0.28337531486146095, |
|
"grad_norm": 47.47237853468278, |
|
"learning_rate": 9.947163805765979e-08, |
|
"logits": -1.3373608589172363, |
|
"logps": -90.7457275390625, |
|
"loss": 1.0793, |
|
"objective": 1.0441726446151733, |
|
"ranking_idealized": 0.5083333253860474, |
|
"ranking_idealized_expo": 0.4375, |
|
"ranking_simple": 0.4333333373069763, |
|
"regularize": 0.3604455888271332, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6796848773956299, |
|
"epoch": 0.3022670025188917, |
|
"grad_norm": 47.94506148186036, |
|
"learning_rate": 9.920489285317169e-08, |
|
"logits": -1.4042983055114746, |
|
"logps": -91.38846588134766, |
|
"loss": 1.0778, |
|
"objective": 1.0585055351257324, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3788203001022339, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.6791465282440186, |
|
"epoch": 0.3211586901763224, |
|
"grad_norm": 47.95858131470083, |
|
"learning_rate": 9.888434282076758e-08, |
|
"logits": -1.3911387920379639, |
|
"logps": -92.44884490966797, |
|
"loss": 1.0925, |
|
"objective": 1.0910677909851074, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.4119212031364441, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.6837694644927979, |
|
"epoch": 0.34005037783375314, |
|
"grad_norm": 55.1730704885422, |
|
"learning_rate": 9.851033847720166e-08, |
|
"logits": -1.3752093315124512, |
|
"logps": -91.67774200439453, |
|
"loss": 1.0847, |
|
"objective": 1.085326075553894, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.4015560448169708, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.6765946745872498, |
|
"epoch": 0.3589420654911839, |
|
"grad_norm": 48.33215476425452, |
|
"learning_rate": 9.808328879073251e-08, |
|
"logits": -1.3342726230621338, |
|
"logps": -92.56273651123047, |
|
"loss": 1.0612, |
|
"objective": 1.060593605041504, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.3839987814426422, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.6816121935844421, |
|
"epoch": 0.3778337531486146, |
|
"grad_norm": 49.179351832428715, |
|
"learning_rate": 9.760366073392245e-08, |
|
"logits": -1.3950403928756714, |
|
"logps": -92.40387725830078, |
|
"loss": 1.0629, |
|
"objective": 1.0402370691299438, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.35862475633621216, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3778337531486146, |
|
"eval_dpo_loss": 0.6871094703674316, |
|
"eval_logits": -1.3221344947814941, |
|
"eval_logps": -98.14198303222656, |
|
"eval_loss": 1.093752384185791, |
|
"eval_objective": 1.0948374271392822, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5179557800292969, |
|
"eval_regularize": 0.4077278971672058, |
|
"eval_runtime": 235.7453, |
|
"eval_samples_per_second": 24.56, |
|
"eval_steps_per_second": 1.536, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6785728931427002, |
|
"epoch": 0.3967254408060453, |
|
"grad_norm": 51.47899227122413, |
|
"learning_rate": 9.707197877300973e-08, |
|
"logits": -1.4077008962631226, |
|
"logps": -91.70052337646484, |
|
"loss": 1.0696, |
|
"objective": 1.086281180381775, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.407708078622818, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6762667298316956, |
|
"epoch": 0.4156171284634761, |
|
"grad_norm": 50.28143364432592, |
|
"learning_rate": 9.648882429441257e-08, |
|
"logits": -1.3699510097503662, |
|
"logps": -93.08635711669922, |
|
"loss": 1.0782, |
|
"objective": 1.1022285223007202, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.4259616434574127, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6772736310958862, |
|
"epoch": 0.4345088161209068, |
|
"grad_norm": 49.35211256173949, |
|
"learning_rate": 9.585483496899149e-08, |
|
"logits": -1.4434921741485596, |
|
"logps": -91.60735321044922, |
|
"loss": 1.049, |
|
"objective": 1.0325186252593994, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.3552449345588684, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.67376708984375, |
|
"epoch": 0.4534005037783375, |
|
"grad_norm": 49.769392715448596, |
|
"learning_rate": 9.517070405476573e-08, |
|
"logits": -1.3977102041244507, |
|
"logps": -91.86192321777344, |
|
"loss": 1.0722, |
|
"objective": 1.0750477313995361, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.40128055214881897, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.6729015111923218, |
|
"epoch": 0.4722921914357683, |
|
"grad_norm": 48.20763298222792, |
|
"learning_rate": 9.443717963884567e-08, |
|
"logits": -1.4215410947799683, |
|
"logps": -91.61782836914062, |
|
"loss": 1.0433, |
|
"objective": 1.0424840450286865, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.36958202719688416, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6743431687355042, |
|
"epoch": 0.491183879093199, |
|
"grad_norm": 48.45479288893814, |
|
"learning_rate": 9.365506381941065e-08, |
|
"logits": -1.3215110301971436, |
|
"logps": -92.12106323242188, |
|
"loss": 1.0515, |
|
"objective": 1.097717046737671, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.4233737289905548, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.6695905327796936, |
|
"epoch": 0.5100755667506297, |
|
"grad_norm": 43.646349799244106, |
|
"learning_rate": 9.282521182862628e-08, |
|
"logits": -1.3272545337677002, |
|
"logps": -92.60189056396484, |
|
"loss": 1.057, |
|
"objective": 1.0827622413635254, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.41317158937454224, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.6709803342819214, |
|
"epoch": 0.5289672544080605, |
|
"grad_norm": 47.3960543467576, |
|
"learning_rate": 9.194853109746073e-08, |
|
"logits": -1.4292913675308228, |
|
"logps": -92.06829071044922, |
|
"loss": 1.0462, |
|
"objective": 1.046062707901001, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3750823736190796, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.6690364480018616, |
|
"epoch": 0.5478589420654912, |
|
"grad_norm": 51.65656404072737, |
|
"learning_rate": 9.102598026342222e-08, |
|
"logits": -1.4306373596191406, |
|
"logps": -91.35098266601562, |
|
"loss": 1.0533, |
|
"objective": 1.0885671377182007, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.41953060030937195, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6675416231155396, |
|
"epoch": 0.5667506297229219, |
|
"grad_norm": 47.60377632937882, |
|
"learning_rate": 9.005856812230304e-08, |
|
"logits": -1.4119346141815186, |
|
"logps": -90.73808288574219, |
|
"loss": 1.0597, |
|
"objective": 1.0471808910369873, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.379639208316803, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5667506297229219, |
|
"eval_dpo_loss": 0.6842523813247681, |
|
"eval_logits": -1.3387939929962158, |
|
"eval_logps": -97.57598114013672, |
|
"eval_loss": 1.0886038541793823, |
|
"eval_objective": 1.089935064315796, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.4056825637817383, |
|
"eval_runtime": 236.1326, |
|
"eval_samples_per_second": 24.52, |
|
"eval_steps_per_second": 1.533, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.6675500273704529, |
|
"epoch": 0.5856423173803527, |
|
"grad_norm": 48.130868373709134, |
|
"learning_rate": 8.904735252507609e-08, |
|
"logits": -1.3894909620285034, |
|
"logps": -91.1141128540039, |
|
"loss": 1.0454, |
|
"objective": 1.0189038515090942, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3513537645339966, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.6642627716064453, |
|
"epoch": 0.6045340050377834, |
|
"grad_norm": 48.76010002249353, |
|
"learning_rate": 8.799343922115043e-08, |
|
"logits": -1.3890469074249268, |
|
"logps": -91.98741149902344, |
|
"loss": 1.0636, |
|
"objective": 1.0629956722259521, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.39873284101486206, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.6647918224334717, |
|
"epoch": 0.6234256926952141, |
|
"grad_norm": 51.709247719469836, |
|
"learning_rate": 8.689798064925048e-08, |
|
"logits": -1.430177927017212, |
|
"logps": -92.05915832519531, |
|
"loss": 1.0499, |
|
"objective": 1.05256187915802, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.3877698481082916, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.6606873273849487, |
|
"epoch": 0.6423173803526449, |
|
"grad_norm": 46.459328825125596, |
|
"learning_rate": 8.576217467724127e-08, |
|
"logits": -1.3984259366989136, |
|
"logps": -91.71315002441406, |
|
"loss": 1.029, |
|
"objective": 1.0234616994857788, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.44583332538604736, |
|
"regularize": 0.36277416348457336, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.6623333096504211, |
|
"epoch": 0.6612090680100756, |
|
"grad_norm": 51.1586426932559, |
|
"learning_rate": 8.458726329227747e-08, |
|
"logits": -1.4344308376312256, |
|
"logps": -92.77267456054688, |
|
"loss": 1.0316, |
|
"objective": 1.0282338857650757, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.36590057611465454, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.6674898862838745, |
|
"epoch": 0.6801007556675063, |
|
"grad_norm": 48.61836865824933, |
|
"learning_rate": 8.337453124270862e-08, |
|
"logits": -1.3953392505645752, |
|
"logps": -90.3520736694336, |
|
"loss": 1.0387, |
|
"objective": 1.0503102540969849, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3828202784061432, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.6639277935028076, |
|
"epoch": 0.698992443324937, |
|
"grad_norm": 56.3868937423093, |
|
"learning_rate": 8.212530463322582e-08, |
|
"logits": -1.376102089881897, |
|
"logps": -90.30011749267578, |
|
"loss": 1.0192, |
|
"objective": 1.0141814947128296, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3502536714076996, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.6700864434242249, |
|
"epoch": 0.7178841309823678, |
|
"grad_norm": 51.6918769313309, |
|
"learning_rate": 8.084094947478554e-08, |
|
"logits": -1.4650901556015015, |
|
"logps": -92.19945526123047, |
|
"loss": 1.0536, |
|
"objective": 1.0666062831878662, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3965199291706085, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.6677178740501404, |
|
"epoch": 0.7367758186397985, |
|
"grad_norm": 51.07369943909954, |
|
"learning_rate": 7.952287019089686e-08, |
|
"logits": -1.384783387184143, |
|
"logps": -92.98999786376953, |
|
"loss": 1.0332, |
|
"objective": 1.0433076620101929, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3755895793437958, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.6689361333847046, |
|
"epoch": 0.7556675062972292, |
|
"grad_norm": 50.59449995506128, |
|
"learning_rate": 7.817250808190483e-08, |
|
"logits": -1.4448987245559692, |
|
"logps": -92.8608169555664, |
|
"loss": 1.0201, |
|
"objective": 0.9872502684593201, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.31831392645835876, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7556675062972292, |
|
"eval_dpo_loss": 0.6845039129257202, |
|
"eval_logits": -1.3518892526626587, |
|
"eval_logps": -98.55247497558594, |
|
"eval_loss": 1.0877532958984375, |
|
"eval_objective": 1.0928053855895996, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.40830138325691223, |
|
"eval_runtime": 236.1466, |
|
"eval_samples_per_second": 24.519, |
|
"eval_steps_per_second": 1.533, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6665075421333313, |
|
"epoch": 0.77455919395466, |
|
"grad_norm": 44.9309987154925, |
|
"learning_rate": 7.679133974894983e-08, |
|
"logits": -1.377119779586792, |
|
"logps": -91.88671112060547, |
|
"loss": 1.0103, |
|
"objective": 1.0105198621749878, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.34401220083236694, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.6583543419837952, |
|
"epoch": 0.7934508816120907, |
|
"grad_norm": 51.30313684013195, |
|
"learning_rate": 7.538087547932585e-08, |
|
"logits": -1.3690478801727295, |
|
"logps": -93.26469421386719, |
|
"loss": 1.0206, |
|
"objective": 1.019605040550232, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3612505793571472, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.6641567349433899, |
|
"epoch": 0.8123425692695214, |
|
"grad_norm": 50.968568724158025, |
|
"learning_rate": 7.394265759500347e-08, |
|
"logits": -1.3541967868804932, |
|
"logps": -92.59414672851562, |
|
"loss": 1.0263, |
|
"objective": 1.0428659915924072, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3787091076374054, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.6569988131523132, |
|
"epoch": 0.8312342569269522, |
|
"grad_norm": 53.082634877219036, |
|
"learning_rate": 7.247825876612352e-08, |
|
"logits": -1.4405725002288818, |
|
"logps": -93.02523803710938, |
|
"loss": 1.0183, |
|
"objective": 1.0499581098556519, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3929591476917267, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.6647858023643494, |
|
"epoch": 0.8501259445843828, |
|
"grad_norm": 51.335148967482006, |
|
"learning_rate": 7.098928029130528e-08, |
|
"logits": -1.4285895824432373, |
|
"logps": -92.1469955444336, |
|
"loss": 1.0123, |
|
"objective": 1.034002423286438, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3692165017127991, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.6575040221214294, |
|
"epoch": 0.8690176322418136, |
|
"grad_norm": 48.651529158644536, |
|
"learning_rate": 6.947735034665001e-08, |
|
"logits": -1.4194341897964478, |
|
"logps": -92.3643798828125, |
|
"loss": 1.0129, |
|
"objective": 1.019044041633606, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3615398705005646, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.6606828570365906, |
|
"epoch": 0.8879093198992444, |
|
"grad_norm": 48.73605551669952, |
|
"learning_rate": 6.794412220535425e-08, |
|
"logits": -1.4252243041992188, |
|
"logps": -91.66600799560547, |
|
"loss": 1.0227, |
|
"objective": 1.0363646745681763, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.375681608915329, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.661056399345398, |
|
"epoch": 0.906801007556675, |
|
"grad_norm": 48.248478909206035, |
|
"learning_rate": 6.639127242987987e-08, |
|
"logits": -1.4544317722320557, |
|
"logps": -92.43217468261719, |
|
"loss": 1.0019, |
|
"objective": 1.0119422674179077, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3508855998516083, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.6545276045799255, |
|
"epoch": 0.9256926952141058, |
|
"grad_norm": 55.62927449157849, |
|
"learning_rate": 6.482049903865769e-08, |
|
"logits": -1.3181027173995972, |
|
"logps": -92.43726348876953, |
|
"loss": 1.0099, |
|
"objective": 0.9958193302154541, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.341291606426239, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.6624430418014526, |
|
"epoch": 0.9445843828715366, |
|
"grad_norm": 45.52279742153591, |
|
"learning_rate": 6.323351964932908e-08, |
|
"logits": -1.363103985786438, |
|
"logps": -91.6707992553711, |
|
"loss": 1.0173, |
|
"objective": 1.0481780767440796, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3857349753379822, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9445843828715366, |
|
"eval_dpo_loss": 0.6845124959945679, |
|
"eval_logits": -1.3585822582244873, |
|
"eval_logps": -99.33744812011719, |
|
"eval_loss": 1.0862815380096436, |
|
"eval_objective": 1.0919297933578491, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.519336998462677, |
|
"eval_regularize": 0.40741726756095886, |
|
"eval_runtime": 235.789, |
|
"eval_samples_per_second": 24.556, |
|
"eval_steps_per_second": 1.535, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6532657742500305, |
|
"epoch": 0.9634760705289672, |
|
"grad_norm": 51.00987164026873, |
|
"learning_rate": 6.163206960055652e-08, |
|
"logits": -1.460981011390686, |
|
"logps": -94.54005432128906, |
|
"loss": 1.0046, |
|
"objective": 1.0130220651626587, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.359756201505661, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.6577326059341431, |
|
"epoch": 0.982367758186398, |
|
"grad_norm": 50.576016219867064, |
|
"learning_rate": 6.001790005445606e-08, |
|
"logits": -1.362568974494934, |
|
"logps": -93.19829559326172, |
|
"loss": 1.01, |
|
"objective": 0.9904889464378357, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.33275625109672546, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.6527519226074219, |
|
"epoch": 1.0012594458438286, |
|
"grad_norm": 50.78293400345161, |
|
"learning_rate": 5.839277608172738e-08, |
|
"logits": -1.3711644411087036, |
|
"logps": -93.6723403930664, |
|
"loss": 1.0071, |
|
"objective": 1.026816487312317, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.3740644156932831, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.6553524136543274, |
|
"epoch": 1.0201511335012594, |
|
"grad_norm": 52.08673841750398, |
|
"learning_rate": 5.675847473157485e-08, |
|
"logits": -1.4852278232574463, |
|
"logps": -94.36783599853516, |
|
"loss": 0.9818, |
|
"objective": 0.9954887628555298, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.34013625979423523, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.6415101289749146, |
|
"epoch": 1.0390428211586902, |
|
"grad_norm": 44.617567180749866, |
|
"learning_rate": 5.511678308853025e-08, |
|
"logits": -1.4626718759536743, |
|
"logps": -93.63671112060547, |
|
"loss": 0.9878, |
|
"objective": 0.9779664278030396, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.33645620942115784, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.6500685811042786, |
|
"epoch": 1.057934508816121, |
|
"grad_norm": 47.25187912639425, |
|
"learning_rate": 5.3469496318302197e-08, |
|
"logits": -1.40652334690094, |
|
"logps": -92.5146255493164, |
|
"loss": 0.9659, |
|
"objective": 0.9707435369491577, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.3206748366355896, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.6382409930229187, |
|
"epoch": 1.0768261964735517, |
|
"grad_norm": 47.30764627956772, |
|
"learning_rate": 5.1818415704788724e-08, |
|
"logits": -1.4587684869766235, |
|
"logps": -94.23441314697266, |
|
"loss": 0.964, |
|
"objective": 0.9715672135353088, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.3333263099193573, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.6455419063568115, |
|
"epoch": 1.0957178841309823, |
|
"grad_norm": 45.41530383871643, |
|
"learning_rate": 5.016534668039975e-08, |
|
"logits": -1.3359150886535645, |
|
"logps": -91.27457427978516, |
|
"loss": 0.9783, |
|
"objective": 0.9912244081497192, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.3456825315952301, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.6454907655715942, |
|
"epoch": 1.114609571788413, |
|
"grad_norm": 52.660264254351745, |
|
"learning_rate": 4.8512096851843375e-08, |
|
"logits": -1.414397120475769, |
|
"logps": -92.17057037353516, |
|
"loss": 0.9657, |
|
"objective": 0.964141309261322, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.31865036487579346, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.6478288173675537, |
|
"epoch": 1.1335012594458438, |
|
"grad_norm": 55.18319202572926, |
|
"learning_rate": 4.686047402353433e-08, |
|
"logits": -1.482143521308899, |
|
"logps": -93.26191711425781, |
|
"loss": 0.9755, |
|
"objective": 0.9665765762329102, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.31874755024909973, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1335012594458438, |
|
"eval_dpo_loss": 0.6826499700546265, |
|
"eval_logits": -1.3663146495819092, |
|
"eval_logps": -99.1143798828125, |
|
"eval_loss": 1.0829436779022217, |
|
"eval_objective": 1.08933687210083, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5214088559150696, |
|
"eval_regularize": 0.40668678283691406, |
|
"eval_runtime": 235.9585, |
|
"eval_samples_per_second": 24.538, |
|
"eval_steps_per_second": 1.534, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.6493499875068665, |
|
"epoch": 1.1523929471032746, |
|
"grad_norm": 45.82300619637504, |
|
"learning_rate": 4.521228422078649e-08, |
|
"logits": -1.386905550956726, |
|
"logps": -92.7632064819336, |
|
"loss": 0.9773, |
|
"objective": 0.9832318425178528, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.33388179540634155, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.643381655216217, |
|
"epoch": 1.1712846347607053, |
|
"grad_norm": 43.431490164469025, |
|
"learning_rate": 4.3569329714950706e-08, |
|
"logits": -1.4345782995224, |
|
"logps": -92.81407928466797, |
|
"loss": 0.9734, |
|
"objective": 0.9835326671600342, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.3401508927345276, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.6410504579544067, |
|
"epoch": 1.190176322418136, |
|
"grad_norm": 47.92691613928351, |
|
"learning_rate": 4.1933407052657454e-08, |
|
"logits": -1.406593918800354, |
|
"logps": -94.20066833496094, |
|
"loss": 0.9734, |
|
"objective": 0.9850514531135559, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.3440004587173462, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.6493880748748779, |
|
"epoch": 1.2090680100755669, |
|
"grad_norm": 49.39982876479276, |
|
"learning_rate": 4.030630509131959e-08, |
|
"logits": -1.4532923698425293, |
|
"logps": -92.71504974365234, |
|
"loss": 0.9679, |
|
"objective": 0.961419939994812, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.31203168630599976, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.6389760971069336, |
|
"epoch": 1.2279596977329974, |
|
"grad_norm": 50.23647074169398, |
|
"learning_rate": 3.8689803043042996e-08, |
|
"logits": -1.3649462461471558, |
|
"logps": -94.10885620117188, |
|
"loss": 0.9416, |
|
"objective": 0.9290289282798767, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.29005277156829834, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.6403206586837769, |
|
"epoch": 1.2468513853904282, |
|
"grad_norm": 48.23351344804798, |
|
"learning_rate": 3.708566852908418e-08, |
|
"logits": -1.4762166738510132, |
|
"logps": -92.44864654541016, |
|
"loss": 0.9562, |
|
"objective": 0.942139744758606, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.30181899666786194, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.6417604088783264, |
|
"epoch": 1.265743073047859, |
|
"grad_norm": 54.445785967306286, |
|
"learning_rate": 3.54956556469825e-08, |
|
"logits": -1.446532130241394, |
|
"logps": -92.52754974365234, |
|
"loss": 0.9746, |
|
"objective": 0.9908974766731262, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3491368889808655, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.6481935977935791, |
|
"epoch": 1.2846347607052897, |
|
"grad_norm": 43.30323866491239, |
|
"learning_rate": 3.392150305248024e-08, |
|
"logits": -1.416210651397705, |
|
"logps": -92.2144775390625, |
|
"loss": 0.9605, |
|
"objective": 0.9179424047470093, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.2697486877441406, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.6473791599273682, |
|
"epoch": 1.3035264483627205, |
|
"grad_norm": 46.63436114142443, |
|
"learning_rate": 3.236493205832794e-08, |
|
"logits": -1.4570471048355103, |
|
"logps": -93.01604461669922, |
|
"loss": 0.9614, |
|
"objective": 0.9653963446617126, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.4958333373069763, |
|
"regularize": 0.3180171847343445, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.6448249816894531, |
|
"epoch": 1.322418136020151, |
|
"grad_norm": 48.68063353112031, |
|
"learning_rate": 3.082764475205442e-08, |
|
"logits": -1.4262375831604004, |
|
"logps": -92.78704071044922, |
|
"loss": 0.9708, |
|
"objective": 0.9741830229759216, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.32935774326324463, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.322418136020151, |
|
"eval_dpo_loss": 0.6832794547080994, |
|
"eval_logits": -1.3642104864120483, |
|
"eval_logps": -99.26692962646484, |
|
"eval_loss": 1.082851767539978, |
|
"eval_objective": 1.0900707244873047, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5220994353294373, |
|
"eval_regularize": 0.4067910611629486, |
|
"eval_runtime": 235.8313, |
|
"eval_samples_per_second": 24.551, |
|
"eval_steps_per_second": 1.535, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.6446579098701477, |
|
"epoch": 1.341309823677582, |
|
"grad_norm": 44.218673755410634, |
|
"learning_rate": 2.9311322134758836e-08, |
|
"logits": -1.4359955787658691, |
|
"logps": -91.50603485107422, |
|
"loss": 0.9673, |
|
"objective": 0.991865873336792, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3472079634666443, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.6347137689590454, |
|
"epoch": 1.3602015113350125, |
|
"grad_norm": 46.29947549071951, |
|
"learning_rate": 2.7817622282960812e-08, |
|
"logits": -1.430161714553833, |
|
"logps": -94.7589340209961, |
|
"loss": 0.9565, |
|
"objective": 0.9400501251220703, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.3053361773490906, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.6405820250511169, |
|
"epoch": 1.3790931989924433, |
|
"grad_norm": 47.287766896204, |
|
"learning_rate": 2.6348178535517962e-08, |
|
"logits": -1.4314912557601929, |
|
"logps": -93.3971939086914, |
|
"loss": 0.9446, |
|
"objective": 0.9586126208305359, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.31803005933761597, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.6399816274642944, |
|
"epoch": 1.397984886649874, |
|
"grad_norm": 47.51527080377652, |
|
"learning_rate": 2.4904597707593977e-08, |
|
"logits": -1.4040166139602661, |
|
"logps": -93.57247924804688, |
|
"loss": 0.9651, |
|
"objective": 0.9659475088119507, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.32596567273139954, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.6420871615409851, |
|
"epoch": 1.4168765743073048, |
|
"grad_norm": 45.177183298812, |
|
"learning_rate": 2.3488458333629773e-08, |
|
"logits": -1.362334966659546, |
|
"logps": -94.35677337646484, |
|
"loss": 0.9574, |
|
"objective": 0.9662178754806519, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3241305351257324, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.6476179957389832, |
|
"epoch": 1.4357682619647356, |
|
"grad_norm": 42.51629065900929, |
|
"learning_rate": 2.21013089412392e-08, |
|
"logits": -1.4462828636169434, |
|
"logps": -93.31849670410156, |
|
"loss": 0.9686, |
|
"objective": 0.9625572562217712, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.31493937969207764, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.6483107209205627, |
|
"epoch": 1.4546599496221662, |
|
"grad_norm": 50.494940681761314, |
|
"learning_rate": 2.0744666357916923e-08, |
|
"logits": -1.3696460723876953, |
|
"logps": -93.16974639892578, |
|
"loss": 0.9674, |
|
"objective": 0.9437763690948486, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.2954654395580292, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.6387519836425781, |
|
"epoch": 1.473551637279597, |
|
"grad_norm": 48.0772224935248, |
|
"learning_rate": 1.942001405240979e-08, |
|
"logits": -1.4375274181365967, |
|
"logps": -93.4893798828125, |
|
"loss": 0.951, |
|
"objective": 0.9498422145843506, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.3110901117324829, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.6425871253013611, |
|
"epoch": 1.4924433249370277, |
|
"grad_norm": 50.00168420726503, |
|
"learning_rate": 1.8128800512565513e-08, |
|
"logits": -1.4277968406677246, |
|
"logps": -92.17271423339844, |
|
"loss": 0.9714, |
|
"objective": 0.9966804385185242, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3540932834148407, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.6415828466415405, |
|
"epoch": 1.5113350125944585, |
|
"grad_norm": 47.59483679412131, |
|
"learning_rate": 1.6872437661432515e-08, |
|
"logits": -1.4341318607330322, |
|
"logps": -94.33455657958984, |
|
"loss": 0.968, |
|
"objective": 0.9913274049758911, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3497445285320282, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5113350125944585, |
|
"eval_dpo_loss": 0.6832301616668701, |
|
"eval_logits": -1.3681507110595703, |
|
"eval_logps": -99.251953125, |
|
"eval_loss": 1.0828720331192017, |
|
"eval_objective": 1.090120792388916, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5214088559150696, |
|
"eval_regularize": 0.4068906307220459, |
|
"eval_runtime": 235.2584, |
|
"eval_samples_per_second": 24.611, |
|
"eval_steps_per_second": 1.539, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.645163357257843, |
|
"epoch": 1.5302267002518892, |
|
"grad_norm": 47.34605640737913, |
|
"learning_rate": 1.5652299313342772e-08, |
|
"logits": -1.4498835802078247, |
|
"logps": -94.09959411621094, |
|
"loss": 0.9622, |
|
"objective": 0.9742183089256287, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3290548026561737, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.6364944577217102, |
|
"epoch": 1.5491183879093198, |
|
"grad_norm": 49.26429643097485, |
|
"learning_rate": 1.4469719671666041e-08, |
|
"logits": -1.3392701148986816, |
|
"logps": -93.40227508544922, |
|
"loss": 0.9596, |
|
"objective": 0.970222532749176, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.33372804522514343, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.6261176466941833, |
|
"epoch": 1.5680100755667508, |
|
"grad_norm": 47.529393740735564, |
|
"learning_rate": 1.3325991869878012e-08, |
|
"logits": -1.398348093032837, |
|
"logps": -93.3853530883789, |
|
"loss": 0.9724, |
|
"objective": 0.9390360116958618, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.3129182457923889, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.6377049088478088, |
|
"epoch": 1.5869017632241813, |
|
"grad_norm": 57.43753774126503, |
|
"learning_rate": 1.222236655753791e-08, |
|
"logits": -1.4465179443359375, |
|
"logps": -92.30918884277344, |
|
"loss": 0.9625, |
|
"objective": 0.9549995064735413, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.31729447841644287, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.6409914493560791, |
|
"epoch": 1.605793450881612, |
|
"grad_norm": 48.69171129272466, |
|
"learning_rate": 1.1160050532721526e-08, |
|
"logits": -1.4412765502929688, |
|
"logps": -92.86591339111328, |
|
"loss": 0.9543, |
|
"objective": 0.9196628928184509, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.27867138385772705, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.6387853622436523, |
|
"epoch": 1.6246851385390428, |
|
"grad_norm": 45.275230591779184, |
|
"learning_rate": 1.0140205422405212e-08, |
|
"logits": -1.4535937309265137, |
|
"logps": -92.8780517578125, |
|
"loss": 0.9509, |
|
"objective": 0.9649083018302917, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3261227011680603, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.6288203001022339, |
|
"epoch": 1.6435768261964736, |
|
"grad_norm": 44.1758284644859, |
|
"learning_rate": 9.163946412243894e-09, |
|
"logits": -1.4540935754776, |
|
"logps": -93.5007553100586, |
|
"loss": 0.9616, |
|
"objective": 0.9557725191116333, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.32695215940475464, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.6319211721420288, |
|
"epoch": 1.6624685138539044, |
|
"grad_norm": 47.357355663292644, |
|
"learning_rate": 8.232341027131883e-09, |
|
"logits": -1.4391558170318604, |
|
"logps": -92.56800079345703, |
|
"loss": 0.9484, |
|
"objective": 0.9231205582618713, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.2911991775035858, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.6425955295562744, |
|
"epoch": 1.681360201511335, |
|
"grad_norm": 46.443814611627765, |
|
"learning_rate": 7.346407963880136e-09, |
|
"logits": -1.4678118228912354, |
|
"logps": -92.27074432373047, |
|
"loss": 0.9664, |
|
"objective": 0.949626624584198, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3070310652256012, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.6390999555587769, |
|
"epoch": 1.700251889168766, |
|
"grad_norm": 49.0414360352724, |
|
"learning_rate": 6.507115977286143e-09, |
|
"logits": -1.393315076828003, |
|
"logps": -93.02438354492188, |
|
"loss": 0.9495, |
|
"objective": 0.9575169682502747, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.31841692328453064, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.700251889168766, |
|
"eval_dpo_loss": 0.6830143928527832, |
|
"eval_logits": -1.3688236474990845, |
|
"eval_logps": -99.32157135009766, |
|
"eval_loss": 1.0825797319412231, |
|
"eval_objective": 1.0900212526321411, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5214088559150696, |
|
"eval_regularize": 0.4070066511631012, |
|
"eval_runtime": 235.5467, |
|
"eval_samples_per_second": 24.581, |
|
"eval_steps_per_second": 1.537, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.6333620548248291, |
|
"epoch": 1.7191435768261965, |
|
"grad_norm": 51.27723544337059, |
|
"learning_rate": 5.715382820814885e-09, |
|
"logits": -1.463624358177185, |
|
"logps": -95.06629180908203, |
|
"loss": 0.9525, |
|
"objective": 0.981126070022583, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.34776392579078674, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.6484085321426392, |
|
"epoch": 1.7380352644836272, |
|
"grad_norm": 46.95616030148672, |
|
"learning_rate": 4.972074243048896e-09, |
|
"logits": -1.4587913751602173, |
|
"logps": -91.8313217163086, |
|
"loss": 0.9569, |
|
"objective": 0.9492828845977783, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.30087435245513916, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.6343809962272644, |
|
"epoch": 1.756926952141058, |
|
"grad_norm": 47.70310058715592, |
|
"learning_rate": 4.278003041004779e-09, |
|
"logits": -1.3960585594177246, |
|
"logps": -92.12299346923828, |
|
"loss": 0.9624, |
|
"objective": 0.9703618884086609, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.33598068356513977, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.6355936527252197, |
|
"epoch": 1.7758186397984885, |
|
"grad_norm": 48.36448652264405, |
|
"learning_rate": 3.63392817135173e-09, |
|
"logits": -1.4002528190612793, |
|
"logps": -92.8111343383789, |
|
"loss": 0.9635, |
|
"objective": 0.9694804549217224, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3338867127895355, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.6456637978553772, |
|
"epoch": 1.7947103274559195, |
|
"grad_norm": 48.611216917206725, |
|
"learning_rate": 3.0405539205035023e-09, |
|
"logits": -1.4299951791763306, |
|
"logps": -92.8174819946289, |
|
"loss": 0.9564, |
|
"objective": 0.9828375577926636, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3371736407279968, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.6420384645462036, |
|
"epoch": 1.81360201511335, |
|
"grad_norm": 46.76089409325827, |
|
"learning_rate": 2.4985291344915673e-09, |
|
"logits": -1.4936844110488892, |
|
"logps": -93.58541870117188, |
|
"loss": 0.967, |
|
"objective": 0.9691051244735718, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.46666666865348816, |
|
"regularize": 0.3270666301250458, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.6282381415367126, |
|
"epoch": 1.8324937027707808, |
|
"grad_norm": 52.789969586082115, |
|
"learning_rate": 2.0084465094614976e-09, |
|
"logits": -1.3527694940567017, |
|
"logps": -94.94033813476562, |
|
"loss": 0.9721, |
|
"objective": 0.9742304682731628, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.34599223732948303, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.6374282836914062, |
|
"epoch": 1.8513853904282116, |
|
"grad_norm": 47.2028496146643, |
|
"learning_rate": 1.570841943568446e-09, |
|
"logits": -1.430949091911316, |
|
"logps": -93.16008758544922, |
|
"loss": 0.9587, |
|
"objective": 0.9563023447990417, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3188740313053131, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.6446228623390198, |
|
"epoch": 1.8702770780856424, |
|
"grad_norm": 47.63126102742745, |
|
"learning_rate": 1.1861939509803687e-09, |
|
"logits": -1.4252774715423584, |
|
"logps": -93.47933959960938, |
|
"loss": 0.9498, |
|
"objective": 0.9525411128997803, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.30791813135147095, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.6408995985984802, |
|
"epoch": 1.8891687657430731, |
|
"grad_norm": 47.540323720333014, |
|
"learning_rate": 8.54923138629815e-10, |
|
"logits": -1.4415290355682373, |
|
"logps": -92.98784637451172, |
|
"loss": 0.9463, |
|
"objective": 0.9464013576507568, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.3055017292499542, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8891687657430731, |
|
"eval_dpo_loss": 0.6829814314842224, |
|
"eval_logits": -1.3685972690582275, |
|
"eval_logps": -99.3604736328125, |
|
"eval_loss": 1.0826866626739502, |
|
"eval_objective": 1.0901830196380615, |
|
"eval_ranking_idealized": 0.5925414562225342, |
|
"eval_ranking_idealized_expo": 0.5165745615959167, |
|
"eval_ranking_simple": 0.5220994353294373, |
|
"eval_regularize": 0.40720152854919434, |
|
"eval_runtime": 235.7418, |
|
"eval_samples_per_second": 24.561, |
|
"eval_steps_per_second": 1.536, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.6330133080482483, |
|
"epoch": 1.9080604534005037, |
|
"grad_norm": 44.27516380619689, |
|
"learning_rate": 5.773917462864264e-10, |
|
"logits": -1.3644249439239502, |
|
"logps": -91.88590240478516, |
|
"loss": 0.954, |
|
"objective": 0.951960027217865, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3189466595649719, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.6388721466064453, |
|
"epoch": 1.9269521410579347, |
|
"grad_norm": 51.31132661100108, |
|
"learning_rate": 3.53903250453047e-10, |
|
"logits": -1.4425408840179443, |
|
"logps": -95.0220718383789, |
|
"loss": 0.9495, |
|
"objective": 0.9448812007904053, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.30600884556770325, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.638128936290741, |
|
"epoch": 1.9458438287153652, |
|
"grad_norm": 49.79971218067957, |
|
"learning_rate": 1.8470203251865768e-10, |
|
"logits": -1.4008522033691406, |
|
"logps": -95.15757751464844, |
|
"loss": 0.9567, |
|
"objective": 0.9624292850494385, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.324300080537796, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.6457895636558533, |
|
"epoch": 1.964735516372796, |
|
"grad_norm": 55.67667071934356, |
|
"learning_rate": 6.997311153086882e-11, |
|
"logits": -1.464538335800171, |
|
"logps": -93.64386749267578, |
|
"loss": 0.9495, |
|
"objective": 0.9239124059677124, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.2781226336956024, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.6334381103515625, |
|
"epoch": 1.9836272040302267, |
|
"grad_norm": 50.356520924506064, |
|
"learning_rate": 9.841941880361915e-12, |
|
"logits": -1.4272739887237549, |
|
"logps": -93.79329681396484, |
|
"loss": 0.966, |
|
"objective": 0.9610148072242737, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3275766670703888, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.994962216624685, |
|
"step": 528, |
|
"total_flos": 0.0, |
|
"train_loss": 1.010300681672313, |
|
"train_runtime": 13850.696, |
|
"train_samples_per_second": 7.336, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 528, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|