|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 391, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 422.289956912934, |
|
"learning_rate": 1.25e-09, |
|
"logits/chosen": -4.623842239379883, |
|
"logits/rejected": -4.85917854309082, |
|
"logps/chosen": -239.31422424316406, |
|
"logps/rejected": -207.56365966796875, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 397.335507450448, |
|
"learning_rate": 1.25e-08, |
|
"logits/chosen": -4.334493160247803, |
|
"logits/rejected": -4.643917560577393, |
|
"logps/chosen": -265.1265563964844, |
|
"logps/rejected": -215.76223754882812, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0019649432506412268, |
|
"rewards/margins": 0.0034448718652129173, |
|
"rewards/rejected": -0.005409814417362213, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 411.8624893441942, |
|
"learning_rate": 2.5e-08, |
|
"logits/chosen": -4.506826400756836, |
|
"logits/rejected": -4.740732192993164, |
|
"logps/chosen": -267.86932373046875, |
|
"logps/rejected": -216.64578247070312, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.007061410695314407, |
|
"rewards/margins": 0.010701502673327923, |
|
"rewards/rejected": -0.003640091512352228, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 426.4548766919431, |
|
"learning_rate": 3.75e-08, |
|
"logits/chosen": -4.585576057434082, |
|
"logits/rejected": -4.762608528137207, |
|
"logps/chosen": -258.25, |
|
"logps/rejected": -214.71231079101562, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.028281379491090775, |
|
"rewards/margins": 0.04623327776789665, |
|
"rewards/rejected": -0.017951902002096176, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 368.66232685986097, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -4.62213659286499, |
|
"logits/rejected": -4.706842422485352, |
|
"logps/chosen": -252.6122283935547, |
|
"logps/rejected": -220.41427612304688, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.0986957773566246, |
|
"rewards/margins": 0.1730131059885025, |
|
"rewards/rejected": -0.07431730628013611, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 294.81806277707193, |
|
"learning_rate": 4.989992961303737e-08, |
|
"logits/chosen": -4.516692161560059, |
|
"logits/rejected": -4.714283466339111, |
|
"logps/chosen": -269.63470458984375, |
|
"logps/rejected": -227.5962371826172, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.2334650307893753, |
|
"rewards/margins": 0.4059367775917053, |
|
"rewards/rejected": -0.17247170209884644, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 263.2732793061953, |
|
"learning_rate": 4.960051957873725e-08, |
|
"logits/chosen": -4.610293388366699, |
|
"logits/rejected": -4.734058856964111, |
|
"logps/chosen": -237.3783721923828, |
|
"logps/rejected": -218.6456298828125, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.3744103014469147, |
|
"rewards/margins": 0.7368327975273132, |
|
"rewards/rejected": -0.3624224364757538, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 243.25478495437125, |
|
"learning_rate": 4.910416686333906e-08, |
|
"logits/chosen": -4.496267795562744, |
|
"logits/rejected": -4.724743843078613, |
|
"logps/chosen": -248.19540405273438, |
|
"logps/rejected": -222.69140625, |
|
"loss": 0.4014, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.49465441703796387, |
|
"rewards/margins": 0.9875491261482239, |
|
"rewards/rejected": -0.49289458990097046, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 203.40516001004536, |
|
"learning_rate": 4.841484508350678e-08, |
|
"logits/chosen": -4.488529682159424, |
|
"logits/rejected": -4.711686611175537, |
|
"logps/chosen": -254.6974639892578, |
|
"logps/rejected": -235.7099151611328, |
|
"loss": 0.3429, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.652252197265625, |
|
"rewards/margins": 1.1913691759109497, |
|
"rewards/rejected": -0.5391170978546143, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 237.79736326938396, |
|
"learning_rate": 4.7538072695020406e-08, |
|
"logits/chosen": -4.58192777633667, |
|
"logits/rejected": -4.797459602355957, |
|
"logps/chosen": -243.06143188476562, |
|
"logps/rejected": -210.63308715820312, |
|
"loss": 0.3085, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.8151354789733887, |
|
"rewards/margins": 1.595442533493042, |
|
"rewards/rejected": -0.7803069353103638, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 222.07031689896021, |
|
"learning_rate": 4.6480868814055416e-08, |
|
"logits/chosen": -4.459914207458496, |
|
"logits/rejected": -4.747165679931641, |
|
"logps/chosen": -247.2704620361328, |
|
"logps/rejected": -236.6487274169922, |
|
"loss": 0.2984, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.9447771906852722, |
|
"rewards/margins": 1.8987438678741455, |
|
"rewards/rejected": -0.953966498374939, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -4.55070686340332, |
|
"eval_logits/rejected": -4.762002944946289, |
|
"eval_logps/chosen": -390.7516174316406, |
|
"eval_logps/rejected": -515.337158203125, |
|
"eval_loss": 0.9402573704719543, |
|
"eval_rewards/accuracies": 0.40625, |
|
"eval_rewards/chosen": -0.027714576572179794, |
|
"eval_rewards/margins": -0.3327641487121582, |
|
"eval_rewards/rejected": 0.3050495684146881, |
|
"eval_runtime": 97.8238, |
|
"eval_samples_per_second": 20.445, |
|
"eval_steps_per_second": 0.327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 179.38114597248955, |
|
"learning_rate": 4.525169702472916e-08, |
|
"logits/chosen": -4.51773738861084, |
|
"logits/rejected": -4.777291297912598, |
|
"logps/chosen": -238.6410369873047, |
|
"logps/rejected": -227.3874053955078, |
|
"loss": 0.2711, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.0544707775115967, |
|
"rewards/margins": 2.1246237754821777, |
|
"rewards/rejected": -1.070152997970581, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 163.17108435846185, |
|
"learning_rate": 4.386039762276975e-08, |
|
"logits/chosen": -4.462746620178223, |
|
"logits/rejected": -4.7056145668029785, |
|
"logps/chosen": -262.86920166015625, |
|
"logps/rejected": -238.99801635742188, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.3120397329330444, |
|
"rewards/margins": 2.3509771823883057, |
|
"rewards/rejected": -1.0389372110366821, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 229.70994586547334, |
|
"learning_rate": 4.231810883773999e-08, |
|
"logits/chosen": -4.532160758972168, |
|
"logits/rejected": -4.803128719329834, |
|
"logps/chosen": -246.3385009765625, |
|
"logps/rejected": -227.83792114257812, |
|
"loss": 0.2577, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.2105365991592407, |
|
"rewards/margins": 2.461458921432495, |
|
"rewards/rejected": -1.2509223222732544, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 152.87001146329087, |
|
"learning_rate": 4.063717766448194e-08, |
|
"logits/chosen": -4.559675216674805, |
|
"logits/rejected": -4.842075824737549, |
|
"logps/chosen": -272.3346252441406, |
|
"logps/rejected": -246.54464721679688, |
|
"loss": 0.2453, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1860424280166626, |
|
"rewards/margins": 2.6231608390808105, |
|
"rewards/rejected": -1.4371181726455688, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 176.7311824941399, |
|
"learning_rate": 3.8831061017632845e-08, |
|
"logits/chosen": -4.557957172393799, |
|
"logits/rejected": -4.812293529510498, |
|
"logps/chosen": -239.32144165039062, |
|
"logps/rejected": -232.82479858398438, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.3096827268600464, |
|
"rewards/margins": 2.680234432220459, |
|
"rewards/rejected": -1.3705517053604126, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 179.5862213559593, |
|
"learning_rate": 3.691421800053269e-08, |
|
"logits/chosen": -4.614952564239502, |
|
"logits/rejected": -4.799678802490234, |
|
"logps/chosen": -238.4506378173828, |
|
"logps/rejected": -229.4785614013672, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.300586462020874, |
|
"rewards/margins": 2.687537908554077, |
|
"rewards/rejected": -1.3869514465332031, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 160.70849517962517, |
|
"learning_rate": 3.490199415097892e-08, |
|
"logits/chosen": -4.499081611633301, |
|
"logits/rejected": -4.779529571533203, |
|
"logps/chosen": -244.9915771484375, |
|
"logps/rejected": -235.46743774414062, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6082652807235718, |
|
"rewards/margins": 3.174710988998413, |
|
"rewards/rejected": -1.5664453506469727, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 149.11333079529007, |
|
"learning_rate": 3.2810498590513937e-08, |
|
"logits/chosen": -4.59390926361084, |
|
"logits/rejected": -4.832152366638184, |
|
"logps/chosen": -222.95986938476562, |
|
"logps/rejected": -211.53585815429688, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.3903782367706299, |
|
"rewards/margins": 2.9419426918029785, |
|
"rewards/rejected": -1.5515644550323486, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 171.77003361632143, |
|
"learning_rate": 3.065647506074306e-08, |
|
"logits/chosen": -4.56182861328125, |
|
"logits/rejected": -4.7075724601745605, |
|
"logps/chosen": -245.95556640625, |
|
"logps/rejected": -247.3394012451172, |
|
"loss": 0.2299, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.4937294721603394, |
|
"rewards/margins": 2.711061477661133, |
|
"rewards/rejected": -1.2173320055007935, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 152.3542212939215, |
|
"learning_rate": 2.8457167879118325e-08, |
|
"logits/chosen": -4.556639194488525, |
|
"logits/rejected": -4.735670566558838, |
|
"logps/chosen": -237.8849639892578, |
|
"logps/rejected": -229.5240020751953, |
|
"loss": 0.2338, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.4254279136657715, |
|
"rewards/margins": 2.843172073364258, |
|
"rewards/rejected": -1.4177442789077759, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -4.538640975952148, |
|
"eval_logits/rejected": -4.758352279663086, |
|
"eval_logps/chosen": -391.07916259765625, |
|
"eval_logps/rejected": -514.2457885742188, |
|
"eval_loss": 1.0996507406234741, |
|
"eval_rewards/accuracies": 0.38671875, |
|
"eval_rewards/chosen": -0.06046929210424423, |
|
"eval_rewards/margins": -0.4746614694595337, |
|
"eval_rewards/rejected": 0.41419219970703125, |
|
"eval_runtime": 98.0841, |
|
"eval_samples_per_second": 20.391, |
|
"eval_steps_per_second": 0.326, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 139.65082338502864, |
|
"learning_rate": 2.6230183887296952e-08, |
|
"logits/chosen": -4.619709014892578, |
|
"logits/rejected": -4.859663963317871, |
|
"logps/chosen": -249.32113647460938, |
|
"logps/rejected": -246.33468627929688, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.6563961505889893, |
|
"rewards/margins": 3.425267457962036, |
|
"rewards/rejected": -1.768871545791626, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 147.42119588032455, |
|
"learning_rate": 2.3993351497264626e-08, |
|
"logits/chosen": -4.466108798980713, |
|
"logits/rejected": -4.793113708496094, |
|
"logps/chosen": -247.28756713867188, |
|
"logps/rejected": -244.1797637939453, |
|
"loss": 0.2159, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.5523773431777954, |
|
"rewards/margins": 3.5030410289764404, |
|
"rewards/rejected": -1.9506635665893555, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 158.88183877851495, |
|
"learning_rate": 2.1764577963648613e-08, |
|
"logits/chosen": -4.541924476623535, |
|
"logits/rejected": -4.858447074890137, |
|
"logps/chosen": -245.6726531982422, |
|
"logps/rejected": -243.28677368164062, |
|
"loss": 0.2197, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.5296446084976196, |
|
"rewards/margins": 3.487210750579834, |
|
"rewards/rejected": -1.957566499710083, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 157.203527489415, |
|
"learning_rate": 1.9561706024845818e-08, |
|
"logits/chosen": -4.5143561363220215, |
|
"logits/rejected": -4.772491455078125, |
|
"logps/chosen": -256.7393798828125, |
|
"logps/rejected": -240.91226196289062, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6083800792694092, |
|
"rewards/margins": 3.60624623298645, |
|
"rewards/rejected": -1.9978656768798828, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 159.9556615524972, |
|
"learning_rate": 1.740237106064383e-08, |
|
"logits/chosen": -4.641883850097656, |
|
"logits/rejected": -4.893360614776611, |
|
"logps/chosen": -234.55264282226562, |
|
"logps/rejected": -224.25631713867188, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.414884090423584, |
|
"rewards/margins": 3.079598903656006, |
|
"rewards/rejected": -1.6647151708602905, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 198.60636587673295, |
|
"learning_rate": 1.530385990987863e-08, |
|
"logits/chosen": -4.517378330230713, |
|
"logits/rejected": -4.799233913421631, |
|
"logps/chosen": -238.4044189453125, |
|
"logps/rejected": -242.4611358642578, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5644124746322632, |
|
"rewards/margins": 3.8356785774230957, |
|
"rewards/rejected": -2.271266460418701, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 186.10967020286805, |
|
"learning_rate": 1.3282972478382409e-08, |
|
"logits/chosen": -4.603947639465332, |
|
"logits/rejected": -4.826247215270996, |
|
"logps/chosen": -245.811279296875, |
|
"logps/rejected": -236.3955841064453, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.5221991539001465, |
|
"rewards/margins": 3.6592516899108887, |
|
"rewards/rejected": -2.137052536010742, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 144.59147008918274, |
|
"learning_rate": 1.1355887245137383e-08, |
|
"logits/chosen": -4.557550430297852, |
|
"logits/rejected": -4.780216217041016, |
|
"logps/chosen": -248.1402587890625, |
|
"logps/rejected": -262.3576354980469, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.636661171913147, |
|
"rewards/margins": 3.578484296798706, |
|
"rewards/rejected": -1.9418232440948486, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 331.8489693457681, |
|
"learning_rate": 9.538031743343628e-09, |
|
"logits/chosen": -4.426544666290283, |
|
"logits/rejected": -4.761611461639404, |
|
"logps/chosen": -255.4153289794922, |
|
"logps/rejected": -234.84487915039062, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.7193052768707275, |
|
"rewards/margins": 3.6408512592315674, |
|
"rewards/rejected": -1.9215457439422607, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 177.15049850318, |
|
"learning_rate": 7.843959053281662e-09, |
|
"logits/chosen": -4.541173934936523, |
|
"logits/rejected": -4.741909503936768, |
|
"logps/chosen": -236.41854858398438, |
|
"logps/rejected": -236.483642578125, |
|
"loss": 0.2158, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.6989631652832031, |
|
"rewards/margins": 3.7507786750793457, |
|
"rewards/rejected": -2.0518155097961426, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -4.539734840393066, |
|
"eval_logits/rejected": -4.759631633758545, |
|
"eval_logps/chosen": -392.2895812988281, |
|
"eval_logps/rejected": -515.406494140625, |
|
"eval_loss": 1.1359957456588745, |
|
"eval_rewards/accuracies": 0.41015625, |
|
"eval_rewards/chosen": -0.18151262402534485, |
|
"eval_rewards/margins": -0.47963014245033264, |
|
"eval_rewards/rejected": 0.2981175184249878, |
|
"eval_runtime": 97.905, |
|
"eval_samples_per_second": 20.428, |
|
"eval_steps_per_second": 0.327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 159.8961908197972, |
|
"learning_rate": 6.28723129572247e-09, |
|
"logits/chosen": -4.461672782897949, |
|
"logits/rejected": -4.776505470275879, |
|
"logps/chosen": -244.0063934326172, |
|
"logps/rejected": -244.4510955810547, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.7133222818374634, |
|
"rewards/margins": 3.9386374950408936, |
|
"rewards/rejected": -2.2253153324127197, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 185.55678167306448, |
|
"learning_rate": 4.880311058593617e-09, |
|
"logits/chosen": -4.521292209625244, |
|
"logits/rejected": -4.848372936248779, |
|
"logps/chosen": -230.04397583007812, |
|
"logps/rejected": -226.9331817626953, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.5120347738265991, |
|
"rewards/margins": 3.560204267501831, |
|
"rewards/rejected": -2.0481698513031006, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 155.26912676521727, |
|
"learning_rate": 3.6344616260994942e-09, |
|
"logits/chosen": -4.473151683807373, |
|
"logits/rejected": -4.766911029815674, |
|
"logps/chosen": -251.7646484375, |
|
"logps/rejected": -254.7379913330078, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.638421654701233, |
|
"rewards/margins": 3.8422539234161377, |
|
"rewards/rejected": -2.2038321495056152, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 151.70242269299357, |
|
"learning_rate": 2.5596568090246547e-09, |
|
"logits/chosen": -4.4894232749938965, |
|
"logits/rejected": -4.816695213317871, |
|
"logps/chosen": -255.334716796875, |
|
"logps/rejected": -228.4678192138672, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.8811309337615967, |
|
"rewards/margins": 4.049709320068359, |
|
"rewards/rejected": -2.1685783863067627, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 150.47860807724385, |
|
"learning_rate": 1.6645010980854079e-09, |
|
"logits/chosen": -4.505433559417725, |
|
"logits/rejected": -4.675290584564209, |
|
"logps/chosen": -240.54714965820312, |
|
"logps/rejected": -246.03665161132812, |
|
"loss": 0.2168, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.4924026727676392, |
|
"rewards/margins": 3.3190674781799316, |
|
"rewards/rejected": -1.8266645669937134, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 165.46679700251414, |
|
"learning_rate": 9.561607795526006e-10, |
|
"logits/chosen": -4.49678373336792, |
|
"logits/rejected": -4.711674690246582, |
|
"logps/chosen": -246.95388793945312, |
|
"logps/rejected": -247.2928009033203, |
|
"loss": 0.2121, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.468100905418396, |
|
"rewards/margins": 3.451129913330078, |
|
"rewards/rejected": -1.9830286502838135, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 149.25359071163066, |
|
"learning_rate": 4.403065646083809e-10, |
|
"logits/chosen": -4.518364906311035, |
|
"logits/rejected": -4.680220603942871, |
|
"logps/chosen": -241.2300262451172, |
|
"logps/rejected": -255.6038818359375, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.5591168403625488, |
|
"rewards/margins": 3.434800386428833, |
|
"rewards/rejected": -1.8756835460662842, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 160.57572024314433, |
|
"learning_rate": 1.2106819172520434e-10, |
|
"logits/chosen": -4.618407726287842, |
|
"logits/rejected": -4.8883843421936035, |
|
"logps/chosen": -246.15853881835938, |
|
"logps/rejected": -243.1090087890625, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.5924150943756104, |
|
"rewards/margins": 3.8508572578430176, |
|
"rewards/rejected": -2.2584421634674072, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 235.40189038757265, |
|
"learning_rate": 1.0013655036916758e-12, |
|
"logits/chosen": -4.611303329467773, |
|
"logits/rejected": -4.857443809509277, |
|
"logps/chosen": -242.1800994873047, |
|
"logps/rejected": -241.1270294189453, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.7492481470108032, |
|
"rewards/margins": 3.8104407787323, |
|
"rewards/rejected": -2.0611929893493652, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 391, |
|
"total_flos": 0.0, |
|
"train_loss": 0.29024992444935965, |
|
"train_runtime": 6148.7126, |
|
"train_samples_per_second": 8.132, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 391, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|