|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984301412872841, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006279434850863423, |
|
"grad_norm": 16.698454749053152, |
|
"learning_rate": 1.875e-08, |
|
"logits/chosen": 0.13163629174232483, |
|
"logits/rejected": 0.7037353515625, |
|
"logps/chosen": -296.6709289550781, |
|
"logps/pi_response": -123.40753173828125, |
|
"logps/ref_response": -123.40753173828125, |
|
"logps/rejected": -325.5771484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 24.513917430946538, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": 0.6406950354576111, |
|
"logits/rejected": 0.8759365081787109, |
|
"logps/chosen": -260.0070495605469, |
|
"logps/pi_response": -114.28534698486328, |
|
"logps/ref_response": -114.47286224365234, |
|
"logps/rejected": -385.5276184082031, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": 0.00021976388234179467, |
|
"rewards/margins": -2.7502783268573694e-05, |
|
"rewards/rejected": 0.000247266492806375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 20.98441253721937, |
|
"learning_rate": 2.9942119880575817e-07, |
|
"logits/chosen": 0.5097376704216003, |
|
"logits/rejected": 0.8540347814559937, |
|
"logps/chosen": -268.75433349609375, |
|
"logps/pi_response": -118.2417221069336, |
|
"logps/ref_response": -118.39286041259766, |
|
"logps/rejected": -400.633544921875, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03052676096558571, |
|
"rewards/margins": 0.040752165019512177, |
|
"rewards/rejected": -0.07127892971038818, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 13.999591521451507, |
|
"learning_rate": 2.929608750821129e-07, |
|
"logits/chosen": 0.5199416875839233, |
|
"logits/rejected": 0.992133617401123, |
|
"logps/chosen": -295.43292236328125, |
|
"logps/pi_response": -119.1610336303711, |
|
"logps/ref_response": -118.39522552490234, |
|
"logps/rejected": -421.61041259765625, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.17318478226661682, |
|
"rewards/margins": 0.18207214772701263, |
|
"rewards/rejected": -0.35525694489479065, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 14.705995187750815, |
|
"learning_rate": 2.7962832564252725e-07, |
|
"logits/chosen": 0.5350409746170044, |
|
"logits/rejected": 0.9762212634086609, |
|
"logps/chosen": -292.2400207519531, |
|
"logps/pi_response": -122.87149810791016, |
|
"logps/ref_response": -120.0985336303711, |
|
"logps/rejected": -484.0975646972656, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30772843956947327, |
|
"rewards/margins": 0.45096302032470703, |
|
"rewards/rejected": -0.7586914300918579, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 20.925472606748368, |
|
"learning_rate": 2.6006445513357056e-07, |
|
"logits/chosen": 0.6897233724594116, |
|
"logits/rejected": 1.0123343467712402, |
|
"logps/chosen": -320.56976318359375, |
|
"logps/pi_response": -124.61143493652344, |
|
"logps/ref_response": -115.71650695800781, |
|
"logps/rejected": -523.7175903320312, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6995627284049988, |
|
"rewards/margins": 0.6693333387374878, |
|
"rewards/rejected": -1.3688960075378418, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 13.008934683020064, |
|
"learning_rate": 2.3520971200967334e-07, |
|
"logits/chosen": 0.6137208938598633, |
|
"logits/rejected": 1.0412781238555908, |
|
"logps/chosen": -332.25738525390625, |
|
"logps/pi_response": -126.09577941894531, |
|
"logps/ref_response": -118.1528549194336, |
|
"logps/rejected": -495.4088439941406, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5905637145042419, |
|
"rewards/margins": 0.6077089309692383, |
|
"rewards/rejected": -1.198272705078125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 12.777487677582881, |
|
"learning_rate": 2.0625888054143427e-07, |
|
"logits/chosen": 0.593045175075531, |
|
"logits/rejected": 0.9839151501655579, |
|
"logps/chosen": -273.0267333984375, |
|
"logps/pi_response": -126.1861801147461, |
|
"logps/ref_response": -120.62638854980469, |
|
"logps/rejected": -531.7461547851562, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3775605261325836, |
|
"rewards/margins": 0.8122557401657104, |
|
"rewards/rejected": -1.1898162364959717, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 10.709251992827037, |
|
"learning_rate": 1.7460364672965327e-07, |
|
"logits/chosen": 0.6686810255050659, |
|
"logits/rejected": 1.0736128091812134, |
|
"logps/chosen": -280.6498107910156, |
|
"logps/pi_response": -112.1661376953125, |
|
"logps/ref_response": -106.67897033691406, |
|
"logps/rejected": -489.26556396484375, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4469337463378906, |
|
"rewards/margins": 0.6373990774154663, |
|
"rewards/rejected": -1.084332823753357, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 11.68215452300686, |
|
"learning_rate": 1.4176569902035086e-07, |
|
"logits/chosen": 0.6378764510154724, |
|
"logits/rejected": 1.0353127717971802, |
|
"logps/chosen": -339.03973388671875, |
|
"logps/pi_response": -119.71498107910156, |
|
"logps/ref_response": -111.9307861328125, |
|
"logps/rejected": -513.7333984375, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5252664685249329, |
|
"rewards/margins": 0.7039340734481812, |
|
"rewards/rejected": -1.2292006015777588, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 11.96502204484806, |
|
"learning_rate": 1.0932357971453743e-07, |
|
"logits/chosen": 0.7395003437995911, |
|
"logits/rejected": 1.0328724384307861, |
|
"logps/chosen": -301.8004455566406, |
|
"logps/pi_response": -116.29144287109375, |
|
"logps/ref_response": -108.0909423828125, |
|
"logps/rejected": -524.360107421875, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5083015561103821, |
|
"rewards/margins": 0.7502217888832092, |
|
"rewards/rejected": -1.2585232257843018, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 11.40631298798362, |
|
"learning_rate": 7.883680337481599e-08, |
|
"logits/chosen": 0.7460795640945435, |
|
"logits/rejected": 0.9838323593139648, |
|
"logps/chosen": -305.3519592285156, |
|
"logps/pi_response": -125.8452377319336, |
|
"logps/ref_response": -117.07008361816406, |
|
"logps/rejected": -504.4964294433594, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4922094941139221, |
|
"rewards/margins": 0.6827653646469116, |
|
"rewards/rejected": -1.174974799156189, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 10.671563097729658, |
|
"learning_rate": 5.177088990820725e-08, |
|
"logits/chosen": 0.5097354650497437, |
|
"logits/rejected": 0.8302543759346008, |
|
"logps/chosen": -327.8287353515625, |
|
"logps/pi_response": -134.05953979492188, |
|
"logps/ref_response": -125.61170959472656, |
|
"logps/rejected": -554.1288452148438, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5032998323440552, |
|
"rewards/margins": 0.7790510058403015, |
|
"rewards/rejected": -1.282350778579712, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 13.393410138993277, |
|
"learning_rate": 2.942691603548416e-08, |
|
"logits/chosen": 0.5484687089920044, |
|
"logits/rejected": 1.0104806423187256, |
|
"logps/chosen": -329.3583984375, |
|
"logps/pi_response": -133.8960723876953, |
|
"logps/ref_response": -126.83935546875, |
|
"logps/rejected": -530.1004028320312, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4904448986053467, |
|
"rewards/margins": 0.7761750221252441, |
|
"rewards/rejected": -1.2666199207305908, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 12.267577779535525, |
|
"learning_rate": 1.2878971655412513e-08, |
|
"logits/chosen": 0.5720739364624023, |
|
"logits/rejected": 0.9325042963027954, |
|
"logps/chosen": -310.2190856933594, |
|
"logps/pi_response": -136.25198364257812, |
|
"logps/ref_response": -126.86582946777344, |
|
"logps/rejected": -564.576171875, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.481649786233902, |
|
"rewards/margins": 0.8537376523017883, |
|
"rewards/rejected": -1.3353874683380127, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 11.331362077198552, |
|
"learning_rate": 2.922527618666465e-09, |
|
"logits/chosen": 0.5811373591423035, |
|
"logits/rejected": 0.9567831158638, |
|
"logps/chosen": -304.1393737792969, |
|
"logps/pi_response": -123.76485443115234, |
|
"logps/ref_response": -114.90129089355469, |
|
"logps/rejected": -522.5949096679688, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5134440064430237, |
|
"rewards/margins": 0.7166833281517029, |
|
"rewards/rejected": -1.2301273345947266, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9984301412872841, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5592624436384477, |
|
"train_runtime": 4324.4895, |
|
"train_samples_per_second": 4.712, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|