|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9983361064891847, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1151.6129333090275, |
|
"learning_rate": 3.333333333333333e-10, |
|
"logits/chosen": -4.106247425079346, |
|
"logits/rejected": -4.200438499450684, |
|
"logps/chosen": -382.81439208984375, |
|
"logps/rejected": -357.65960693359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1175.0279383615512, |
|
"learning_rate": 3.3333333333333334e-09, |
|
"logits/chosen": -4.217203617095947, |
|
"logits/rejected": -4.32081413269043, |
|
"logps/chosen": -334.6714172363281, |
|
"logps/rejected": -313.4322509765625, |
|
"loss": 0.7317, |
|
"rewards/accuracies": 0.3923611044883728, |
|
"rewards/chosen": -0.044815655797719955, |
|
"rewards/margins": -0.03612741455435753, |
|
"rewards/rejected": -0.008688241243362427, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1253.9538230101048, |
|
"learning_rate": 4.983095894354858e-09, |
|
"logits/chosen": -4.266427040100098, |
|
"logits/rejected": -4.4187798500061035, |
|
"logps/chosen": -313.9143371582031, |
|
"logps/rejected": -288.782470703125, |
|
"loss": 0.732, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.000932177877984941, |
|
"rewards/margins": -0.012739461846649647, |
|
"rewards/rejected": 0.011807283386588097, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1147.308654200848, |
|
"learning_rate": 4.849231551964771e-09, |
|
"logits/chosen": -4.301184177398682, |
|
"logits/rejected": -4.36545991897583, |
|
"logps/chosen": -308.3140563964844, |
|
"logps/rejected": -285.6829833984375, |
|
"loss": 0.7352, |
|
"rewards/accuracies": 0.46562498807907104, |
|
"rewards/chosen": -0.018803134560585022, |
|
"rewards/margins": 0.008298242464661598, |
|
"rewards/rejected": -0.02710137702524662, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1152.01480268075, |
|
"learning_rate": 4.588719528532341e-09, |
|
"logits/chosen": -4.186487674713135, |
|
"logits/rejected": -4.270204544067383, |
|
"logps/chosen": -332.467041015625, |
|
"logps/rejected": -307.530517578125, |
|
"loss": 0.722, |
|
"rewards/accuracies": 0.5531250238418579, |
|
"rewards/chosen": 0.012754167430102825, |
|
"rewards/margins": 0.04999501258134842, |
|
"rewards/rejected": -0.03724084421992302, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1161.340877574017, |
|
"learning_rate": 4.215604094671834e-09, |
|
"logits/chosen": -4.197475910186768, |
|
"logits/rejected": -4.366654396057129, |
|
"logps/chosen": -333.69940185546875, |
|
"logps/rejected": -309.05511474609375, |
|
"loss": 0.7263, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.022939234972000122, |
|
"rewards/margins": -0.01653190515935421, |
|
"rewards/rejected": 0.03947114199399948, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1135.5326964714952, |
|
"learning_rate": 3.7500000000000005e-09, |
|
"logits/chosen": -4.186155796051025, |
|
"logits/rejected": -4.269167900085449, |
|
"logps/chosen": -323.9843444824219, |
|
"logps/rejected": -308.364990234375, |
|
"loss": 0.7133, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.009113344363868237, |
|
"rewards/margins": -0.005971288774162531, |
|
"rewards/rejected": 0.015084633603692055, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1170.598014061488, |
|
"learning_rate": 3.2170080817777257e-09, |
|
"logits/chosen": -4.14601469039917, |
|
"logits/rejected": -4.301178932189941, |
|
"logps/chosen": -344.8753967285156, |
|
"logps/rejected": -314.6096496582031, |
|
"loss": 0.7182, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.08767497539520264, |
|
"rewards/margins": 0.08200599253177643, |
|
"rewards/rejected": 0.005668987520039082, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1088.822875661876, |
|
"learning_rate": 2.6453620722761897e-09, |
|
"logits/chosen": -4.220850944519043, |
|
"logits/rejected": -4.3867106437683105, |
|
"logps/chosen": -334.7141418457031, |
|
"logps/rejected": -305.6958923339844, |
|
"loss": 0.7175, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.06255482137203217, |
|
"rewards/margins": 0.04721928387880325, |
|
"rewards/rejected": 0.015335534699261189, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1120.1472438016, |
|
"learning_rate": 2.0658795558326744e-09, |
|
"logits/chosen": -4.226521968841553, |
|
"logits/rejected": -4.347161293029785, |
|
"logps/chosen": -327.5687561035156, |
|
"logps/rejected": -308.19744873046875, |
|
"loss": 0.7128, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.03935312479734421, |
|
"rewards/margins": 0.012340927496552467, |
|
"rewards/rejected": 0.02701219543814659, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1147.49194388717, |
|
"learning_rate": 1.5098005849021078e-09, |
|
"logits/chosen": -4.208071708679199, |
|
"logits/rejected": -4.417771339416504, |
|
"logps/chosen": -320.9470520019531, |
|
"logps/rejected": -290.018310546875, |
|
"loss": 0.712, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 0.060337893664836884, |
|
"rewards/margins": 0.03545590117573738, |
|
"rewards/rejected": 0.0248819962143898, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1131.801124941311, |
|
"learning_rate": 1.0071035207430352e-09, |
|
"logits/chosen": -4.222798824310303, |
|
"logits/rejected": -4.275721549987793, |
|
"logps/chosen": -315.00030517578125, |
|
"logps/rejected": -303.7385559082031, |
|
"loss": 0.7022, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.08098876476287842, |
|
"rewards/margins": 0.037886131554841995, |
|
"rewards/rejected": 0.04310264065861702, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1201.8232728734756, |
|
"learning_rate": 5.848888922025553e-10, |
|
"logits/chosen": -4.147335052490234, |
|
"logits/rejected": -4.3030242919921875, |
|
"logps/chosen": -339.2781677246094, |
|
"logps/rejected": -313.5749206542969, |
|
"loss": 0.7129, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.08869560062885284, |
|
"rewards/margins": 0.048744406551122665, |
|
"rewards/rejected": 0.03995119035243988, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1182.4206440855232, |
|
"learning_rate": 2.659183991914696e-10, |
|
"logits/chosen": -4.173482418060303, |
|
"logits/rejected": -4.367284297943115, |
|
"logps/chosen": -324.4493713378906, |
|
"logps/rejected": -305.2012634277344, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.08546491712331772, |
|
"rewards/margins": 0.01908993348479271, |
|
"rewards/rejected": 0.06637498736381531, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1158.9214495930962, |
|
"learning_rate": 6.738782355044049e-11, |
|
"logits/chosen": -4.290203094482422, |
|
"logits/rejected": -4.35637092590332, |
|
"logps/chosen": -308.0101318359375, |
|
"logps/rejected": -297.37701416015625, |
|
"loss": 0.7082, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.12010886520147324, |
|
"rewards/margins": 0.05074785277247429, |
|
"rewards/rejected": 0.06936100870370865, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1208.2615961890754, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -4.257304668426514, |
|
"logits/rejected": -4.3310017585754395, |
|
"logps/chosen": -312.7613830566406, |
|
"logps/rejected": -300.3546447753906, |
|
"loss": 0.7025, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.10821112245321274, |
|
"rewards/margins": 0.047078292816877365, |
|
"rewards/rejected": 0.06113281846046448, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 150, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7162073644002279, |
|
"train_runtime": 4512.4874, |
|
"train_samples_per_second": 8.52, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|