|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 81, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.271874539371896, |
|
"learning_rate": 5.555555555555555e-08, |
|
"logits/chosen": 0.23574040830135345, |
|
"logits/rejected": -0.9465489983558655, |
|
"logps/chosen": -461.99664306640625, |
|
"logps/rejected": -1076.3154296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.053815107503334, |
|
"learning_rate": 4.997620553954645e-07, |
|
"logits/chosen": 0.3158790171146393, |
|
"logits/rejected": -0.6447792649269104, |
|
"logps/chosen": -491.0052185058594, |
|
"logps/rejected": -1057.68212890625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.0007979909423738718, |
|
"rewards/margins": 0.00010702761210268363, |
|
"rewards/rejected": 0.0006909631192684174, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.8468513788685024, |
|
"learning_rate": 4.717527082945554e-07, |
|
"logits/chosen": 0.3795907199382782, |
|
"logits/rejected": -0.6666702032089233, |
|
"logps/chosen": -426.83050537109375, |
|
"logps/rejected": -1043.7567138671875, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.00618498120456934, |
|
"rewards/margins": 0.005285660736262798, |
|
"rewards/rejected": 0.0008993210503831506, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.809120262255183, |
|
"learning_rate": 4.0219035725218013e-07, |
|
"logits/chosen": 0.4218289256095886, |
|
"logits/rejected": -0.7673497796058655, |
|
"logps/chosen": -476.8456115722656, |
|
"logps/rejected": -1008.1292114257812, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019507069140672684, |
|
"rewards/margins": 0.0191163569688797, |
|
"rewards/rejected": 0.00039071092032827437, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.8969795803158, |
|
"learning_rate": 3.041099034845257e-07, |
|
"logits/chosen": 0.24538719654083252, |
|
"logits/rejected": -0.5925542712211609, |
|
"logps/chosen": -496.660400390625, |
|
"logps/rejected": -976.67041015625, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.027163496240973473, |
|
"rewards/margins": 0.030033668503165245, |
|
"rewards/rejected": -0.0028701708652079105, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.9669240189360293, |
|
"learning_rate": 1.9589009651547428e-07, |
|
"logits/chosen": 0.4538944661617279, |
|
"logits/rejected": -0.61766117811203, |
|
"logps/chosen": -444.9410095214844, |
|
"logps/rejected": -980.8577270507812, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.04584876075387001, |
|
"rewards/margins": 0.04866841062903404, |
|
"rewards/rejected": -0.002819649875164032, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.761586308179101, |
|
"learning_rate": 9.780964274781983e-08, |
|
"logits/chosen": 0.3308876156806946, |
|
"logits/rejected": -0.6796020269393921, |
|
"logps/chosen": -471.8916931152344, |
|
"logps/rejected": -997.9669799804688, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.05425606295466423, |
|
"rewards/margins": 0.06382595747709274, |
|
"rewards/rejected": -0.009569892659783363, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.8973053098411667, |
|
"learning_rate": 2.824729170544457e-08, |
|
"logits/chosen": 0.3237437307834625, |
|
"logits/rejected": -0.6007438898086548, |
|
"logps/chosen": -452.1813049316406, |
|
"logps/rejected": -974.1201171875, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.041471779346466064, |
|
"rewards/margins": 0.04470660537481308, |
|
"rewards/rejected": -0.003234823001548648, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.6208468629029484, |
|
"learning_rate": 2.3794460453555044e-10, |
|
"logits/chosen": 0.3552563190460205, |
|
"logits/rejected": -0.7167419195175171, |
|
"logps/chosen": -452.73162841796875, |
|
"logps/rejected": -1027.007568359375, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.05553414672613144, |
|
"rewards/margins": 0.059998393058776855, |
|
"rewards/rejected": -0.00446424912661314, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 81, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6753969060050117, |
|
"train_runtime": 1121.4796, |
|
"train_samples_per_second": 4.608, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 81, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|