|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 200, |
|
"global_step": 125, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 195.76847577214488, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/generated": -2.827054262161255, |
|
"logits/real": -2.5581681728363037, |
|
"logps/generated": -192.72244262695312, |
|
"logps/real": -149.0590057373047, |
|
"loss": 0.8742, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 218.63661928274885, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/generated": -2.7630491256713867, |
|
"logits/real": -2.6888210773468018, |
|
"logps/generated": -214.9255828857422, |
|
"logps/real": -163.09947204589844, |
|
"loss": 0.8317, |
|
"rewards/accuracies": 0.6111111044883728, |
|
"rewards/generated": -0.16190433502197266, |
|
"rewards/margins": 0.3583752512931824, |
|
"rewards/real": 0.19647091627120972, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 169.19013750338308, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/generated": -2.662771701812744, |
|
"logits/real": -2.601508140563965, |
|
"logps/generated": -220.6349334716797, |
|
"logps/real": -179.73020935058594, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -1.4358702898025513, |
|
"rewards/margins": 1.1641441583633423, |
|
"rewards/real": -0.271726131439209, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 230.2618516037045, |
|
"learning_rate": 4.2410714285714283e-07, |
|
"logits/generated": -2.8341822624206543, |
|
"logits/real": -2.751523733139038, |
|
"logps/generated": -218.51016235351562, |
|
"logps/real": -174.5108184814453, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.7633816003799438, |
|
"rewards/margins": 1.2909877300262451, |
|
"rewards/real": 0.5276059508323669, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 102.92598060702612, |
|
"learning_rate": 3.794642857142857e-07, |
|
"logits/generated": -2.670307159423828, |
|
"logits/real": -2.709695816040039, |
|
"logps/generated": -225.74386596679688, |
|
"logps/real": -180.8232879638672, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -1.9952844381332397, |
|
"rewards/margins": 2.492525100708008, |
|
"rewards/real": 0.4972406327724457, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 133.1268603857592, |
|
"learning_rate": 3.348214285714285e-07, |
|
"logits/generated": -2.6185507774353027, |
|
"logits/real": -2.7017085552215576, |
|
"logps/generated": -229.56289672851562, |
|
"logps/real": -180.7897491455078, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -1.7043462991714478, |
|
"rewards/margins": 2.115872859954834, |
|
"rewards/real": 0.4115265905857086, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 121.16388402268068, |
|
"learning_rate": 2.9017857142857143e-07, |
|
"logits/generated": -2.3595099449157715, |
|
"logits/real": -2.389936923980713, |
|
"logps/generated": -231.27981567382812, |
|
"logps/real": -185.60150146484375, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -1.7304115295410156, |
|
"rewards/margins": 1.988268494606018, |
|
"rewards/real": 0.25785699486732483, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 128.2291938662715, |
|
"learning_rate": 2.4553571428571425e-07, |
|
"logits/generated": -2.2344260215759277, |
|
"logits/real": -2.396183967590332, |
|
"logps/generated": -219.56826782226562, |
|
"logps/real": -175.6507568359375, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -1.3278710842132568, |
|
"rewards/margins": 2.380357027053833, |
|
"rewards/real": 1.0524860620498657, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 111.69308541433698, |
|
"learning_rate": 2.0089285714285714e-07, |
|
"logits/generated": -2.416640043258667, |
|
"logits/real": -2.4667611122131348, |
|
"logps/generated": -239.8185577392578, |
|
"logps/real": -192.8653106689453, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -2.3186020851135254, |
|
"rewards/margins": 2.0476338863372803, |
|
"rewards/real": -0.2709681987762451, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 82.56461355690034, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/generated": -2.270512580871582, |
|
"logits/real": -2.530308961868286, |
|
"logps/generated": -230.5257568359375, |
|
"logps/real": -196.55819702148438, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -1.9373829364776611, |
|
"rewards/margins": 1.8367639780044556, |
|
"rewards/real": -0.10061860084533691, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 107.57601154514983, |
|
"learning_rate": 1.1160714285714285e-07, |
|
"logits/generated": -2.257317304611206, |
|
"logits/real": -2.4022774696350098, |
|
"logps/generated": -231.86752319335938, |
|
"logps/real": -181.0973358154297, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": -2.2553870677948, |
|
"rewards/margins": 2.6081173419952393, |
|
"rewards/real": 0.35273003578186035, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 71.26563004679359, |
|
"learning_rate": 6.696428571428571e-08, |
|
"logits/generated": -2.0771467685699463, |
|
"logits/real": -2.54282546043396, |
|
"logps/generated": -235.5596466064453, |
|
"logps/real": -190.90234375, |
|
"loss": 0.3627, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -3.041977643966675, |
|
"rewards/margins": 3.7202351093292236, |
|
"rewards/real": 0.6782575249671936, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 162.74800080175157, |
|
"learning_rate": 2.2321428571428572e-08, |
|
"logits/generated": -2.3272647857666016, |
|
"logits/real": -2.594679117202759, |
|
"logps/generated": -231.59286499023438, |
|
"logps/real": -206.37869262695312, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/generated": -2.5086724758148193, |
|
"rewards/margins": 2.344866991043091, |
|
"rewards/real": -0.16380572319030762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 125, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5433647909164429, |
|
"train_runtime": 756.8858, |
|
"train_samples_per_second": 2.642, |
|
"train_steps_per_second": 0.165 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 125, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|