ShenaoZhang's picture
Model save
56aaa64 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9952153110047847,
"eval_steps": 500,
"global_step": 52,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -2.8187968730926514,
"logits/rejected": -2.8237557411193848,
"logps/chosen": -257.11737060546875,
"logps/pi_response": -65.15000915527344,
"logps/ref_response": -65.15000915527344,
"logps/rejected": -166.6063995361328,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.19,
"learning_rate": 4.907293218369498e-07,
"logits/chosen": -2.834031105041504,
"logits/rejected": -2.794311046600342,
"logps/chosen": -247.26991271972656,
"logps/pi_response": -71.50384521484375,
"logps/ref_response": -71.02489471435547,
"logps/rejected": -163.82879638671875,
"loss": 0.6885,
"rewards/accuracies": 0.5972222089767456,
"rewards/chosen": 0.007015190087258816,
"rewards/margins": 0.0075297304429113865,
"rewards/rejected": -0.0005145410541445017,
"step": 10
},
{
"epoch": 0.38,
"learning_rate": 3.941700805287168e-07,
"logits/chosen": -2.7059969902038574,
"logits/rejected": -2.682796001434326,
"logps/chosen": -233.1520538330078,
"logps/pi_response": -87.71420288085938,
"logps/ref_response": -74.39585876464844,
"logps/rejected": -170.5820770263672,
"loss": 0.6558,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": 0.023435983806848526,
"rewards/margins": 0.09170379489660263,
"rewards/rejected": -0.0682678073644638,
"step": 20
},
{
"epoch": 0.57,
"learning_rate": 2.3293939665883228e-07,
"logits/chosen": -2.607896327972412,
"logits/rejected": -2.5777342319488525,
"logps/chosen": -247.3583526611328,
"logps/pi_response": -125.3393325805664,
"logps/ref_response": -79.46585845947266,
"logps/rejected": -196.52218627929688,
"loss": 0.6311,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.16959059238433838,
"rewards/margins": 0.12048976123332977,
"rewards/rejected": -0.29008033871650696,
"step": 30
},
{
"epoch": 0.77,
"learning_rate": 7.936171419533652e-08,
"logits/chosen": -2.627960443496704,
"logits/rejected": -2.5933032035827637,
"logps/chosen": -272.2423400878906,
"logps/pi_response": -141.93287658691406,
"logps/ref_response": -77.87845611572266,
"logps/rejected": -232.07913208007812,
"loss": 0.5952,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.19411739706993103,
"rewards/margins": 0.2757735848426819,
"rewards/rejected": -0.4698909819126129,
"step": 40
},
{
"epoch": 0.96,
"learning_rate": 2.328513490917311e-09,
"logits/chosen": -2.6459906101226807,
"logits/rejected": -2.6088039875030518,
"logps/chosen": -273.8865051269531,
"logps/pi_response": -136.73643493652344,
"logps/ref_response": -74.40654754638672,
"logps/rejected": -227.901123046875,
"loss": 0.5864,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.17903554439544678,
"rewards/margins": 0.36069053411483765,
"rewards/rejected": -0.5397260785102844,
"step": 50
},
{
"epoch": 1.0,
"step": 52,
"total_flos": 0.0,
"train_loss": 0.6293867803536929,
"train_runtime": 3116.1807,
"train_samples_per_second": 4.279,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 52,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}