Mistral-7B-v0.1-spin-2k-hhrlhf / trainer_state.json
AmberYifan's picture
Model save
b4d174e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 200,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 307.76555800630456,
"learning_rate": 7.142857142857142e-08,
"logits/generated": -2.8404788970947266,
"logits/real": -2.5973095893859863,
"logps/generated": -199.41073608398438,
"logps/real": -84.76593017578125,
"loss": 0.7771,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.16,
"grad_norm": 8.428438696613153,
"learning_rate": 4.727272727272727e-07,
"logits/generated": -2.9082934856414795,
"logits/real": -2.201085329055786,
"logps/generated": -260.89495849609375,
"logps/real": -89.60836791992188,
"loss": 0.2691,
"rewards/accuracies": 0.8888888955116272,
"rewards/generated": -3.8854570388793945,
"rewards/margins": 5.224937438964844,
"rewards/real": 1.3394801616668701,
"step": 10
},
{
"epoch": 0.32,
"grad_norm": 2.6808156850513947,
"learning_rate": 3.818181818181818e-07,
"logits/generated": -3.0079009532928467,
"logits/real": -2.238185167312622,
"logps/generated": -310.7685241699219,
"logps/real": -82.25010681152344,
"loss": 0.0707,
"rewards/accuracies": 1.0,
"rewards/generated": -8.675054550170898,
"rewards/margins": 11.541936874389648,
"rewards/real": 2.866882562637329,
"step": 20
},
{
"epoch": 0.48,
"grad_norm": 0.7630367545230997,
"learning_rate": 2.909090909090909e-07,
"logits/generated": -3.0362448692321777,
"logits/real": -2.1702117919921875,
"logps/generated": -325.0323181152344,
"logps/real": -72.29302978515625,
"loss": 0.0691,
"rewards/accuracies": 1.0,
"rewards/generated": -9.070144653320312,
"rewards/margins": 12.167816162109375,
"rewards/real": 3.0976719856262207,
"step": 30
},
{
"epoch": 0.64,
"grad_norm": 0.8045898247287383,
"learning_rate": 2e-07,
"logits/generated": -3.0557637214660645,
"logits/real": -2.27079176902771,
"logps/generated": -326.16363525390625,
"logps/real": -73.30236053466797,
"loss": 0.0736,
"rewards/accuracies": 1.0,
"rewards/generated": -10.190264701843262,
"rewards/margins": 13.376245498657227,
"rewards/real": 3.1859793663024902,
"step": 40
},
{
"epoch": 0.8,
"grad_norm": 0.9553834700952871,
"learning_rate": 1.0909090909090908e-07,
"logits/generated": -3.051811933517456,
"logits/real": -2.1524760723114014,
"logps/generated": -327.13031005859375,
"logps/real": -71.540771484375,
"loss": 0.0724,
"rewards/accuracies": 1.0,
"rewards/generated": -10.402329444885254,
"rewards/margins": 13.604934692382812,
"rewards/real": 3.2026054859161377,
"step": 50
},
{
"epoch": 0.96,
"grad_norm": 0.8111910021687768,
"learning_rate": 1.818181818181818e-08,
"logits/generated": -3.0647284984588623,
"logits/real": -2.1906068325042725,
"logps/generated": -329.8739318847656,
"logps/real": -82.74003601074219,
"loss": 0.0694,
"rewards/accuracies": 1.0,
"rewards/generated": -10.355340003967285,
"rewards/margins": 13.413922309875488,
"rewards/real": 3.0585832595825195,
"step": 60
},
{
"epoch": 0.992,
"step": 62,
"total_flos": 0.0,
"train_loss": 0.11182688801519332,
"train_runtime": 795.5053,
"train_samples_per_second": 2.513,
"train_steps_per_second": 0.078
}
],
"logging_steps": 10,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}