ShenaoZ's picture
Model save
16823d9 verified
raw
history blame contribute delete
No virus
4.09 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9874476987447699,
"eval_steps": 500,
"global_step": 59,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 1.6666666666666664e-08,
"logits/chosen": -2.592315673828125,
"logits/rejected": -2.5873923301696777,
"logps/chosen": -239.20050048828125,
"logps/pi_response": -152.18771362304688,
"logps/ref_response": -152.18771362304688,
"logps/rejected": -436.1141357421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.17,
"learning_rate": 9.860114570402053e-08,
"logits/chosen": -2.6213459968566895,
"logits/rejected": -2.5811781883239746,
"logps/chosen": -248.51345825195312,
"logps/pi_response": -134.393310546875,
"logps/ref_response": -133.8142852783203,
"logps/rejected": -403.0088806152344,
"loss": 0.6892,
"rewards/accuracies": 0.5590277910232544,
"rewards/chosen": -0.009415436536073685,
"rewards/margins": 0.012970829382538795,
"rewards/rejected": -0.02238626405596733,
"step": 10
},
{
"epoch": 0.33,
"learning_rate": 8.374915007591053e-08,
"logits/chosen": -2.6401333808898926,
"logits/rejected": -2.572584867477417,
"logps/chosen": -278.6589660644531,
"logps/pi_response": -156.27084350585938,
"logps/ref_response": -148.03988647460938,
"logps/rejected": -419.31231689453125,
"loss": 0.6467,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.16653795540332794,
"rewards/margins": 0.11585883796215057,
"rewards/rejected": -0.2823967933654785,
"step": 20
},
{
"epoch": 0.5,
"learning_rate": 5.738232820012406e-08,
"logits/chosen": -2.5704445838928223,
"logits/rejected": -2.506669521331787,
"logps/chosen": -278.9919738769531,
"logps/pi_response": -148.90841674804688,
"logps/ref_response": -142.97976684570312,
"logps/rejected": -427.6094665527344,
"loss": 0.5973,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.2087964117527008,
"rewards/margins": 0.24212436378002167,
"rewards/rejected": -0.4509207606315613,
"step": 30
},
{
"epoch": 0.67,
"learning_rate": 2.8496739886173992e-08,
"logits/chosen": -2.5911762714385986,
"logits/rejected": -2.5108911991119385,
"logps/chosen": -294.02593994140625,
"logps/pi_response": -145.10809326171875,
"logps/ref_response": -145.1625213623047,
"logps/rejected": -495.380615234375,
"loss": 0.5482,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -0.2709793448448181,
"rewards/margins": 0.4595873951911926,
"rewards/rejected": -0.7305666208267212,
"step": 40
},
{
"epoch": 0.84,
"learning_rate": 6.947819411632222e-09,
"logits/chosen": -2.584728240966797,
"logits/rejected": -2.518796443939209,
"logps/chosen": -303.9441833496094,
"logps/pi_response": -139.6694793701172,
"logps/ref_response": -138.4994354248047,
"logps/rejected": -476.5013732910156,
"loss": 0.572,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.36765211820602417,
"rewards/margins": 0.42311352491378784,
"rewards/rejected": -0.7907657027244568,
"step": 50
},
{
"epoch": 0.99,
"step": 59,
"total_flos": 0.0,
"train_loss": 0.603613182649774,
"train_runtime": 3504.5408,
"train_samples_per_second": 4.361,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 59,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}