|
{ |
|
"best_metric": 0.35275527834892273, |
|
"best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.18-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.346695899963379, |
|
"learning_rate": 0.00018142857142857142, |
|
"loss": 0.467, |
|
"step": 20, |
|
"train/kl": 6.117425918579102, |
|
"train/logps/chosen": -259.1842447916667, |
|
"train/logps/rejected": -290.1948988970588, |
|
"train/rewards/chosen": 0.930299072265625, |
|
"train/rewards/margins": 0.5634524266860065, |
|
"train/rewards/rejected": 0.36684664557961855 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.604153633117676, |
|
"learning_rate": 0.00015285714285714287, |
|
"loss": 0.4224, |
|
"step": 40, |
|
"train/kl": 6.080809116363525, |
|
"train/logps/chosen": -274.32459677419354, |
|
"train/logps/rejected": -291.6558948863636, |
|
"train/rewards/chosen": 0.9791939027847782, |
|
"train/rewards/margins": 1.6478286295692244, |
|
"train/rewards/rejected": -0.668634726784446 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval/kl": 7.511639595031738, |
|
"eval/logps/chosen": -263.8732394366197, |
|
"eval/logps/rejected": -268.6064082278481, |
|
"eval/rewards/chosen": 1.9797810299295775, |
|
"eval/rewards/margins": 2.403955568071142, |
|
"eval/rewards/rejected": -0.42417453814156447, |
|
"eval_loss": 0.3916032016277313, |
|
"eval_runtime": 141.678, |
|
"eval_samples_per_second": 2.117, |
|
"eval_steps_per_second": 0.529, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 5.5113444328308105, |
|
"learning_rate": 0.00012428571428571428, |
|
"loss": 0.3832, |
|
"step": 60, |
|
"train/kl": 16.34114646911621, |
|
"train/logps/chosen": -240.63917267628204, |
|
"train/logps/rejected": -277.463486089939, |
|
"train/rewards/chosen": 3.0404166197165465, |
|
"train/rewards/margins": 2.3469540618075815, |
|
"train/rewards/rejected": 0.6934625579089653 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.7619431018829346, |
|
"learning_rate": 9.571428571428573e-05, |
|
"loss": 0.3233, |
|
"step": 80, |
|
"train/kl": 0.7871202826499939, |
|
"train/logps/chosen": -294.5624213506711, |
|
"train/logps/rejected": -336.1468612938597, |
|
"train/rewards/chosen": 0.5479572987396445, |
|
"train/rewards/margins": 5.502926202933724, |
|
"train/rewards/rejected": -4.954968904194079 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.996425151824951, |
|
"learning_rate": 6.714285714285714e-05, |
|
"loss": 0.3749, |
|
"step": 100, |
|
"train/kl": 6.590612888336182, |
|
"train/logps/chosen": -265.2372325922819, |
|
"train/logps/rejected": -300.5130665204678, |
|
"train/rewards/chosen": 2.635832920970533, |
|
"train/rewards/margins": 3.7929440163766914, |
|
"train/rewards/rejected": -1.1571110954061585 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval/kl": 2.193509817123413, |
|
"eval/logps/chosen": -268.1729478433099, |
|
"eval/logps/rejected": -291.57960838607596, |
|
"eval/rewards/chosen": 1.5498130690883583, |
|
"eval/rewards/margins": 4.271308299075008, |
|
"eval/rewards/rejected": -2.7214952299866497, |
|
"eval_loss": 0.35275527834892273, |
|
"eval_runtime": 141.6617, |
|
"eval_samples_per_second": 2.118, |
|
"eval_steps_per_second": 0.529, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|