{
  "best_metric": 0.47333332896232605,
  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 0.0,
      "learning_rate": 0.00018,
      "loss": 0.4847,
      "step": 20,
      "train/kl": 0.0,
      "train/logps/chosen": -1869.3137335526317,
      "train/logps/rejected": -1463.0350632440477,
      "train/rewards/chosen": -157.77652138157896,
      "train/rewards/margins": -39.35788996416825,
      "train/rewards/rejected": -118.41863141741071
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.0,
      "learning_rate": 0.00015142857142857143,
      "loss": 0.4938,
      "step": 40,
      "train/kl": 0.0,
      "train/logps/chosen": -2339.678006329114,
      "train/logps/rejected": -2094.7118055555557,
      "train/rewards/chosen": -204.7826839398734,
      "train/rewards/margins": -23.895109190645,
      "train/rewards/rejected": -180.8875747492284
    },
    {
      "epoch": 0.34,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2160.559198943662,
      "eval/logps/rejected": -1937.6455696202531,
      "eval/rewards/chosen": -187.69000330105635,
      "eval/rewards/margins": -20.362446932385467,
      "eval/rewards/rejected": -167.32755636867088,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 141.6142,
      "eval_samples_per_second": 2.118,
      "eval_steps_per_second": 0.53,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.0,
      "learning_rate": 0.00012285714285714287,
      "loss": 0.5,
      "step": 60,
      "train/kl": 0.0,
      "train/logps/chosen": -2174.43828125,
      "train/logps/rejected": -2130.2125,
      "train/rewards/chosen": -189.318359375,
      "train/rewards/margins": -4.711669921875,
      "train/rewards/rejected": -184.606689453125
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.0,
      "learning_rate": 9.428571428571429e-05,
      "loss": 0.4562,
      "step": 80,
      "train/kl": 0.0,
      "train/logps/chosen": -2384.5894691780823,
      "train/logps/rejected": -1893.6566091954023,
      "train/rewards/chosen": -209.14641748715752,
      "train/rewards/margins": -46.95111621201383,
      "train/rewards/rejected": -162.1953012751437
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.0,
      "learning_rate": 6.571428571428571e-05,
      "loss": 0.4562,
      "step": 100,
      "train/kl": 0.0,
      "train/logps/chosen": -2356.8424657534247,
      "train/logps/rejected": -2293.712643678161,
      "train/rewards/chosen": -207.19579409246575,
      "train/rewards/margins": -8.242444595339322,
      "train/rewards/rejected": -198.95334949712642
    },
    {
      "epoch": 0.68,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2160.684639084507,
      "eval/logps/rejected": -1938.0195806962026,
      "eval/rewards/chosen": -187.70257482394365,
      "eval/rewards/margins": -20.337624764608194,
      "eval/rewards/rejected": -167.36495005933546,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 141.5959,
      "eval_samples_per_second": 2.119,
      "eval_steps_per_second": 0.53,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}