|
{ |
|
"best_metric": 0.47333332896232605, |
|
"best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018, |
|
"loss": 0.4713, |
|
"step": 20, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -1423.408907312925, |
|
"train/logps/rejected": -1301.3419436416184, |
|
"train/rewards/chosen": -113.72140731292517, |
|
"train/rewards/margins": -11.091552724774871, |
|
"train/rewards/rejected": -102.6298545881503 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015142857142857143, |
|
"loss": 0.45, |
|
"step": 40, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2404.9383680555557, |
|
"train/logps/rejected": -2267.5051491477275, |
|
"train/rewards/chosen": -211.85392252604166, |
|
"train/rewards/margins": -15.253491950757564, |
|
"train/rewards/rejected": -196.6004305752841 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval/kl": 0.0, |
|
"eval/logps/chosen": -2179.756602112676, |
|
"eval/logps/rejected": -1974.1839398734178, |
|
"eval/rewards/chosen": -189.60969135123239, |
|
"eval/rewards/margins": -18.62818423097923, |
|
"eval/rewards/rejected": -170.98150712025316, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_runtime": 140.7497, |
|
"eval_samples_per_second": 2.131, |
|
"eval_steps_per_second": 0.533, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012285714285714287, |
|
"loss": 0.4469, |
|
"step": 60, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2463.7814685314684, |
|
"train/logps/rejected": -2104.4410310734465, |
|
"train/rewards/chosen": -216.41613854895104, |
|
"train/rewards/margins": -33.82629479895104, |
|
"train/rewards/rejected": -182.58984375 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.428571428571429e-05, |
|
"loss": 0.4281, |
|
"step": 80, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2216.6425638686133, |
|
"train/logps/rejected": -2146.411031420765, |
|
"train/rewards/chosen": -193.77794251824818, |
|
"train/rewards/margins": -6.814528925351993, |
|
"train/rewards/rejected": -186.9634135928962 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.571428571428571e-05, |
|
"loss": 0.5031, |
|
"step": 100, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2453.4375, |
|
"train/logps/rejected": -2285.3565251572327, |
|
"train/rewards/chosen": -214.367794060559, |
|
"train/rewards/margins": -16.124685275181633, |
|
"train/rewards/rejected": -198.24310878537736 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval/kl": 0.0, |
|
"eval/logps/chosen": -2181.0286091549297, |
|
"eval/logps/rejected": -1975.4440268987341, |
|
"eval/rewards/chosen": -189.73686454665494, |
|
"eval/rewards/margins": -18.629343779249865, |
|
"eval/rewards/rejected": -171.10752076740508, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_runtime": 140.6959, |
|
"eval_samples_per_second": 2.132, |
|
"eval_steps_per_second": 0.533, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|