{ "best_metric": 0.47333332896232605, "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 0.0, "learning_rate": 0.00018, "loss": 0.4847, "step": 20, "train/kl": 0.0, "train/logps/chosen": -1869.3137335526317, "train/logps/rejected": -1463.0350632440477, "train/rewards/chosen": -157.77652138157896, "train/rewards/margins": -39.35788996416825, "train/rewards/rejected": -118.41863141741071 }, { "epoch": 0.27, "grad_norm": 0.0, "learning_rate": 0.00015142857142857143, "loss": 0.4938, "step": 40, "train/kl": 0.0, "train/logps/chosen": -2339.678006329114, "train/logps/rejected": -2094.7118055555557, "train/rewards/chosen": -204.7826839398734, "train/rewards/margins": -23.895109190645, "train/rewards/rejected": -180.8875747492284 }, { "epoch": 0.34, "eval/kl": 0.0, "eval/logps/chosen": -2160.559198943662, "eval/logps/rejected": -1937.6455696202531, "eval/rewards/chosen": -187.69000330105635, "eval/rewards/margins": -20.362446932385467, "eval/rewards/rejected": -167.32755636867088, "eval_loss": 0.47333332896232605, "eval_runtime": 141.6142, "eval_samples_per_second": 2.118, "eval_steps_per_second": 0.53, "step": 50 }, { "epoch": 0.41, "grad_norm": 0.0, "learning_rate": 0.00012285714285714287, "loss": 0.5, "step": 60, "train/kl": 0.0, "train/logps/chosen": -2174.43828125, "train/logps/rejected": -2130.2125, "train/rewards/chosen": -189.318359375, "train/rewards/margins": -4.711669921875, "train/rewards/rejected": -184.606689453125 }, { "epoch": 0.55, "grad_norm": 0.0, "learning_rate": 9.428571428571429e-05, "loss": 0.4562, "step": 80, "train/kl": 0.0, "train/logps/chosen": -2384.5894691780823, "train/logps/rejected": -1893.6566091954023, "train/rewards/chosen": -209.14641748715752, "train/rewards/margins": -46.95111621201383, "train/rewards/rejected": -162.1953012751437 }, { "epoch": 0.68, "grad_norm": 0.0, "learning_rate": 6.571428571428571e-05, "loss": 0.4562, "step": 100, "train/kl": 0.0, "train/logps/chosen": -2356.8424657534247, "train/logps/rejected": -2293.712643678161, "train/rewards/chosen": -207.19579409246575, "train/rewards/margins": -8.242444595339322, "train/rewards/rejected": -198.95334949712642 }, { "epoch": 0.68, "eval/kl": 0.0, "eval/logps/chosen": -2160.684639084507, "eval/logps/rejected": -1938.0195806962026, "eval/rewards/chosen": -187.70257482394365, "eval/rewards/margins": -20.337624764608194, "eval/rewards/rejected": -167.36495005933546, "eval_loss": 0.47333332896232605, "eval_runtime": 141.5959, "eval_samples_per_second": 2.119, "eval_steps_per_second": 0.53, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }