{ "best_metric": 0.9905983805656433, "best_model_checkpoint": "th_cl_13epochs_lora_pos_neg/checkpoint-96", "epoch": 3.0, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3125, "grad_norm": 28.00575065612793, "learning_rate": 8.958333333333335e-05, "loss": 0.4959, "step": 10 }, { "epoch": 0.625, "grad_norm": 26.350143432617188, "learning_rate": 7.916666666666666e-05, "loss": 0.6218, "step": 20 }, { "epoch": 0.9375, "grad_norm": 31.33298110961914, "learning_rate": 6.875e-05, "loss": 0.7442, "step": 30 }, { "epoch": 1.0, "eval_accuracy": 0.5111111111111111, "eval_balanced_accuracy": 0.515, "eval_loss": 1.0410274267196655, "eval_runtime": 86.3882, "eval_samples_per_second": 0.521, "eval_steps_per_second": 0.069, "step": 32 }, { "epoch": 1.25, "grad_norm": 32.04572677612305, "learning_rate": 5.833333333333334e-05, "loss": 0.3933, "step": 40 }, { "epoch": 1.5625, "grad_norm": 27.043235778808594, "learning_rate": 4.791666666666667e-05, "loss": 0.3407, "step": 50 }, { "epoch": 1.875, "grad_norm": 20.865083694458008, "learning_rate": 3.7500000000000003e-05, "loss": 0.4596, "step": 60 }, { "epoch": 2.0, "eval_accuracy": 0.5111111111111111, "eval_balanced_accuracy": 0.5118577075098814, "eval_loss": 1.0013209581375122, "eval_runtime": 87.3168, "eval_samples_per_second": 0.515, "eval_steps_per_second": 0.069, "step": 64 }, { "epoch": 2.1875, "grad_norm": 7.867964744567871, "learning_rate": 2.7083333333333332e-05, "loss": 0.317, "step": 70 }, { "epoch": 2.5, "grad_norm": 21.569629669189453, "learning_rate": 1.6666666666666667e-05, "loss": 0.2957, "step": 80 }, { "epoch": 2.8125, "grad_norm": 14.822030067443848, "learning_rate": 6.25e-06, "loss": 0.2864, "step": 90 }, { "epoch": 3.0, "eval_accuracy": 0.5111111111111111, "eval_balanced_accuracy": 0.5118577075098814, "eval_loss": 0.9905983805656433, "eval_runtime": 86.9479, "eval_samples_per_second": 0.518, "eval_steps_per_second": 0.069, "step": 96 }, { "epoch": 3.0, "step": 96, "total_flos": 1.6176865332953088e+16, "train_loss": 0.4266343352695306, "train_runtime": 4808.092, "train_samples_per_second": 0.157, "train_steps_per_second": 0.02 } ], "logging_steps": 10, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.6176865332953088e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }