| { |
| "best_metric": 1.495205044746399, |
| "best_model_checkpoint": "safety_instruct_detect/checkpoint-2556", |
| "epoch": 10.0, |
| "global_step": 12780, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.2455e-05, |
| "loss": 1.5162, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.4955000000000006e-05, |
| "loss": 1.4983, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.4036969420356002, |
| "eval_loss": 1.4984349012374878, |
| "eval_runtime": 11.2442, |
| "eval_samples_per_second": 389.712, |
| "eval_steps_per_second": 48.736, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.309380305602717e-05, |
| "loss": 1.4868, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 4.1183786078098476e-05, |
| "loss": 1.4851, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 3.927376910016978e-05, |
| "loss": 1.4886, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.4036969420356002, |
| "eval_loss": 1.495205044746399, |
| "eval_runtime": 11.4186, |
| "eval_samples_per_second": 383.76, |
| "eval_steps_per_second": 47.992, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 3.736757215619695e-05, |
| "loss": 1.491, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.545755517826825e-05, |
| "loss": 1.4828, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.4036969420356002, |
| "eval_loss": 1.4983419179916382, |
| "eval_runtime": 11.4982, |
| "eval_samples_per_second": 381.103, |
| "eval_steps_per_second": 47.66, |
| "step": 3834 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 3.354753820033956e-05, |
| "loss": 1.4937, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 3.163752122241087e-05, |
| "loss": 1.4751, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 2.9731324278438033e-05, |
| "loss": 1.4853, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.4036969420356002, |
| "eval_loss": 1.5341633558273315, |
| "eval_runtime": 11.4113, |
| "eval_samples_per_second": 384.006, |
| "eval_steps_per_second": 48.023, |
| "step": 5112 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 2.782130730050934e-05, |
| "loss": 1.4528, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 2.5911290322580646e-05, |
| "loss": 1.4446, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.39639434048379735, |
| "eval_loss": 1.557020664215088, |
| "eval_runtime": 11.9866, |
| "eval_samples_per_second": 365.575, |
| "eval_steps_per_second": 45.718, |
| "step": 6390 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 2.4001273344651956e-05, |
| "loss": 1.4092, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 2.209125636672326e-05, |
| "loss": 1.3309, |
| "step": 7000 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 2.018123938879457e-05, |
| "loss": 1.3054, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.36376083979917845, |
| "eval_loss": 1.8100188970565796, |
| "eval_runtime": 11.9522, |
| "eval_samples_per_second": 366.627, |
| "eval_steps_per_second": 45.849, |
| "step": 7668 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 1.8271222410865876e-05, |
| "loss": 1.2194, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 1.6361205432937183e-05, |
| "loss": 1.1677, |
| "step": 8500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.3265632131446828, |
| "eval_loss": 2.0681562423706055, |
| "eval_runtime": 12.1156, |
| "eval_samples_per_second": 361.683, |
| "eval_steps_per_second": 45.231, |
| "step": 8946 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 1.4455008488964348e-05, |
| "loss": 1.1879, |
| "step": 9000 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 1.2548811544991511e-05, |
| "loss": 1.0508, |
| "step": 9500 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 1.063879456706282e-05, |
| "loss": 1.0861, |
| "step": 10000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.3014605203103606, |
| "eval_loss": 2.157534599304199, |
| "eval_runtime": 11.8194, |
| "eval_samples_per_second": 370.747, |
| "eval_steps_per_second": 46.365, |
| "step": 10224 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 8.728777589134126e-06, |
| "loss": 1.0225, |
| "step": 10500 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 6.818760611205433e-06, |
| "loss": 0.9752, |
| "step": 11000 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 4.912563667232598e-06, |
| "loss": 0.9729, |
| "step": 11500 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.30785029666818803, |
| "eval_loss": 2.388486385345459, |
| "eval_runtime": 11.8239, |
| "eval_samples_per_second": 370.605, |
| "eval_steps_per_second": 46.347, |
| "step": 11502 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 3.002546689303905e-06, |
| "loss": 0.908, |
| "step": 12000 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 1.0925297113752123e-06, |
| "loss": 0.8921, |
| "step": 12500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.3023733455043359, |
| "eval_loss": 2.518986701965332, |
| "eval_runtime": 11.4293, |
| "eval_samples_per_second": 383.399, |
| "eval_steps_per_second": 47.947, |
| "step": 12780 |
| } |
| ], |
| "max_steps": 12780, |
| "num_train_epochs": 10, |
| "total_flos": 418971279265446.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|