| { | |
| "best_global_step": 5538, | |
| "best_metric": 0.8634780589737636, | |
| "best_model_checkpoint": "outputs/bartpho-hsd/checkpoint-best/checkpoint-5538", | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 5538, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.5697989463806152, | |
| "learning_rate": 4.998769072267837e-05, | |
| "loss": 0.5389, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8428140236823776, | |
| "eval_loss": 0.5179033875465393, | |
| "eval_runtime": 60.9906, | |
| "eval_samples_per_second": 70.617, | |
| "eval_steps_per_second": 2.213, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.148050308227539, | |
| "learning_rate": 4.995072162589518e-05, | |
| "loss": 0.5208, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8428140236823776, | |
| "eval_loss": 0.5180655121803284, | |
| "eval_runtime": 55.4284, | |
| "eval_samples_per_second": 77.704, | |
| "eval_steps_per_second": 2.436, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 6.275445461273193, | |
| "learning_rate": 4.988912917920435e-05, | |
| "loss": 0.4785, | |
| "step": 2769 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.850243789180404, | |
| "eval_loss": 0.48658695816993713, | |
| "eval_runtime": 55.6378, | |
| "eval_samples_per_second": 77.411, | |
| "eval_steps_per_second": 2.426, | |
| "step": 2769 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.8800065517425537, | |
| "learning_rate": 4.980297416691463e-05, | |
| "loss": 0.4072, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8379382400742976, | |
| "eval_loss": 0.45363402366638184, | |
| "eval_runtime": 55.9448, | |
| "eval_samples_per_second": 76.987, | |
| "eval_steps_per_second": 2.413, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.930692195892334, | |
| "learning_rate": 4.969234161362153e-05, | |
| "loss": 0.372, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8576735546784304, | |
| "eval_loss": 0.4497613310813904, | |
| "eval_runtime": 55.8766, | |
| "eval_samples_per_second": 77.081, | |
| "eval_steps_per_second": 2.416, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 3.390305995941162, | |
| "learning_rate": 4.9557340700298316e-05, | |
| "loss": 0.345, | |
| "step": 5538 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8634780589737636, | |
| "eval_loss": 0.4286285638809204, | |
| "eval_runtime": 55.8527, | |
| "eval_samples_per_second": 77.114, | |
| "eval_steps_per_second": 2.417, | |
| "step": 5538 | |
| } | |
| ], | |
| "logging_steps": 923, | |
| "max_steps": 92300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.625341380307149e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |