| { |
| "best_global_step": 68, |
| "best_metric": 0.610907256603241, |
| "best_model_checkpoint": "bert_base_km_5_v2_cola/checkpoint-68", |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 238, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8859699964523315, |
| "learning_rate": 4.9e-05, |
| "loss": 0.6153, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6912751793861389, |
| "eval_loss": 0.6196200251579285, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 0.6905, |
| "eval_samples_per_second": 1510.588, |
| "eval_steps_per_second": 7.242, |
| "step": 34 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.576741099357605, |
| "learning_rate": 4.8e-05, |
| "loss": 0.5911, |
| "step": 68 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6864812970161438, |
| "eval_loss": 0.610907256603241, |
| "eval_matthews_correlation": 0.006434621036303265, |
| "eval_runtime": 0.707, |
| "eval_samples_per_second": 1475.242, |
| "eval_steps_per_second": 7.072, |
| "step": 68 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.2591376304626465, |
| "learning_rate": 4.7e-05, |
| "loss": 0.5354, |
| "step": 102 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6816874146461487, |
| "eval_loss": 0.6186987161636353, |
| "eval_matthews_correlation": 0.06958362417408409, |
| "eval_runtime": 0.6766, |
| "eval_samples_per_second": 1541.478, |
| "eval_steps_per_second": 7.39, |
| "step": 102 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.7871906757354736, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.4723, |
| "step": 136 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.6586769223213196, |
| "eval_loss": 0.6894361972808838, |
| "eval_matthews_correlation": 0.0819224208673349, |
| "eval_runtime": 0.662, |
| "eval_samples_per_second": 1575.441, |
| "eval_steps_per_second": 7.552, |
| "step": 136 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.169605016708374, |
| "learning_rate": 4.5e-05, |
| "loss": 0.4026, |
| "step": 170 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6577181220054626, |
| "eval_loss": 0.7197646498680115, |
| "eval_matthews_correlation": 0.12428013934170246, |
| "eval_runtime": 0.7117, |
| "eval_samples_per_second": 1465.447, |
| "eval_steps_per_second": 7.025, |
| "step": 170 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 2.733868360519409, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.3374, |
| "step": 204 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6347075700759888, |
| "eval_loss": 0.7933048009872437, |
| "eval_matthews_correlation": 0.1072591766108324, |
| "eval_runtime": 0.6674, |
| "eval_samples_per_second": 1562.878, |
| "eval_steps_per_second": 7.492, |
| "step": 204 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 4.486270427703857, |
| "learning_rate": 4.3e-05, |
| "loss": 0.2903, |
| "step": 238 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.6366251111030579, |
| "eval_loss": 0.9325408935546875, |
| "eval_matthews_correlation": 0.09472138771242544, |
| "eval_runtime": 0.6742, |
| "eval_samples_per_second": 1547.052, |
| "eval_steps_per_second": 7.416, |
| "step": 238 |
| }, |
| { |
| "epoch": 7.0, |
| "step": 238, |
| "total_flos": 7874519220341760.0, |
| "train_loss": 0.4634779841959977, |
| "train_runtime": 117.537, |
| "train_samples_per_second": 3637.579, |
| "train_steps_per_second": 14.464 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1700, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7874519220341760.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|