{ "best_metric": 0.512192712055601, "best_model_checkpoint": "./xlnet-base-cased/fine_tuned_models/checkpoint-1876", "epoch": 10.0, "global_step": 2680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.6242, "step": 268 }, { "epoch": 1.0, "eval_loss": 0.6478146314620972, "eval_matthews_correlation": 0.018148342420931135, "eval_runtime": 5.3259, "eval_samples_per_second": 195.835, "eval_steps_per_second": 24.597, "step": 268 }, { "epoch": 2.0, "learning_rate": 1.7777777777777777e-05, "loss": 0.5475, "step": 536 }, { "epoch": 2.0, "eval_loss": 0.5800684690475464, "eval_matthews_correlation": 0.3687428254676126, "eval_runtime": 5.3218, "eval_samples_per_second": 195.987, "eval_steps_per_second": 24.616, "step": 536 }, { "epoch": 3.0, "learning_rate": 1.555555555555556e-05, "loss": 0.4228, "step": 804 }, { "epoch": 3.0, "eval_loss": 0.518549382686615, "eval_matthews_correlation": 0.4729503605591106, "eval_runtime": 5.3206, "eval_samples_per_second": 196.031, "eval_steps_per_second": 24.621, "step": 804 }, { "epoch": 4.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.3132, "step": 1072 }, { "epoch": 4.0, "eval_loss": 0.7433116436004639, "eval_matthews_correlation": 0.4456190824541546, "eval_runtime": 5.2944, "eval_samples_per_second": 197.001, "eval_steps_per_second": 24.743, "step": 1072 }, { "epoch": 5.0, "learning_rate": 1.1111111111111113e-05, "loss": 0.2426, "step": 1340 }, { "epoch": 5.0, "eval_loss": 0.7162166833877563, "eval_matthews_correlation": 0.4729503605591106, "eval_runtime": 5.3233, "eval_samples_per_second": 195.929, "eval_steps_per_second": 24.609, "step": 1340 }, { "epoch": 6.0, "learning_rate": 8.888888888888888e-06, "loss": 0.1892, "step": 1608 }, { "epoch": 6.0, "eval_loss": 0.7776421308517456, "eval_matthews_correlation": 0.48239382723457414, "eval_runtime": 5.3253, "eval_samples_per_second": 195.858, "eval_steps_per_second": 24.6, "step": 1608 }, { "epoch": 7.0, "learning_rate": 6.666666666666667e-06, "loss": 0.1555, "step": 1876 }, { "epoch": 7.0, "eval_loss": 0.855113685131073, "eval_matthews_correlation": 0.512192712055601, "eval_runtime": 5.304, "eval_samples_per_second": 196.642, "eval_steps_per_second": 24.698, "step": 1876 }, { "epoch": 8.0, "learning_rate": 4.444444444444444e-06, "loss": 0.123, "step": 2144 }, { "epoch": 8.0, "eval_loss": 1.1328730583190918, "eval_matthews_correlation": 0.47806047639386823, "eval_runtime": 5.3251, "eval_samples_per_second": 195.866, "eval_steps_per_second": 24.601, "step": 2144 }, { "epoch": 9.0, "learning_rate": 2.222222222222222e-06, "loss": 0.096, "step": 2412 }, { "epoch": 9.0, "eval_loss": 1.1940616369247437, "eval_matthews_correlation": 0.47687961708523924, "eval_runtime": 5.3349, "eval_samples_per_second": 195.504, "eval_steps_per_second": 24.555, "step": 2412 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0864, "step": 2680 }, { "epoch": 10.0, "eval_loss": 1.3239350318908691, "eval_matthews_correlation": 0.45569507663230396, "eval_runtime": 5.3171, "eval_samples_per_second": 196.161, "eval_steps_per_second": 24.638, "step": 2680 }, { "epoch": 10.0, "step": 2680, "total_flos": 3851987691901440.0, "train_loss": 0.28002330011396265, "train_runtime": 1315.8713, "train_samples_per_second": 64.984, "train_steps_per_second": 2.037 } ], "max_steps": 2680, "num_train_epochs": 10, "total_flos": 3851987691901440.0, "trial_name": null, "trial_params": null }