{ "best_metric": 0.698971152305603, "best_model_checkpoint": "hBERTv2_new_pretrain_w_init__wnli/checkpoint-15", "epoch": 8.0, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.9111, "step": 5 }, { "epoch": 1.0, "eval_accuracy": 0.5492957746478874, "eval_loss": 0.7287906408309937, "eval_runtime": 0.1334, "eval_samples_per_second": 532.163, "eval_steps_per_second": 7.495, "step": 5 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.7278, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.7028499841690063, "eval_runtime": 0.1349, "eval_samples_per_second": 526.399, "eval_steps_per_second": 7.414, "step": 10 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.707, "step": 15 }, { "epoch": 3.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.698971152305603, "eval_runtime": 0.1329, "eval_samples_per_second": 534.238, "eval_steps_per_second": 7.524, "step": 15 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.7068, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7350625395774841, "eval_runtime": 0.1282, "eval_samples_per_second": 553.85, "eval_steps_per_second": 7.801, "step": 20 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.7424, "step": 25 }, { "epoch": 5.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.7129454016685486, "eval_runtime": 0.1329, "eval_samples_per_second": 534.144, "eval_steps_per_second": 7.523, "step": 25 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.7298, "step": 30 }, { "epoch": 6.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.710222065448761, "eval_runtime": 0.1285, "eval_samples_per_second": 552.441, "eval_steps_per_second": 7.781, "step": 30 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.7043, "step": 35 }, { "epoch": 7.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.7217209935188293, "eval_runtime": 0.1315, "eval_samples_per_second": 540.126, "eval_steps_per_second": 7.607, "step": 35 }, { "epoch": 8.0, "learning_rate": 3.3600000000000004e-05, "loss": 0.7081, "step": 40 }, { "epoch": 8.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.7002915143966675, "eval_runtime": 0.1279, "eval_samples_per_second": 554.968, "eval_steps_per_second": 7.816, "step": 40 }, { "epoch": 8.0, "step": 40, "total_flos": 743403004362752.0, "train_loss": 0.7421743512153626, "train_runtime": 52.5784, "train_samples_per_second": 603.86, "train_steps_per_second": 4.755 } ], "max_steps": 250, "num_train_epochs": 50, "total_flos": 743403004362752.0, "trial_name": null, "trial_params": null }