{ "best_metric": 0.11308582127094269, "best_model_checkpoint": "/data/wheld3/mt5-base-pointer-mtop/checkpoint-1200", "epoch": 99.9800918836141, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.65, "learning_rate": 0.0009333333333333333, "loss": 1.7749, "step": 200 }, { "epoch": 6.65, "eval_exact_match": 0.003131991051454139, "eval_loss": 0.5892038345336914, "eval_runtime": 135.5401, "eval_samples_per_second": 16.49, "eval_steps_per_second": 2.066, "step": 200 }, { "epoch": 13.33, "learning_rate": 0.0008666666666666667, "loss": 0.6021, "step": 400 }, { "epoch": 13.33, "eval_exact_match": 0.013870246085011185, "eval_loss": 0.5159956216812134, "eval_runtime": 130.613, "eval_samples_per_second": 17.112, "eval_steps_per_second": 2.144, "step": 400 }, { "epoch": 19.98, "learning_rate": 0.0008, "loss": 0.6044, "step": 600 }, { "epoch": 19.98, "eval_exact_match": 0.053243847874720356, "eval_loss": 0.40801870822906494, "eval_runtime": 132.1423, "eval_samples_per_second": 16.914, "eval_steps_per_second": 2.119, "step": 600 }, { "epoch": 26.65, "learning_rate": 0.0007333333333333333, "loss": 0.3302, "step": 800 }, { "epoch": 26.65, "eval_exact_match": 0.36196868008948546, "eval_loss": 0.18654391169548035, "eval_runtime": 134.276, "eval_samples_per_second": 16.645, "eval_steps_per_second": 2.085, "step": 800 }, { "epoch": 33.33, "learning_rate": 0.0006666666666666666, "loss": 0.1483, "step": 1000 }, { "epoch": 33.33, "eval_exact_match": 0.5105145413870246, "eval_loss": 0.1267053484916687, "eval_runtime": 133.0912, "eval_samples_per_second": 16.793, "eval_steps_per_second": 2.104, "step": 1000 }, { "epoch": 39.98, "learning_rate": 0.0006, "loss": 0.0768, "step": 1200 }, { "epoch": 39.98, "eval_exact_match": 0.5297539149888143, "eval_loss": 0.11308582127094269, "eval_runtime": 139.8147, "eval_samples_per_second": 15.985, "eval_steps_per_second": 2.003, "step": 1200 }, { "epoch": 46.65, "learning_rate": 0.0005333333333333334, "loss": 0.0525, "step": 1400 }, { "epoch": 46.65, "eval_exact_match": 0.5413870246085011, "eval_loss": 0.12185565382242203, "eval_runtime": 132.0637, "eval_samples_per_second": 16.924, "eval_steps_per_second": 2.12, "step": 1400 }, { "epoch": 53.33, "learning_rate": 0.00046666666666666666, "loss": 0.0801, "step": 1600 }, { "epoch": 53.33, "eval_exact_match": 0.5275167785234899, "eval_loss": 0.11860152333974838, "eval_runtime": 132.256, "eval_samples_per_second": 16.899, "eval_steps_per_second": 2.117, "step": 1600 }, { "epoch": 59.98, "learning_rate": 0.0004, "loss": 0.0331, "step": 1800 }, { "epoch": 59.98, "eval_exact_match": 0.5422818791946309, "eval_loss": 0.13056021928787231, "eval_runtime": 132.9523, "eval_samples_per_second": 16.811, "eval_steps_per_second": 2.106, "step": 1800 }, { "epoch": 66.65, "learning_rate": 0.0003333333333333333, "loss": 0.0254, "step": 2000 }, { "epoch": 66.65, "eval_exact_match": 0.5395973154362416, "eval_loss": 0.13960428535938263, "eval_runtime": 135.7759, "eval_samples_per_second": 16.461, "eval_steps_per_second": 2.062, "step": 2000 }, { "epoch": 73.33, "learning_rate": 0.0002666666666666667, "loss": 0.0168, "step": 2200 }, { "epoch": 73.33, "eval_exact_match": 0.5436241610738255, "eval_loss": 0.15595464408397675, "eval_runtime": 134.0818, "eval_samples_per_second": 16.669, "eval_steps_per_second": 2.088, "step": 2200 }, { "epoch": 79.98, "learning_rate": 0.0002, "loss": 0.0129, "step": 2400 }, { "epoch": 79.98, "eval_exact_match": 0.5494407158836689, "eval_loss": 0.16592496633529663, "eval_runtime": 133.0433, "eval_samples_per_second": 16.799, "eval_steps_per_second": 2.105, "step": 2400 }, { "epoch": 86.65, "learning_rate": 0.00013333333333333334, "loss": 0.0105, "step": 2600 }, { "epoch": 86.65, "eval_exact_match": 0.5422818791946309, "eval_loss": 0.16985595226287842, "eval_runtime": 137.112, "eval_samples_per_second": 16.301, "eval_steps_per_second": 2.042, "step": 2600 }, { "epoch": 93.33, "learning_rate": 6.666666666666667e-05, "loss": 0.0088, "step": 2800 }, { "epoch": 93.33, "eval_exact_match": 0.5472035794183445, "eval_loss": 0.17423103749752045, "eval_runtime": 132.2768, "eval_samples_per_second": 16.896, "eval_steps_per_second": 2.117, "step": 2800 }, { "epoch": 99.98, "learning_rate": 0.0, "loss": 0.0077, "step": 3000 }, { "epoch": 99.98, "eval_exact_match": 0.5467561521252796, "eval_loss": 0.17754317820072174, "eval_runtime": 132.953, "eval_samples_per_second": 16.81, "eval_steps_per_second": 2.106, "step": 3000 }, { "epoch": 99.98, "step": 3000, "total_flos": 7.72413437221586e+16, "train_loss": 0.2523062037229538, "train_runtime": 33274.6395, "train_samples_per_second": 46.161, "train_steps_per_second": 0.09 } ], "max_steps": 3000, "num_train_epochs": 100, "total_flos": 7.72413437221586e+16, "trial_name": null, "trial_params": null }