{ "best_metric": 0.9245835621453414, "best_model_checkpoint": "./fine-tune/roberta-base/qnli/checkpoint-19641", "epoch": 6.0, "global_step": 39282, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.545176889793841e-06, "loss": 0.6928, "step": 500 }, { "epoch": 0.15, "learning_rate": 5.090353779587682e-06, "loss": 0.4818, "step": 1000 }, { "epoch": 0.23, "learning_rate": 7.635530669381522e-06, "loss": 0.3934, "step": 1500 }, { "epoch": 0.31, "learning_rate": 1.0180707559175364e-05, "loss": 0.379, "step": 2000 }, { "epoch": 0.38, "learning_rate": 1.2725884448969203e-05, "loss": 0.3509, "step": 2500 }, { "epoch": 0.46, "learning_rate": 1.5271061338763045e-05, "loss": 0.3652, "step": 3000 }, { "epoch": 0.53, "learning_rate": 1.7816238228556887e-05, "loss": 0.3552, "step": 3500 }, { "epoch": 0.61, "learning_rate": 1.997692595180449e-05, "loss": 0.3218, "step": 4000 }, { "epoch": 0.69, "learning_rate": 1.9814432654652997e-05, "loss": 0.3598, "step": 4500 }, { "epoch": 0.76, "learning_rate": 1.9651939357501505e-05, "loss": 0.3159, "step": 5000 }, { "epoch": 0.84, "learning_rate": 1.9489446060350013e-05, "loss": 0.3182, "step": 5500 }, { "epoch": 0.92, "learning_rate": 1.932695276319852e-05, "loss": 0.3031, "step": 6000 }, { "epoch": 0.99, "learning_rate": 1.916445946604703e-05, "loss": 0.2986, "step": 6500 }, { "epoch": 1.0, "eval_accuracy": 0.9170785282811642, "eval_loss": 0.22146184742450714, "eval_runtime": 9.2911, "eval_samples_per_second": 587.983, "eval_steps_per_second": 73.511, "step": 6547 }, { "epoch": 1.07, "learning_rate": 1.9001966168895533e-05, "loss": 0.2681, "step": 7000 }, { "epoch": 1.15, "learning_rate": 1.883947287174404e-05, "loss": 0.2681, "step": 7500 }, { "epoch": 1.22, "learning_rate": 1.867697957459255e-05, "loss": 0.2643, "step": 8000 }, { "epoch": 1.3, "learning_rate": 1.8514486277441056e-05, "loss": 0.253, "step": 8500 }, { "epoch": 1.37, "learning_rate": 1.8351992980289564e-05, "loss": 0.2503, "step": 9000 }, { "epoch": 1.45, "learning_rate": 1.8189499683138072e-05, "loss": 0.2597, "step": 9500 }, { "epoch": 1.53, "learning_rate": 1.802700638598658e-05, "loss": 0.2601, "step": 10000 }, { "epoch": 1.6, "learning_rate": 1.7864513088835088e-05, "loss": 0.2483, "step": 10500 }, { "epoch": 1.68, "learning_rate": 1.7702019791683592e-05, "loss": 0.2532, "step": 11000 }, { "epoch": 1.76, "learning_rate": 1.75395264945321e-05, "loss": 0.2455, "step": 11500 }, { "epoch": 1.83, "learning_rate": 1.737703319738061e-05, "loss": 0.2637, "step": 12000 }, { "epoch": 1.91, "learning_rate": 1.7214539900229116e-05, "loss": 0.2391, "step": 12500 }, { "epoch": 1.99, "learning_rate": 1.7052046603077624e-05, "loss": 0.243, "step": 13000 }, { "epoch": 2.0, "eval_accuracy": 0.9172615778876075, "eval_loss": 0.23211686313152313, "eval_runtime": 9.2969, "eval_samples_per_second": 587.613, "eval_steps_per_second": 73.465, "step": 13094 }, { "epoch": 2.06, "learning_rate": 1.6889553305926132e-05, "loss": 0.2067, "step": 13500 }, { "epoch": 2.14, "learning_rate": 1.672706000877464e-05, "loss": 0.206, "step": 14000 }, { "epoch": 2.21, "learning_rate": 1.6564566711623148e-05, "loss": 0.1964, "step": 14500 }, { "epoch": 2.29, "learning_rate": 1.6402073414471655e-05, "loss": 0.1993, "step": 15000 }, { "epoch": 2.37, "learning_rate": 1.6239580117320163e-05, "loss": 0.2153, "step": 15500 }, { "epoch": 2.44, "learning_rate": 1.607708682016867e-05, "loss": 0.2103, "step": 16000 }, { "epoch": 2.52, "learning_rate": 1.5914593523017176e-05, "loss": 0.2023, "step": 16500 }, { "epoch": 2.6, "learning_rate": 1.5752100225865684e-05, "loss": 0.2063, "step": 17000 }, { "epoch": 2.67, "learning_rate": 1.558960692871419e-05, "loss": 0.2047, "step": 17500 }, { "epoch": 2.75, "learning_rate": 1.54271136315627e-05, "loss": 0.2076, "step": 18000 }, { "epoch": 2.83, "learning_rate": 1.5264620334411207e-05, "loss": 0.2086, "step": 18500 }, { "epoch": 2.9, "learning_rate": 1.5102127037259715e-05, "loss": 0.2001, "step": 19000 }, { "epoch": 2.98, "learning_rate": 1.4939633740108221e-05, "loss": 0.2048, "step": 19500 }, { "epoch": 3.0, "eval_accuracy": 0.9245835621453414, "eval_loss": 0.2992143929004669, "eval_runtime": 9.1061, "eval_samples_per_second": 599.927, "eval_steps_per_second": 75.005, "step": 19641 }, { "epoch": 3.05, "learning_rate": 1.477714044295673e-05, "loss": 0.1717, "step": 20000 }, { "epoch": 3.13, "learning_rate": 1.4614647145805237e-05, "loss": 0.1483, "step": 20500 }, { "epoch": 3.21, "learning_rate": 1.4452153848653745e-05, "loss": 0.1743, "step": 21000 }, { "epoch": 3.28, "learning_rate": 1.4289660551502251e-05, "loss": 0.1442, "step": 21500 }, { "epoch": 3.36, "learning_rate": 1.412716725435076e-05, "loss": 0.1744, "step": 22000 }, { "epoch": 3.44, "learning_rate": 1.3964673957199267e-05, "loss": 0.1694, "step": 22500 }, { "epoch": 3.51, "learning_rate": 1.3802180660047775e-05, "loss": 0.1669, "step": 23000 }, { "epoch": 3.59, "learning_rate": 1.3639687362896281e-05, "loss": 0.1539, "step": 23500 }, { "epoch": 3.67, "learning_rate": 1.347719406574479e-05, "loss": 0.1601, "step": 24000 }, { "epoch": 3.74, "learning_rate": 1.3314700768593297e-05, "loss": 0.1689, "step": 24500 }, { "epoch": 3.82, "learning_rate": 1.3152207471441804e-05, "loss": 0.1608, "step": 25000 }, { "epoch": 3.89, "learning_rate": 1.298971417429031e-05, "loss": 0.1639, "step": 25500 }, { "epoch": 3.97, "learning_rate": 1.282722087713882e-05, "loss": 0.1629, "step": 26000 }, { "epoch": 4.0, "eval_accuracy": 0.9220208676551346, "eval_loss": 0.3538360595703125, "eval_runtime": 9.1087, "eval_samples_per_second": 599.755, "eval_steps_per_second": 74.983, "step": 26188 }, { "epoch": 4.05, "learning_rate": 1.2664727579987326e-05, "loss": 0.1307, "step": 26500 }, { "epoch": 4.12, "learning_rate": 1.2502234282835834e-05, "loss": 0.1112, "step": 27000 }, { "epoch": 4.2, "learning_rate": 1.233974098568434e-05, "loss": 0.1243, "step": 27500 }, { "epoch": 4.28, "learning_rate": 1.217724768853285e-05, "loss": 0.1111, "step": 28000 }, { "epoch": 4.35, "learning_rate": 1.2014754391381356e-05, "loss": 0.1065, "step": 28500 }, { "epoch": 4.43, "learning_rate": 1.1852261094229864e-05, "loss": 0.1319, "step": 29000 }, { "epoch": 4.51, "learning_rate": 1.168976779707837e-05, "loss": 0.1172, "step": 29500 }, { "epoch": 4.58, "learning_rate": 1.152727449992688e-05, "loss": 0.1356, "step": 30000 }, { "epoch": 4.66, "learning_rate": 1.1364781202775386e-05, "loss": 0.1262, "step": 30500 }, { "epoch": 4.73, "learning_rate": 1.1202287905623894e-05, "loss": 0.1236, "step": 31000 }, { "epoch": 4.81, "learning_rate": 1.10397946084724e-05, "loss": 0.1276, "step": 31500 }, { "epoch": 4.89, "learning_rate": 1.087730131132091e-05, "loss": 0.126, "step": 32000 }, { "epoch": 4.96, "learning_rate": 1.0714808014169416e-05, "loss": 0.1308, "step": 32500 }, { "epoch": 5.0, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.35333874821662903, "eval_runtime": 9.1134, "eval_samples_per_second": 599.446, "eval_steps_per_second": 74.944, "step": 32735 }, { "epoch": 5.04, "learning_rate": 1.0552314717017924e-05, "loss": 0.1111, "step": 33000 }, { "epoch": 5.12, "learning_rate": 1.038982141986643e-05, "loss": 0.0724, "step": 33500 }, { "epoch": 5.19, "learning_rate": 1.022732812271494e-05, "loss": 0.0885, "step": 34000 }, { "epoch": 5.27, "learning_rate": 1.0064834825563446e-05, "loss": 0.0855, "step": 34500 }, { "epoch": 5.35, "learning_rate": 9.902341528411954e-06, "loss": 0.0851, "step": 35000 }, { "epoch": 5.42, "learning_rate": 9.739848231260461e-06, "loss": 0.0852, "step": 35500 }, { "epoch": 5.5, "learning_rate": 9.57735493410897e-06, "loss": 0.0888, "step": 36000 }, { "epoch": 5.58, "learning_rate": 9.414861636957477e-06, "loss": 0.0893, "step": 36500 }, { "epoch": 5.65, "learning_rate": 9.252368339805983e-06, "loss": 0.0865, "step": 37000 }, { "epoch": 5.73, "learning_rate": 9.089875042654491e-06, "loss": 0.0814, "step": 37500 }, { "epoch": 5.8, "learning_rate": 8.927381745502999e-06, "loss": 0.0967, "step": 38000 }, { "epoch": 5.88, "learning_rate": 8.764888448351507e-06, "loss": 0.0882, "step": 38500 }, { "epoch": 5.96, "learning_rate": 8.602395151200013e-06, "loss": 0.0846, "step": 39000 }, { "epoch": 6.0, "eval_accuracy": 0.9229361156873512, "eval_loss": 0.427664577960968, "eval_runtime": 9.0686, "eval_samples_per_second": 602.409, "eval_steps_per_second": 75.315, "step": 39282 }, { "epoch": 6.0, "step": 39282, "total_flos": 4.133856190735872e+16, "train_loss": 0.20157804863496975, "train_runtime": 4031.9268, "train_samples_per_second": 259.784, "train_steps_per_second": 16.238 } ], "max_steps": 65470, "num_train_epochs": 10, "total_flos": 4.133856190735872e+16, "trial_name": null, "trial_params": null }