{ "best_metric": 2.390516519546509, "best_model_checkpoint": "MIReAD_3e-05/checkpoint-13806", "epoch": 6.0, "global_step": 27612, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 2.94567579313342e-05, "loss": 4.5385, "step": 500 }, { "epoch": 0.22, "learning_rate": 2.8913515862668408e-05, "loss": 3.745, "step": 1000 }, { "epoch": 0.33, "learning_rate": 2.8370273794002608e-05, "loss": 3.3652, "step": 1500 }, { "epoch": 0.43, "learning_rate": 2.782703172533681e-05, "loss": 3.1532, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.728378965667101e-05, "loss": 2.9768, "step": 2500 }, { "epoch": 0.65, "learning_rate": 2.6740547588005218e-05, "loss": 2.8561, "step": 3000 }, { "epoch": 0.76, "learning_rate": 2.6197305519339418e-05, "loss": 2.7393, "step": 3500 }, { "epoch": 0.87, "learning_rate": 2.565406345067362e-05, "loss": 2.6236, "step": 4000 }, { "epoch": 0.98, "learning_rate": 2.511082138200782e-05, "loss": 2.6074, "step": 4500 }, { "epoch": 1.0, "eval_accuracy": 0.3415979708306912, "eval_f1": 0.28057933815352265, "eval_loss": 2.6715340614318848, "eval_precision": 0.3280031388570244, "eval_recall": 0.30555570676656096, "eval_runtime": 128.8869, "eval_samples_per_second": 122.355, "eval_steps_per_second": 7.65, "step": 4602 }, { "epoch": 1.09, "learning_rate": 2.456757931334203e-05, "loss": 2.4343, "step": 5000 }, { "epoch": 1.2, "learning_rate": 2.402433724467623e-05, "loss": 2.2922, "step": 5500 }, { "epoch": 1.3, "learning_rate": 2.3481095176010432e-05, "loss": 2.2894, "step": 6000 }, { "epoch": 1.41, "learning_rate": 2.2937853107344632e-05, "loss": 2.2635, "step": 6500 }, { "epoch": 1.52, "learning_rate": 2.239461103867884e-05, "loss": 2.238, "step": 7000 }, { "epoch": 1.63, "learning_rate": 2.185136897001304e-05, "loss": 2.2425, "step": 7500 }, { "epoch": 1.74, "learning_rate": 2.1308126901347242e-05, "loss": 2.1702, "step": 8000 }, { "epoch": 1.85, "learning_rate": 2.0764884832681442e-05, "loss": 2.1909, "step": 8500 }, { "epoch": 1.96, "learning_rate": 2.0221642764015646e-05, "loss": 2.1209, "step": 9000 }, { "epoch": 2.0, "eval_accuracy": 0.3929613189600507, "eval_f1": 0.3569292122063319, "eval_loss": 2.4266042709350586, "eval_precision": 0.37263788729318065, "eval_recall": 0.3854876449553655, "eval_runtime": 127.8545, "eval_samples_per_second": 123.343, "eval_steps_per_second": 7.712, "step": 9204 }, { "epoch": 2.06, "learning_rate": 1.967840069534985e-05, "loss": 1.9419, "step": 9500 }, { "epoch": 2.17, "learning_rate": 1.913515862668405e-05, "loss": 1.7981, "step": 10000 }, { "epoch": 2.28, "learning_rate": 1.8591916558018253e-05, "loss": 1.82, "step": 10500 }, { "epoch": 2.39, "learning_rate": 1.8048674489352456e-05, "loss": 1.7695, "step": 11000 }, { "epoch": 2.5, "learning_rate": 1.750543242068666e-05, "loss": 1.7817, "step": 11500 }, { "epoch": 2.61, "learning_rate": 1.696219035202086e-05, "loss": 1.7977, "step": 12000 }, { "epoch": 2.72, "learning_rate": 1.6418948283355063e-05, "loss": 1.7337, "step": 12500 }, { "epoch": 2.82, "learning_rate": 1.5875706214689266e-05, "loss": 1.7563, "step": 13000 }, { "epoch": 2.93, "learning_rate": 1.533246414602347e-05, "loss": 1.7413, "step": 13500 }, { "epoch": 3.0, "eval_accuracy": 0.40786303107165506, "eval_f1": 0.3830566278260409, "eval_loss": 2.390516519546509, "eval_precision": 0.40575163850397916, "eval_recall": 0.39817650401283294, "eval_runtime": 129.2884, "eval_samples_per_second": 121.975, "eval_steps_per_second": 7.626, "step": 13806 }, { "epoch": 3.04, "learning_rate": 1.478922207735767e-05, "loss": 1.6035, "step": 14000 }, { "epoch": 3.15, "learning_rate": 1.4245980008691873e-05, "loss": 1.4123, "step": 14500 }, { "epoch": 3.26, "learning_rate": 1.3702737940026075e-05, "loss": 1.3897, "step": 15000 }, { "epoch": 3.37, "learning_rate": 1.3159495871360279e-05, "loss": 1.3592, "step": 15500 }, { "epoch": 3.48, "learning_rate": 1.261625380269448e-05, "loss": 1.3673, "step": 16000 }, { "epoch": 3.59, "learning_rate": 1.2073011734028684e-05, "loss": 1.3792, "step": 16500 }, { "epoch": 3.69, "learning_rate": 1.1529769665362886e-05, "loss": 1.3385, "step": 17000 }, { "epoch": 3.8, "learning_rate": 1.0986527596697089e-05, "loss": 1.3258, "step": 17500 }, { "epoch": 3.91, "learning_rate": 1.044328552803129e-05, "loss": 1.3415, "step": 18000 }, { "epoch": 4.0, "eval_accuracy": 0.4099556119213697, "eval_f1": 0.3978990080253614, "eval_loss": 2.46903133392334, "eval_precision": 0.4214347585145809, "eval_recall": 0.40462921661844276, "eval_runtime": 129.7625, "eval_samples_per_second": 121.53, "eval_steps_per_second": 7.598, "step": 18408 }, { "epoch": 4.02, "learning_rate": 9.900043459365494e-06, "loss": 1.2695, "step": 18500 }, { "epoch": 4.13, "learning_rate": 9.356801390699696e-06, "loss": 1.0401, "step": 19000 }, { "epoch": 4.24, "learning_rate": 8.8135593220339e-06, "loss": 1.0362, "step": 19500 }, { "epoch": 4.35, "learning_rate": 8.270317253368101e-06, "loss": 1.0133, "step": 20000 }, { "epoch": 4.45, "learning_rate": 7.727075184702305e-06, "loss": 1.0128, "step": 20500 }, { "epoch": 4.56, "learning_rate": 7.183833116036506e-06, "loss": 0.9873, "step": 21000 }, { "epoch": 4.67, "learning_rate": 6.640591047370709e-06, "loss": 0.9855, "step": 21500 }, { "epoch": 4.78, "learning_rate": 6.0973489787049115e-06, "loss": 1.0153, "step": 22000 }, { "epoch": 4.89, "learning_rate": 5.554106910039114e-06, "loss": 0.9783, "step": 22500 }, { "epoch": 5.0, "learning_rate": 5.010864841373317e-06, "loss": 0.9837, "step": 23000 }, { "epoch": 5.0, "eval_accuracy": 0.4050095117311351, "eval_f1": 0.3983089847187763, "eval_loss": 2.622011184692383, "eval_precision": 0.411294379610805, "eval_recall": 0.40211336103710194, "eval_runtime": 129.5856, "eval_samples_per_second": 121.696, "eval_steps_per_second": 7.609, "step": 23010 }, { "epoch": 5.11, "learning_rate": 4.467622772707519e-06, "loss": 0.7989, "step": 23500 }, { "epoch": 5.22, "learning_rate": 3.924380704041722e-06, "loss": 0.776, "step": 24000 }, { "epoch": 5.32, "learning_rate": 3.3811386353759236e-06, "loss": 0.7795, "step": 24500 }, { "epoch": 5.43, "learning_rate": 2.837896566710126e-06, "loss": 0.7417, "step": 25000 }, { "epoch": 5.54, "learning_rate": 2.2946544980443283e-06, "loss": 0.7566, "step": 25500 }, { "epoch": 5.65, "learning_rate": 1.7514124293785311e-06, "loss": 0.7571, "step": 26000 }, { "epoch": 5.76, "learning_rate": 1.2081703607127337e-06, "loss": 0.7666, "step": 26500 }, { "epoch": 5.87, "learning_rate": 6.649282920469362e-07, "loss": 0.7696, "step": 27000 }, { "epoch": 5.98, "learning_rate": 1.2168622338113866e-07, "loss": 0.7456, "step": 27500 }, { "epoch": 6.0, "eval_accuracy": 0.40107799619530754, "eval_f1": 0.39625451475720025, "eval_loss": 2.7117061614990234, "eval_precision": 0.4066294033650095, "eval_recall": 0.39986711618175386, "eval_runtime": 130.357, "eval_samples_per_second": 120.975, "eval_steps_per_second": 7.564, "step": 27612 } ], "max_steps": 27612, "num_train_epochs": 6, "total_flos": 1.1642961087465062e+17, "trial_name": null, "trial_params": null }