{ "best_metric": 0.8182447038478167, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_final//finetune/qqp/checkpoint-9600", "epoch": 4.7290640394088665, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "eval_accuracy": 0.7285135388374329, "eval_f1": 0.6788102780711017, "eval_loss": 0.5273743271827698, "eval_mcc": 0.44373753898319895, "eval_runtime": 52.3791, "eval_samples_per_second": 513.354, "eval_steps_per_second": 64.186, "step": 400 }, { "epoch": 0.25, "learning_rate": 4.876847290640394e-05, "loss": 0.5651, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.7599390149116516, "eval_f1": 0.6933346002185378, "eval_loss": 0.48232948780059814, "eval_mcc": 0.5015541735435936, "eval_runtime": 52.3511, "eval_samples_per_second": 513.628, "eval_steps_per_second": 64.22, "step": 800 }, { "epoch": 0.49, "learning_rate": 4.753694581280788e-05, "loss": 0.4957, "step": 1000 }, { "epoch": 0.59, "eval_accuracy": 0.7784224152565002, "eval_f1": 0.7293048614266242, "eval_loss": 0.4581160545349121, "eval_mcc": 0.542554696937411, "eval_runtime": 52.3159, "eval_samples_per_second": 513.974, "eval_steps_per_second": 64.263, "step": 1200 }, { "epoch": 0.74, "learning_rate": 4.630541871921182e-05, "loss": 0.4616, "step": 1500 }, { "epoch": 0.79, "eval_accuracy": 0.7962735891342163, "eval_f1": 0.7594836670179136, "eval_loss": 0.43172687292099, "eval_mcc": 0.582852877479166, "eval_runtime": 52.3144, "eval_samples_per_second": 513.989, "eval_steps_per_second": 64.265, "step": 1600 }, { "epoch": 0.99, "learning_rate": 4.507389162561577e-05, "loss": 0.4345, "step": 2000 }, { "epoch": 0.99, "eval_accuracy": 0.7980214953422546, "eval_f1": 0.7422768471503821, "eval_loss": 0.4275752305984497, "eval_mcc": 0.5817912489010888, "eval_runtime": 52.3547, "eval_samples_per_second": 513.593, "eval_steps_per_second": 64.216, "step": 2000 }, { "epoch": 1.18, "eval_accuracy": 0.8100338578224182, "eval_f1": 0.7739022662889519, "eval_loss": 0.4070776104927063, "eval_mcc": 0.6101111570276126, "eval_runtime": 52.603, "eval_samples_per_second": 511.169, "eval_steps_per_second": 63.913, "step": 2400 }, { "epoch": 1.23, "learning_rate": 4.384236453201971e-05, "loss": 0.3724, "step": 2500 }, { "epoch": 1.38, "eval_accuracy": 0.815277636051178, "eval_f1": 0.7796850742958528, "eval_loss": 0.4088333547115326, "eval_mcc": 0.6206627379992179, "eval_runtime": 52.4, "eval_samples_per_second": 513.148, "eval_steps_per_second": 64.16, "step": 2800 }, { "epoch": 1.48, "learning_rate": 4.261083743842365e-05, "loss": 0.3577, "step": 3000 }, { "epoch": 1.58, "eval_accuracy": 0.8183643817901611, "eval_f1": 0.7709408123065379, "eval_loss": 0.4194847047328949, "eval_mcc": 0.6244513021568135, "eval_runtime": 52.3303, "eval_samples_per_second": 513.832, "eval_steps_per_second": 64.246, "step": 3200 }, { "epoch": 1.72, "learning_rate": 4.1379310344827587e-05, "loss": 0.3508, "step": 3500 }, { "epoch": 1.77, "eval_accuracy": 0.8219718337059021, "eval_f1": 0.7793907553343471, "eval_loss": 0.3924076557159424, "eval_mcc": 0.6321434549442672, "eval_runtime": 52.3776, "eval_samples_per_second": 513.368, "eval_steps_per_second": 64.188, "step": 3600 }, { "epoch": 1.97, "learning_rate": 4.014778325123153e-05, "loss": 0.3482, "step": 4000 }, { "epoch": 1.97, "eval_accuracy": 0.8285172581672668, "eval_f1": 0.7950393385784771, "eval_loss": 0.3821839690208435, "eval_mcc": 0.6476609486444006, "eval_runtime": 52.4185, "eval_samples_per_second": 512.968, "eval_steps_per_second": 64.138, "step": 4000 }, { "epoch": 2.17, "eval_accuracy": 0.8318271636962891, "eval_f1": 0.7981610426709517, "eval_loss": 0.4135289788246155, "eval_mcc": 0.6541252204741957, "eval_runtime": 52.3538, "eval_samples_per_second": 513.602, "eval_steps_per_second": 64.217, "step": 4400 }, { "epoch": 2.22, "learning_rate": 3.891625615763547e-05, "loss": 0.2674, "step": 4500 }, { "epoch": 2.36, "eval_accuracy": 0.832124650478363, "eval_f1": 0.7885318092382648, "eval_loss": 0.4241204261779785, "eval_mcc": 0.6532604645809198, "eval_runtime": 52.317, "eval_samples_per_second": 513.963, "eval_steps_per_second": 64.262, "step": 4800 }, { "epoch": 2.46, "learning_rate": 3.768472906403941e-05, "loss": 0.2675, "step": 5000 }, { "epoch": 2.56, "eval_accuracy": 0.8314180374145508, "eval_f1": 0.7988283850352818, "eval_loss": 0.3950100243091583, "eval_mcc": 0.6537612657210461, "eval_runtime": 52.3502, "eval_samples_per_second": 513.637, "eval_steps_per_second": 64.221, "step": 5200 }, { "epoch": 2.71, "learning_rate": 3.645320197044335e-05, "loss": 0.2675, "step": 5500 }, { "epoch": 2.76, "eval_accuracy": 0.8390420079231262, "eval_f1": 0.8080028391447077, "eval_loss": 0.3883987367153168, "eval_mcc": 0.6694540753123261, "eval_runtime": 52.3719, "eval_samples_per_second": 513.424, "eval_steps_per_second": 64.195, "step": 5600 }, { "epoch": 2.96, "learning_rate": 3.522167487684729e-05, "loss": 0.2697, "step": 6000 }, { "epoch": 2.96, "eval_accuracy": 0.8381866216659546, "eval_f1": 0.8078604548465443, "eval_loss": 0.3748666048049927, "eval_mcc": 0.668109270008498, "eval_runtime": 52.4002, "eval_samples_per_second": 513.147, "eval_steps_per_second": 64.16, "step": 6000 }, { "epoch": 3.15, "eval_accuracy": 0.8381122350692749, "eval_f1": 0.8102358428876585, "eval_loss": 0.43886855244636536, "eval_mcc": 0.669352918279841, "eval_runtime": 52.3858, "eval_samples_per_second": 513.288, "eval_steps_per_second": 64.178, "step": 6400 }, { "epoch": 3.2, "learning_rate": 3.399014778325123e-05, "loss": 0.201, "step": 6500 }, { "epoch": 3.35, "eval_accuracy": 0.84197998046875, "eval_f1": 0.8148503202753933, "eval_loss": 0.45135924220085144, "eval_mcc": 0.6773127279657783, "eval_runtime": 52.3499, "eval_samples_per_second": 513.64, "eval_steps_per_second": 64.222, "step": 6800 }, { "epoch": 3.45, "learning_rate": 3.275862068965517e-05, "loss": 0.1952, "step": 7000 }, { "epoch": 3.55, "eval_accuracy": 0.8422403335571289, "eval_f1": 0.8114834236956716, "eval_loss": 0.4344791769981384, "eval_mcc": 0.67587759010705, "eval_runtime": 52.3592, "eval_samples_per_second": 513.548, "eval_steps_per_second": 64.21, "step": 7200 }, { "epoch": 3.69, "learning_rate": 3.152709359605912e-05, "loss": 0.1935, "step": 7500 }, { "epoch": 3.74, "eval_accuracy": 0.8433560132980347, "eval_f1": 0.8172826652785007, "eval_loss": 0.4027557373046875, "eval_mcc": 0.6806843267469705, "eval_runtime": 52.467, "eval_samples_per_second": 512.494, "eval_steps_per_second": 64.078, "step": 7600 }, { "epoch": 3.94, "learning_rate": 3.0295566502463057e-05, "loss": 0.1979, "step": 8000 }, { "epoch": 3.94, "eval_accuracy": 0.8457361459732056, "eval_f1": 0.8150361187906895, "eval_loss": 0.4362601339817047, "eval_mcc": 0.6828095359569987, "eval_runtime": 52.375, "eval_samples_per_second": 513.394, "eval_steps_per_second": 64.191, "step": 8000 }, { "epoch": 4.14, "eval_accuracy": 0.8406411409378052, "eval_f1": 0.8119541844033878, "eval_loss": 0.4881964325904846, "eval_mcc": 0.6737691323117093, "eval_runtime": 52.3705, "eval_samples_per_second": 513.438, "eval_steps_per_second": 64.196, "step": 8400 }, { "epoch": 4.19, "learning_rate": 2.9064039408866993e-05, "loss": 0.1456, "step": 8500 }, { "epoch": 4.33, "eval_accuracy": 0.8454014658927917, "eval_f1": 0.8155641332800924, "eval_loss": 0.505477786064148, "eval_mcc": 0.682503608511531, "eval_runtime": 52.3839, "eval_samples_per_second": 513.306, "eval_steps_per_second": 64.18, "step": 8800 }, { "epoch": 4.43, "learning_rate": 2.7832512315270936e-05, "loss": 0.1379, "step": 9000 }, { "epoch": 4.53, "eval_accuracy": 0.8446948528289795, "eval_f1": 0.8108866950457387, "eval_loss": 0.47982802987098694, "eval_mcc": 0.6798065059175836, "eval_runtime": 52.4432, "eval_samples_per_second": 512.726, "eval_steps_per_second": 64.107, "step": 9200 }, { "epoch": 4.68, "learning_rate": 2.660098522167488e-05, "loss": 0.1396, "step": 9500 }, { "epoch": 4.73, "eval_accuracy": 0.8436535596847534, "eval_f1": 0.8182447038478167, "eval_loss": 0.4731481373310089, "eval_mcc": 0.681741240221474, "eval_runtime": 52.3629, "eval_samples_per_second": 513.513, "eval_steps_per_second": 64.206, "step": 9600 }, { "epoch": 4.73, "step": 9600, "total_flos": 9.609924849312154e+16, "train_loss": 0.29670360455910366, "train_runtime": 6262.587, "train_samples_per_second": 388.814, "train_steps_per_second": 3.241 } ], "max_steps": 20300, "num_train_epochs": 10, "total_flos": 9.609924849312154e+16, "trial_name": null, "trial_params": null }