{ "best_metric": 0.3541961908340454, "best_model_checkpoint": "./runtime-text-classification/electra-base-discriminator-CoLA/checkpoint-134", "epoch": 8.0, "global_step": 536, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 2.3529411764705884e-05, "loss": 0.6818, "step": 5 }, { "epoch": 0.15, "learning_rate": 4.705882352941177e-05, "loss": 0.6093, "step": 10 }, { "epoch": 0.22, "learning_rate": 7.058823529411765e-05, "loss": 0.5991, "step": 15 }, { "epoch": 0.3, "learning_rate": 7.999340483599093e-05, "loss": 0.5014, "step": 20 }, { "epoch": 0.37, "learning_rate": 7.995310893151923e-05, "loss": 0.4253, "step": 25 }, { "epoch": 0.45, "learning_rate": 7.987621796687375e-05, "loss": 0.4231, "step": 30 }, { "epoch": 0.52, "learning_rate": 7.976280237035291e-05, "loss": 0.411, "step": 35 }, { "epoch": 0.6, "learning_rate": 7.964584084578578e-05, "loss": 0.4267, "step": 40 }, { "epoch": 0.67, "learning_rate": 7.95056379670812e-05, "loss": 0.4357, "step": 45 }, { "epoch": 0.75, "learning_rate": 7.929782771972567e-05, "loss": 0.4468, "step": 50 }, { "epoch": 0.82, "learning_rate": 7.905402261718713e-05, "loss": 0.4651, "step": 55 }, { "epoch": 0.9, "learning_rate": 7.877444597281055e-05, "loss": 0.3906, "step": 60 }, { "epoch": 0.97, "learning_rate": 7.845935386489366e-05, "loss": 0.4004, "step": 65 }, { "epoch": 1.0, "eval_loss": 0.35688334703445435, "eval_matthews_correlation": 0.6339887598158311, "eval_runtime": 1.9677, "eval_samples_per_second": 530.071, "eval_steps_per_second": 33.542, "step": 67 }, { "epoch": 1.04, "learning_rate": 7.810903490213196e-05, "loss": 0.3209, "step": 70 }, { "epoch": 1.12, "learning_rate": 7.77238099592675e-05, "loss": 0.3024, "step": 75 }, { "epoch": 1.19, "learning_rate": 7.730403188318356e-05, "loss": 0.2579, "step": 80 }, { "epoch": 1.27, "learning_rate": 7.685008516971451e-05, "loss": 0.3063, "step": 85 }, { "epoch": 1.34, "learning_rate": 7.636238561146672e-05, "loss": 0.2636, "step": 90 }, { "epoch": 1.42, "learning_rate": 7.58413799169732e-05, "loss": 0.294, "step": 95 }, { "epoch": 1.49, "learning_rate": 7.528754530153087e-05, "loss": 0.2836, "step": 100 }, { "epoch": 1.57, "learning_rate": 7.470138905009503e-05, "loss": 0.3346, "step": 105 }, { "epoch": 1.64, "learning_rate": 7.408344805263159e-05, "loss": 0.2939, "step": 110 }, { "epoch": 1.72, "learning_rate": 7.343428831235258e-05, "loss": 0.2815, "step": 115 }, { "epoch": 1.79, "learning_rate": 7.275450442728538e-05, "loss": 0.2799, "step": 120 }, { "epoch": 1.87, "learning_rate": 7.204471904565049e-05, "loss": 0.2491, "step": 125 }, { "epoch": 1.94, "learning_rate": 7.130558229554668e-05, "loss": 0.2843, "step": 130 }, { "epoch": 2.0, "eval_loss": 0.3541961908340454, "eval_matthews_correlation": 0.6579677841732349, "eval_runtime": 2.0826, "eval_samples_per_second": 500.817, "eval_steps_per_second": 31.691, "step": 134 }, { "epoch": 2.01, "learning_rate": 7.053777118946615e-05, "loss": 0.2591, "step": 135 }, { "epoch": 2.09, "learning_rate": 6.974198900418456e-05, "loss": 0.182, "step": 140 }, { "epoch": 2.16, "learning_rate": 6.89189646365947e-05, "loss": 0.1888, "step": 145 }, { "epoch": 2.24, "learning_rate": 6.806945193607303e-05, "loss": 0.1455, "step": 150 }, { "epoch": 2.31, "learning_rate": 6.719422901399121e-05, "loss": 0.1544, "step": 155 }, { "epoch": 2.39, "learning_rate": 6.629409753100488e-05, "loss": 0.1622, "step": 160 }, { "epoch": 2.46, "learning_rate": 6.536988196277224e-05, "loss": 0.1784, "step": 165 }, { "epoch": 2.54, "learning_rate": 6.442242884477545e-05, "loss": 0.203, "step": 170 }, { "epoch": 2.61, "learning_rate": 6.345260599693621e-05, "loss": 0.1598, "step": 175 }, { "epoch": 2.69, "learning_rate": 6.24613017287359e-05, "loss": 0.1555, "step": 180 }, { "epoch": 2.76, "learning_rate": 6.144942402556829e-05, "loss": 0.1363, "step": 185 }, { "epoch": 2.84, "learning_rate": 6.041789971707008e-05, "loss": 0.1907, "step": 190 }, { "epoch": 2.91, "learning_rate": 5.9367673628191066e-05, "loss": 0.1935, "step": 195 }, { "epoch": 2.99, "learning_rate": 5.8299707713781464e-05, "loss": 0.1228, "step": 200 }, { "epoch": 3.0, "eval_loss": 0.42005565762519836, "eval_matthews_correlation": 0.6412441886324401, "eval_runtime": 1.931, "eval_samples_per_second": 540.144, "eval_steps_per_second": 34.18, "step": 201 }, { "epoch": 3.06, "learning_rate": 5.721498017748917e-05, "loss": 0.1156, "step": 205 }, { "epoch": 3.13, "learning_rate": 5.6114484575773854e-05, "loss": 0.0937, "step": 210 }, { "epoch": 3.21, "learning_rate": 5.499922890785868e-05, "loss": 0.1138, "step": 215 }, { "epoch": 3.28, "learning_rate": 5.3870234692453145e-05, "loss": 0.0948, "step": 220 }, { "epoch": 3.36, "learning_rate": 5.272853603209279e-05, "loss": 0.0777, "step": 225 }, { "epoch": 3.43, "learning_rate": 5.15751786659527e-05, "loss": 0.1148, "step": 230 }, { "epoch": 3.51, "learning_rate": 5.0411219012002445e-05, "loss": 0.1676, "step": 235 }, { "epoch": 3.58, "learning_rate": 4.92377231993798e-05, "loss": 0.0868, "step": 240 }, { "epoch": 3.66, "learning_rate": 4.805576609186946e-05, "loss": 0.1009, "step": 245 }, { "epoch": 3.73, "learning_rate": 4.686643030338131e-05, "loss": 0.1072, "step": 250 }, { "epoch": 3.81, "learning_rate": 4.5670805206329904e-05, "loss": 0.0908, "step": 255 }, { "epoch": 3.88, "learning_rate": 4.4469985933823594e-05, "loss": 0.1031, "step": 260 }, { "epoch": 3.96, "learning_rate": 4.326507237657703e-05, "loss": 0.0989, "step": 265 }, { "epoch": 4.0, "eval_loss": 0.4780134856700897, "eval_matthews_correlation": 0.6757073194553476, "eval_runtime": 1.9344, "eval_samples_per_second": 539.172, "eval_steps_per_second": 34.118, "step": 268 }, { "epoch": 4.03, "learning_rate": 4.205716817546599e-05, "loss": 0.0854, "step": 270 }, { "epoch": 4.1, "learning_rate": 4.084737971064728e-05, "loss": 0.0493, "step": 275 }, { "epoch": 4.18, "learning_rate": 3.9636815088169476e-05, "loss": 0.0902, "step": 280 }, { "epoch": 4.25, "learning_rate": 3.842658312500302e-05, "loss": 0.0567, "step": 285 }, { "epoch": 4.33, "learning_rate": 3.721779233341892e-05, "loss": 0.0549, "step": 290 }, { "epoch": 4.4, "learning_rate": 3.6011549905646726e-05, "loss": 0.0596, "step": 295 }, { "epoch": 4.48, "learning_rate": 3.480896069974151e-05, "loss": 0.048, "step": 300 }, { "epoch": 4.55, "learning_rate": 3.361112622758889e-05, "loss": 0.081, "step": 305 }, { "epoch": 4.63, "learning_rate": 3.241914364597488e-05, "loss": 0.0871, "step": 310 }, { "epoch": 4.7, "learning_rate": 3.123410475164486e-05, "loss": 0.062, "step": 315 }, { "epoch": 4.78, "learning_rate": 3.005709498127213e-05, "loss": 0.0763, "step": 320 }, { "epoch": 4.85, "learning_rate": 2.8889192417251865e-05, "loss": 0.0634, "step": 325 }, { "epoch": 4.93, "learning_rate": 2.7731466800231328e-05, "loss": 0.0798, "step": 330 }, { "epoch": 5.0, "learning_rate": 2.658497854928058e-05, "loss": 0.0681, "step": 335 }, { "epoch": 5.0, "eval_loss": 0.49001187086105347, "eval_matthews_correlation": 0.6925407764960233, "eval_runtime": 1.9334, "eval_samples_per_second": 539.471, "eval_steps_per_second": 34.137, "step": 335 }, { "epoch": 5.07, "learning_rate": 2.5450777790601394e-05, "loss": 0.0465, "step": 340 }, { "epoch": 5.15, "learning_rate": 2.432990339566394e-05, "loss": 0.0674, "step": 345 }, { "epoch": 5.22, "learning_rate": 2.322338202965213e-05, "loss": 0.0419, "step": 350 }, { "epoch": 5.3, "learning_rate": 2.2132227211089496e-05, "loss": 0.0481, "step": 355 }, { "epoch": 5.37, "learning_rate": 2.1057438383506473e-05, "loss": 0.0362, "step": 360 }, { "epoch": 5.45, "learning_rate": 2.0000000000000012e-05, "loss": 0.0336, "step": 365 }, { "epoch": 5.52, "learning_rate": 1.89608806215234e-05, "loss": 0.0571, "step": 370 }, { "epoch": 5.6, "learning_rate": 1.79410320297327e-05, "loss": 0.0459, "step": 375 }, { "epoch": 5.67, "learning_rate": 1.694138835520206e-05, "loss": 0.0238, "step": 380 }, { "epoch": 5.75, "learning_rate": 1.5962865221806545e-05, "loss": 0.054, "step": 385 }, { "epoch": 5.82, "learning_rate": 1.5006358908056302e-05, "loss": 0.017, "step": 390 }, { "epoch": 5.9, "learning_rate": 1.4072745526149944e-05, "loss": 0.0224, "step": 395 }, { "epoch": 5.97, "learning_rate": 1.316288021949944e-05, "loss": 0.0506, "step": 400 }, { "epoch": 6.0, "eval_loss": 0.5837473273277283, "eval_matthews_correlation": 0.6785370776792806, "eval_runtime": 1.9297, "eval_samples_per_second": 540.494, "eval_steps_per_second": 34.202, "step": 402 }, { "epoch": 6.04, "learning_rate": 1.2277596379461248e-05, "loss": 0.0317, "step": 405 }, { "epoch": 6.12, "learning_rate": 1.1417704881991271e-05, "loss": 0.0451, "step": 410 }, { "epoch": 6.19, "learning_rate": 1.0583993344922905e-05, "loss": 0.0061, "step": 415 }, { "epoch": 6.27, "learning_rate": 9.777225406548166e-06, "loss": 0.028, "step": 420 }, { "epoch": 6.34, "learning_rate": 8.998140026163119e-06, "loss": 0.0358, "step": 425 }, { "epoch": 6.42, "learning_rate": 8.24745080721788e-06, "loss": 0.0154, "step": 430 }, { "epoch": 6.49, "learning_rate": 7.5258453436913185e-06, "loss": 0.0194, "step": 435 }, { "epoch": 6.57, "learning_rate": 6.83398459028922e-06, "loss": 0.0292, "step": 440 }, { "epoch": 6.64, "learning_rate": 6.172502257042672e-06, "loss": 0.0473, "step": 445 }, { "epoch": 6.72, "learning_rate": 5.542004228861171e-06, "loss": 0.0327, "step": 450 }, { "epoch": 6.79, "learning_rate": 4.94306801057221e-06, "loss": 0.0106, "step": 455 }, { "epoch": 6.87, "learning_rate": 4.376242197955542e-06, "loss": 0.0226, "step": 460 }, { "epoch": 6.94, "learning_rate": 3.842045975256774e-06, "loss": 0.0093, "step": 465 }, { "epoch": 7.0, "eval_loss": 0.6298416256904602, "eval_matthews_correlation": 0.6651627094568674, "eval_runtime": 2.9643, "eval_samples_per_second": 351.848, "eval_steps_per_second": 22.265, "step": 469 }, { "epoch": 7.01, "learning_rate": 3.340968639640463e-06, "loss": 0.0178, "step": 470 }, { "epoch": 7.09, "learning_rate": 2.8734691530181603e-06, "loss": 0.0129, "step": 475 }, { "epoch": 7.16, "learning_rate": 2.439975721662231e-06, "loss": 0.034, "step": 480 }, { "epoch": 7.24, "learning_rate": 2.040885403990136e-06, "loss": 0.0067, "step": 485 }, { "epoch": 7.31, "learning_rate": 1.6765637468787011e-06, "loss": 0.0251, "step": 490 }, { "epoch": 7.39, "learning_rate": 1.3473444508414102e-06, "loss": 0.0137, "step": 495 }, { "epoch": 7.46, "learning_rate": 1.0535290643752448e-06, "loss": 0.0125, "step": 500 }, { "epoch": 7.54, "learning_rate": 7.953867077573396e-07, "loss": 0.0213, "step": 505 }, { "epoch": 7.61, "learning_rate": 5.731538265441572e-07, "loss": 0.0297, "step": 510 }, { "epoch": 7.69, "learning_rate": 3.870339749991292e-07, "loss": 0.0356, "step": 515 }, { "epoch": 7.76, "learning_rate": 2.3719762964709903e-07, "loss": 0.0213, "step": 520 }, { "epoch": 7.84, "learning_rate": 1.2378203312626292e-07, "loss": 0.0122, "step": 525 }, { "epoch": 7.91, "learning_rate": 4.689106848078773e-08, "loss": 0.0355, "step": 530 }, { "epoch": 7.99, "learning_rate": 6.595164009066679e-09, "loss": 0.0244, "step": 535 }, { "epoch": 8.0, "eval_loss": 0.6291629076004028, "eval_matthews_correlation": 0.6750024002589107, "eval_runtime": 2.0387, "eval_samples_per_second": 511.593, "eval_steps_per_second": 32.373, "step": 536 }, { "epoch": 8.0, "step": 536, "total_flos": 4499725164216320.0, "train_loss": 0.148403340177402, "train_runtime": 707.366, "train_samples_per_second": 96.708, "train_steps_per_second": 0.758 } ], "max_steps": 536, "num_train_epochs": 8, "total_flos": 4499725164216320.0, "trial_name": null, "trial_params": null }