{
  "best_metric": 0.7166821507529032,
  "best_model_checkpoint": "finetuned-affecthq/checkpoint-1740",
  "epoch": 10.0,
  "global_step": 1740,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 5.747126436781609e-07,
      "loss": 1.943,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.1494252873563219e-06,
      "loss": 1.9472,
      "step": 20
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.724137931034483e-06,
      "loss": 1.9405,
      "step": 30
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.2988505747126437e-06,
      "loss": 1.9412,
      "step": 40
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.8735632183908046e-06,
      "loss": 1.9366,
      "step": 50
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.448275862068966e-06,
      "loss": 1.9228,
      "step": 60
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.022988505747127e-06,
      "loss": 1.9197,
      "step": 70
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.5977011494252875e-06,
      "loss": 1.9044,
      "step": 80
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.172413793103449e-06,
      "loss": 1.8946,
      "step": 90
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.747126436781609e-06,
      "loss": 1.88,
      "step": 100
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.321839080459771e-06,
      "loss": 1.8502,
      "step": 110
    },
    {
      "epoch": 0.69,
      "learning_rate": 6.896551724137932e-06,
      "loss": 1.8237,
      "step": 120
    },
    {
      "epoch": 0.75,
      "learning_rate": 7.4712643678160925e-06,
      "loss": 1.7791,
      "step": 130
    },
    {
      "epoch": 0.8,
      "learning_rate": 8.045977011494253e-06,
      "loss": 1.73,
      "step": 140
    },
    {
      "epoch": 0.86,
      "learning_rate": 8.620689655172414e-06,
      "loss": 1.6821,
      "step": 150
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.195402298850575e-06,
      "loss": 1.6046,
      "step": 160
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.770114942528738e-06,
      "loss": 1.5413,
      "step": 170
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4897592526051024,
      "eval_f1": 0.44086225397711515,
      "eval_loss": 1.481001615524292,
      "eval_precision": 0.4866510289564923,
      "eval_recall": 0.4897592526051024,
      "eval_runtime": 120.4538,
      "eval_samples_per_second": 23.104,
      "eval_steps_per_second": 0.722,
      "step": 174
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.96168582375479e-06,
      "loss": 1.4788,
      "step": 180
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.897828863346106e-06,
      "loss": 1.4199,
      "step": 190
    },
    {
      "epoch": 1.15,
      "learning_rate": 9.833971902937422e-06,
      "loss": 1.3742,
      "step": 200
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.770114942528738e-06,
      "loss": 1.3286,
      "step": 210
    },
    {
      "epoch": 1.26,
      "learning_rate": 9.706257982120052e-06,
      "loss": 1.2958,
      "step": 220
    },
    {
      "epoch": 1.32,
      "learning_rate": 9.642401021711367e-06,
      "loss": 1.2637,
      "step": 230
    },
    {
      "epoch": 1.38,
      "learning_rate": 9.578544061302683e-06,
      "loss": 1.2016,
      "step": 240
    },
    {
      "epoch": 1.44,
      "learning_rate": 9.514687100893997e-06,
      "loss": 1.1857,
      "step": 250
    },
    {
      "epoch": 1.49,
      "learning_rate": 9.450830140485315e-06,
      "loss": 1.1846,
      "step": 260
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.386973180076629e-06,
      "loss": 1.1625,
      "step": 270
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.323116219667945e-06,
      "loss": 1.1132,
      "step": 280
    },
    {
      "epoch": 1.67,
      "learning_rate": 9.25925925925926e-06,
      "loss": 1.125,
      "step": 290
    },
    {
      "epoch": 1.72,
      "learning_rate": 9.195402298850575e-06,
      "loss": 1.0897,
      "step": 300
    },
    {
      "epoch": 1.78,
      "learning_rate": 9.13154533844189e-06,
      "loss": 1.0997,
      "step": 310
    },
    {
      "epoch": 1.84,
      "learning_rate": 9.067688378033207e-06,
      "loss": 1.1013,
      "step": 320
    },
    {
      "epoch": 1.9,
      "learning_rate": 9.003831417624522e-06,
      "loss": 1.0534,
      "step": 330
    },
    {
      "epoch": 1.95,
      "learning_rate": 8.939974457215838e-06,
      "loss": 1.0367,
      "step": 340
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6155228171038448,
      "eval_f1": 0.6040554573829957,
      "eval_loss": 1.057146430015564,
      "eval_precision": 0.6172414617774471,
      "eval_recall": 0.6155228171038448,
      "eval_runtime": 114.6311,
      "eval_samples_per_second": 24.278,
      "eval_steps_per_second": 0.759,
      "step": 348
    },
    {
      "epoch": 2.01,
      "learning_rate": 8.876117496807152e-06,
      "loss": 1.0755,
      "step": 350
    },
    {
      "epoch": 2.07,
      "learning_rate": 8.812260536398468e-06,
      "loss": 1.0458,
      "step": 360
    },
    {
      "epoch": 2.13,
      "learning_rate": 8.748403575989784e-06,
      "loss": 1.0355,
      "step": 370
    },
    {
      "epoch": 2.18,
      "learning_rate": 8.684546615581098e-06,
      "loss": 1.0074,
      "step": 380
    },
    {
      "epoch": 2.24,
      "learning_rate": 8.620689655172414e-06,
      "loss": 1.0282,
      "step": 390
    },
    {
      "epoch": 2.3,
      "learning_rate": 8.55683269476373e-06,
      "loss": 0.9839,
      "step": 400
    },
    {
      "epoch": 2.36,
      "learning_rate": 8.492975734355046e-06,
      "loss": 1.0115,
      "step": 410
    },
    {
      "epoch": 2.41,
      "learning_rate": 8.429118773946362e-06,
      "loss": 1.0112,
      "step": 420
    },
    {
      "epoch": 2.47,
      "learning_rate": 8.365261813537676e-06,
      "loss": 1.0144,
      "step": 430
    },
    {
      "epoch": 2.53,
      "learning_rate": 8.301404853128992e-06,
      "loss": 0.9901,
      "step": 440
    },
    {
      "epoch": 2.59,
      "learning_rate": 8.237547892720307e-06,
      "loss": 0.9606,
      "step": 450
    },
    {
      "epoch": 2.64,
      "learning_rate": 8.173690932311623e-06,
      "loss": 0.9495,
      "step": 460
    },
    {
      "epoch": 2.7,
      "learning_rate": 8.10983397190294e-06,
      "loss": 0.9854,
      "step": 470
    },
    {
      "epoch": 2.76,
      "learning_rate": 8.045977011494253e-06,
      "loss": 0.9145,
      "step": 480
    },
    {
      "epoch": 2.82,
      "learning_rate": 7.98212005108557e-06,
      "loss": 0.9542,
      "step": 490
    },
    {
      "epoch": 2.87,
      "learning_rate": 7.918263090676885e-06,
      "loss": 0.9523,
      "step": 500
    },
    {
      "epoch": 2.93,
      "learning_rate": 7.854406130268199e-06,
      "loss": 0.9471,
      "step": 510
    },
    {
      "epoch": 2.99,
      "learning_rate": 7.790549169859515e-06,
      "loss": 0.9534,
      "step": 520
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.647502694933525,
      "eval_f1": 0.6374814059701129,
      "eval_loss": 0.9672996997833252,
      "eval_precision": 0.6476383048329313,
      "eval_recall": 0.647502694933525,
      "eval_runtime": 119.4635,
      "eval_samples_per_second": 23.296,
      "eval_steps_per_second": 0.728,
      "step": 522
    },
    {
      "epoch": 3.05,
      "learning_rate": 7.72669220945083e-06,
      "loss": 0.9929,
      "step": 530
    },
    {
      "epoch": 3.1,
      "learning_rate": 7.662835249042147e-06,
      "loss": 0.8761,
      "step": 540
    },
    {
      "epoch": 3.16,
      "learning_rate": 7.598978288633462e-06,
      "loss": 0.943,
      "step": 550
    },
    {
      "epoch": 3.22,
      "learning_rate": 7.535121328224777e-06,
      "loss": 0.919,
      "step": 560
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.4712643678160925e-06,
      "loss": 0.9353,
      "step": 570
    },
    {
      "epoch": 3.33,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.9132,
      "step": 580
    },
    {
      "epoch": 3.39,
      "learning_rate": 7.343550446998724e-06,
      "loss": 0.8632,
      "step": 590
    },
    {
      "epoch": 3.45,
      "learning_rate": 7.279693486590039e-06,
      "loss": 0.9182,
      "step": 600
    },
    {
      "epoch": 3.51,
      "learning_rate": 7.215836526181354e-06,
      "loss": 0.9147,
      "step": 610
    },
    {
      "epoch": 3.56,
      "learning_rate": 7.15197956577267e-06,
      "loss": 0.8804,
      "step": 620
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.088122605363985e-06,
      "loss": 0.8728,
      "step": 630
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.0242656449553e-06,
      "loss": 0.9024,
      "step": 640
    },
    {
      "epoch": 3.74,
      "learning_rate": 6.960408684546616e-06,
      "loss": 0.8856,
      "step": 650
    },
    {
      "epoch": 3.79,
      "learning_rate": 6.896551724137932e-06,
      "loss": 0.8737,
      "step": 660
    },
    {
      "epoch": 3.85,
      "learning_rate": 6.8326947637292475e-06,
      "loss": 0.8769,
      "step": 670
    },
    {
      "epoch": 3.91,
      "learning_rate": 6.7688378033205625e-06,
      "loss": 0.8458,
      "step": 680
    },
    {
      "epoch": 3.97,
      "learning_rate": 6.7049808429118775e-06,
      "loss": 0.8532,
      "step": 690
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6748113546532519,
      "eval_f1": 0.6704058091543136,
      "eval_loss": 0.9056226015090942,
      "eval_precision": 0.671011997838509,
      "eval_recall": 0.6748113546532519,
      "eval_runtime": 122.0902,
      "eval_samples_per_second": 22.795,
      "eval_steps_per_second": 0.713,
      "step": 696
    },
    {
      "epoch": 4.02,
      "learning_rate": 6.641123882503193e-06,
      "loss": 0.8764,
      "step": 700
    },
    {
      "epoch": 4.08,
      "learning_rate": 6.577266922094508e-06,
      "loss": 0.8438,
      "step": 710
    },
    {
      "epoch": 4.14,
      "learning_rate": 6.513409961685824e-06,
      "loss": 0.859,
      "step": 720
    },
    {
      "epoch": 4.2,
      "learning_rate": 6.44955300127714e-06,
      "loss": 0.8099,
      "step": 730
    },
    {
      "epoch": 4.25,
      "learning_rate": 6.385696040868455e-06,
      "loss": 0.8806,
      "step": 740
    },
    {
      "epoch": 4.31,
      "learning_rate": 6.321839080459771e-06,
      "loss": 0.8234,
      "step": 750
    },
    {
      "epoch": 4.37,
      "learning_rate": 6.257982120051086e-06,
      "loss": 0.8176,
      "step": 760
    },
    {
      "epoch": 4.43,
      "learning_rate": 6.194125159642402e-06,
      "loss": 0.82,
      "step": 770
    },
    {
      "epoch": 4.48,
      "learning_rate": 6.130268199233717e-06,
      "loss": 0.8561,
      "step": 780
    },
    {
      "epoch": 4.54,
      "learning_rate": 6.0664112388250325e-06,
      "loss": 0.8107,
      "step": 790
    },
    {
      "epoch": 4.6,
      "learning_rate": 6.002554278416348e-06,
      "loss": 0.8412,
      "step": 800
    },
    {
      "epoch": 4.66,
      "learning_rate": 5.938697318007663e-06,
      "loss": 0.8313,
      "step": 810
    },
    {
      "epoch": 4.71,
      "learning_rate": 5.874840357598979e-06,
      "loss": 0.7896,
      "step": 820
    },
    {
      "epoch": 4.77,
      "learning_rate": 5.810983397190294e-06,
      "loss": 0.8225,
      "step": 830
    },
    {
      "epoch": 4.83,
      "learning_rate": 5.747126436781609e-06,
      "loss": 0.8159,
      "step": 840
    },
    {
      "epoch": 4.89,
      "learning_rate": 5.683269476372925e-06,
      "loss": 0.8163,
      "step": 850
    },
    {
      "epoch": 4.94,
      "learning_rate": 5.619412515964241e-06,
      "loss": 0.8419,
      "step": 860
    },
    {
      "epoch": 5.0,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.8211,
      "step": 870
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6902623068630974,
      "eval_f1": 0.6835928391775077,
      "eval_loss": 0.8707393407821655,
      "eval_precision": 0.6911502048300588,
      "eval_recall": 0.6902623068630974,
      "eval_runtime": 119.881,
      "eval_samples_per_second": 23.215,
      "eval_steps_per_second": 0.726,
      "step": 870
    },
    {
      "epoch": 5.06,
      "learning_rate": 5.491698595146872e-06,
      "loss": 0.8147,
      "step": 880
    },
    {
      "epoch": 5.11,
      "learning_rate": 5.427841634738187e-06,
      "loss": 0.8019,
      "step": 890
    },
    {
      "epoch": 5.17,
      "learning_rate": 5.3639846743295025e-06,
      "loss": 0.7958,
      "step": 900
    },
    {
      "epoch": 5.23,
      "learning_rate": 5.3001277139208175e-06,
      "loss": 0.7809,
      "step": 910
    },
    {
      "epoch": 5.29,
      "learning_rate": 5.236270753512134e-06,
      "loss": 0.7725,
      "step": 920
    },
    {
      "epoch": 5.34,
      "learning_rate": 5.172413793103449e-06,
      "loss": 0.7699,
      "step": 930
    },
    {
      "epoch": 5.4,
      "learning_rate": 5.108556832694764e-06,
      "loss": 0.7894,
      "step": 940
    },
    {
      "epoch": 5.46,
      "learning_rate": 5.04469987228608e-06,
      "loss": 0.7626,
      "step": 950
    },
    {
      "epoch": 5.52,
      "learning_rate": 4.980842911877395e-06,
      "loss": 0.7706,
      "step": 960
    },
    {
      "epoch": 5.57,
      "learning_rate": 4.916985951468711e-06,
      "loss": 0.7901,
      "step": 970
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.853128991060026e-06,
      "loss": 0.7871,
      "step": 980
    },
    {
      "epoch": 5.69,
      "learning_rate": 4.789272030651342e-06,
      "loss": 0.7821,
      "step": 990
    },
    {
      "epoch": 5.75,
      "learning_rate": 4.7254150702426575e-06,
      "loss": 0.7738,
      "step": 1000
    },
    {
      "epoch": 5.8,
      "learning_rate": 4.6615581098339725e-06,
      "loss": 0.7532,
      "step": 1010
    },
    {
      "epoch": 5.86,
      "learning_rate": 4.5977011494252875e-06,
      "loss": 0.7959,
      "step": 1020
    },
    {
      "epoch": 5.92,
      "learning_rate": 4.533844189016603e-06,
      "loss": 0.7924,
      "step": 1030
    },
    {
      "epoch": 5.98,
      "learning_rate": 4.469987228607919e-06,
      "loss": 0.7797,
      "step": 1040
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7049946101329501,
      "eval_f1": 0.7018550701144215,
      "eval_loss": 0.8472315669059753,
      "eval_precision": 0.7049811673336778,
      "eval_recall": 0.7049946101329501,
      "eval_runtime": 118.5302,
      "eval_samples_per_second": 23.479,
      "eval_steps_per_second": 0.734,
      "step": 1044
    },
    {
      "epoch": 6.03,
      "learning_rate": 4.406130268199234e-06,
      "loss": 0.7548,
      "step": 1050
    },
    {
      "epoch": 6.09,
      "learning_rate": 4.342273307790549e-06,
      "loss": 0.7448,
      "step": 1060
    },
    {
      "epoch": 6.15,
      "learning_rate": 4.278416347381865e-06,
      "loss": 0.7411,
      "step": 1070
    },
    {
      "epoch": 6.21,
      "learning_rate": 4.214559386973181e-06,
      "loss": 0.7411,
      "step": 1080
    },
    {
      "epoch": 6.26,
      "learning_rate": 4.150702426564496e-06,
      "loss": 0.7549,
      "step": 1090
    },
    {
      "epoch": 6.32,
      "learning_rate": 4.086845466155812e-06,
      "loss": 0.7561,
      "step": 1100
    },
    {
      "epoch": 6.38,
      "learning_rate": 4.022988505747127e-06,
      "loss": 0.7306,
      "step": 1110
    },
    {
      "epoch": 6.44,
      "learning_rate": 3.9591315453384425e-06,
      "loss": 0.7402,
      "step": 1120
    },
    {
      "epoch": 6.49,
      "learning_rate": 3.8952745849297575e-06,
      "loss": 0.7481,
      "step": 1130
    },
    {
      "epoch": 6.55,
      "learning_rate": 3.831417624521073e-06,
      "loss": 0.7514,
      "step": 1140
    },
    {
      "epoch": 6.61,
      "learning_rate": 3.7675606641123883e-06,
      "loss": 0.7209,
      "step": 1150
    },
    {
      "epoch": 6.67,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.7588,
      "step": 1160
    },
    {
      "epoch": 6.72,
      "learning_rate": 3.6398467432950196e-06,
      "loss": 0.7301,
      "step": 1170
    },
    {
      "epoch": 6.78,
      "learning_rate": 3.575989782886335e-06,
      "loss": 0.7595,
      "step": 1180
    },
    {
      "epoch": 6.84,
      "learning_rate": 3.51213282247765e-06,
      "loss": 0.7503,
      "step": 1190
    },
    {
      "epoch": 6.9,
      "learning_rate": 3.448275862068966e-06,
      "loss": 0.7577,
      "step": 1200
    },
    {
      "epoch": 6.95,
      "learning_rate": 3.3844189016602812e-06,
      "loss": 0.7816,
      "step": 1210
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.711103126122889,
      "eval_f1": 0.7096465788234323,
      "eval_loss": 0.8298304677009583,
      "eval_precision": 0.709944412281279,
      "eval_recall": 0.711103126122889,
      "eval_runtime": 119.1978,
      "eval_samples_per_second": 23.348,
      "eval_steps_per_second": 0.73,
      "step": 1218
    },
    {
      "epoch": 7.01,
      "learning_rate": 3.3205619412515967e-06,
      "loss": 0.7419,
      "step": 1220
    },
    {
      "epoch": 7.07,
      "learning_rate": 3.256704980842912e-06,
      "loss": 0.7268,
      "step": 1230
    },
    {
      "epoch": 7.13,
      "learning_rate": 3.1928480204342275e-06,
      "loss": 0.7362,
      "step": 1240
    },
    {
      "epoch": 7.18,
      "learning_rate": 3.128991060025543e-06,
      "loss": 0.7113,
      "step": 1250
    },
    {
      "epoch": 7.24,
      "learning_rate": 3.0651340996168583e-06,
      "loss": 0.7304,
      "step": 1260
    },
    {
      "epoch": 7.3,
      "learning_rate": 3.001277139208174e-06,
      "loss": 0.7268,
      "step": 1270
    },
    {
      "epoch": 7.36,
      "learning_rate": 2.9374201787994896e-06,
      "loss": 0.7492,
      "step": 1280
    },
    {
      "epoch": 7.41,
      "learning_rate": 2.8735632183908046e-06,
      "loss": 0.7495,
      "step": 1290
    },
    {
      "epoch": 7.47,
      "learning_rate": 2.8097062579821204e-06,
      "loss": 0.7137,
      "step": 1300
    },
    {
      "epoch": 7.53,
      "learning_rate": 2.745849297573436e-06,
      "loss": 0.6653,
      "step": 1310
    },
    {
      "epoch": 7.59,
      "learning_rate": 2.6819923371647512e-06,
      "loss": 0.7458,
      "step": 1320
    },
    {
      "epoch": 7.64,
      "learning_rate": 2.618135376756067e-06,
      "loss": 0.7084,
      "step": 1330
    },
    {
      "epoch": 7.7,
      "learning_rate": 2.554278416347382e-06,
      "loss": 0.7152,
      "step": 1340
    },
    {
      "epoch": 7.76,
      "learning_rate": 2.4904214559386975e-06,
      "loss": 0.7296,
      "step": 1350
    },
    {
      "epoch": 7.82,
      "learning_rate": 2.426564495530013e-06,
      "loss": 0.7302,
      "step": 1360
    },
    {
      "epoch": 7.87,
      "learning_rate": 2.3627075351213288e-06,
      "loss": 0.7115,
      "step": 1370
    },
    {
      "epoch": 7.93,
      "learning_rate": 2.2988505747126437e-06,
      "loss": 0.7102,
      "step": 1380
    },
    {
      "epoch": 7.99,
      "learning_rate": 2.2349936143039596e-06,
      "loss": 0.7135,
      "step": 1390
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.711103126122889,
      "eval_f1": 0.7104715357081519,
      "eval_loss": 0.8186008930206299,
      "eval_precision": 0.7115641078956643,
      "eval_recall": 0.711103126122889,
      "eval_runtime": 117.6876,
      "eval_samples_per_second": 23.647,
      "eval_steps_per_second": 0.739,
      "step": 1392
    },
    {
      "epoch": 8.05,
      "learning_rate": 2.1711366538952746e-06,
      "loss": 0.7341,
      "step": 1400
    },
    {
      "epoch": 8.1,
      "learning_rate": 2.1072796934865904e-06,
      "loss": 0.7087,
      "step": 1410
    },
    {
      "epoch": 8.16,
      "learning_rate": 2.043422733077906e-06,
      "loss": 0.6933,
      "step": 1420
    },
    {
      "epoch": 8.22,
      "learning_rate": 1.9795657726692212e-06,
      "loss": 0.6797,
      "step": 1430
    },
    {
      "epoch": 8.28,
      "learning_rate": 1.9157088122605367e-06,
      "loss": 0.7131,
      "step": 1440
    },
    {
      "epoch": 8.33,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.6984,
      "step": 1450
    },
    {
      "epoch": 8.39,
      "learning_rate": 1.7879948914431675e-06,
      "loss": 0.7297,
      "step": 1460
    },
    {
      "epoch": 8.45,
      "learning_rate": 1.724137931034483e-06,
      "loss": 0.682,
      "step": 1470
    },
    {
      "epoch": 8.51,
      "learning_rate": 1.6602809706257983e-06,
      "loss": 0.7342,
      "step": 1480
    },
    {
      "epoch": 8.56,
      "learning_rate": 1.5964240102171137e-06,
      "loss": 0.6765,
      "step": 1490
    },
    {
      "epoch": 8.62,
      "learning_rate": 1.5325670498084292e-06,
      "loss": 0.7025,
      "step": 1500
    },
    {
      "epoch": 8.68,
      "learning_rate": 1.4687100893997448e-06,
      "loss": 0.7083,
      "step": 1510
    },
    {
      "epoch": 8.74,
      "learning_rate": 1.4048531289910602e-06,
      "loss": 0.736,
      "step": 1520
    },
    {
      "epoch": 8.79,
      "learning_rate": 1.3409961685823756e-06,
      "loss": 0.7322,
      "step": 1530
    },
    {
      "epoch": 8.85,
      "learning_rate": 1.277139208173691e-06,
      "loss": 0.7155,
      "step": 1540
    },
    {
      "epoch": 8.91,
      "learning_rate": 1.2132822477650065e-06,
      "loss": 0.6858,
      "step": 1550
    },
    {
      "epoch": 8.97,
      "learning_rate": 1.1494252873563219e-06,
      "loss": 0.6697,
      "step": 1560
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7139777218828602,
      "eval_f1": 0.7125796647900873,
      "eval_loss": 0.8142775893211365,
      "eval_precision": 0.7124041188610695,
      "eval_recall": 0.7139777218828602,
      "eval_runtime": 118.3402,
      "eval_samples_per_second": 23.517,
      "eval_steps_per_second": 0.735,
      "step": 1566
    },
    {
      "epoch": 9.02,
      "learning_rate": 1.0855683269476373e-06,
      "loss": 0.6946,
      "step": 1570
    },
    {
      "epoch": 9.08,
      "learning_rate": 1.021711366538953e-06,
      "loss": 0.7208,
      "step": 1580
    },
    {
      "epoch": 9.14,
      "learning_rate": 9.578544061302683e-07,
      "loss": 0.6807,
      "step": 1590
    },
    {
      "epoch": 9.2,
      "learning_rate": 8.939974457215837e-07,
      "loss": 0.6802,
      "step": 1600
    },
    {
      "epoch": 9.25,
      "learning_rate": 8.301404853128992e-07,
      "loss": 0.6758,
      "step": 1610
    },
    {
      "epoch": 9.31,
      "learning_rate": 7.662835249042146e-07,
      "loss": 0.701,
      "step": 1620
    },
    {
      "epoch": 9.37,
      "learning_rate": 7.024265644955301e-07,
      "loss": 0.6773,
      "step": 1630
    },
    {
      "epoch": 9.43,
      "learning_rate": 6.385696040868455e-07,
      "loss": 0.7187,
      "step": 1640
    },
    {
      "epoch": 9.48,
      "learning_rate": 5.747126436781609e-07,
      "loss": 0.718,
      "step": 1650
    },
    {
      "epoch": 9.54,
      "learning_rate": 5.108556832694765e-07,
      "loss": 0.6475,
      "step": 1660
    },
    {
      "epoch": 9.6,
      "learning_rate": 4.469987228607919e-07,
      "loss": 0.6674,
      "step": 1670
    },
    {
      "epoch": 9.66,
      "learning_rate": 3.831417624521073e-07,
      "loss": 0.7031,
      "step": 1680
    },
    {
      "epoch": 9.71,
      "learning_rate": 3.1928480204342276e-07,
      "loss": 0.6774,
      "step": 1690
    },
    {
      "epoch": 9.77,
      "learning_rate": 2.5542784163473823e-07,
      "loss": 0.714,
      "step": 1700
    },
    {
      "epoch": 9.83,
      "learning_rate": 1.9157088122605365e-07,
      "loss": 0.7166,
      "step": 1710
    },
    {
      "epoch": 9.89,
      "learning_rate": 1.2771392081736911e-07,
      "loss": 0.6785,
      "step": 1720
    },
    {
      "epoch": 9.94,
      "learning_rate": 6.385696040868456e-08,
      "loss": 0.6709,
      "step": 1730
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0,
      "loss": 0.6765,
      "step": 1740
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7179302910528207,
      "eval_f1": 0.7166821507529032,
      "eval_loss": 0.8116216063499451,
      "eval_precision": 0.7173911115103917,
      "eval_recall": 0.7179302910528207,
      "eval_runtime": 116.0529,
      "eval_samples_per_second": 23.98,
      "eval_steps_per_second": 0.75,
      "step": 1740
    },
    {
      "epoch": 10.0,
      "step": 1740,
      "total_flos": 1.7248939997743964e+19,
      "train_loss": 0.9384366534222132,
      "train_runtime": 18925.0232,
      "train_samples_per_second": 11.761,
      "train_steps_per_second": 0.092
    }
  ],
  "max_steps": 1740,
  "num_train_epochs": 10,
  "total_flos": 1.7248939997743964e+19,
  "trial_name": null,
  "trial_params": null
}