{
  "best_metric": 0.9835737347602844,
  "best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-1557",
  "epoch": 9.997118155619596,
  "global_step": 1730,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1.7341040462427742e-05,
      "loss": 3.8045,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 3.4682080924855485e-05,
      "loss": 3.795,
      "step": 20
    },
    {
      "epoch": 0.17,
      "learning_rate": 5.2023121387283234e-05,
      "loss": 3.7695,
      "step": 30
    },
    {
      "epoch": 0.23,
      "learning_rate": 6.936416184971097e-05,
      "loss": 3.748,
      "step": 40
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.670520231213873e-05,
      "loss": 3.7078,
      "step": 50
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00010404624277456647,
      "loss": 3.7034,
      "step": 60
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00012138728323699421,
      "loss": 3.6422,
      "step": 70
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00013872832369942194,
      "loss": 3.5837,
      "step": 80
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00015606936416184968,
      "loss": 3.4935,
      "step": 90
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017341040462427745,
      "loss": 3.4542,
      "step": 100
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001907514450867052,
      "loss": 3.3921,
      "step": 110
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00020809248554913294,
      "loss": 3.3001,
      "step": 120
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00022543352601156065,
      "loss": 3.272,
      "step": 130
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00024277456647398842,
      "loss": 3.1537,
      "step": 140
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00026011560693641616,
      "loss": 3.1013,
      "step": 150
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.0002774566473988439,
      "loss": 2.9714,
      "step": 160
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.00029479768786127165,
      "loss": 2.9568,
      "step": 170
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.11463994565217392,
      "eval_loss": 3.2865560054779053,
      "eval_runtime": 166.1472,
      "eval_samples_per_second": 35.438,
      "eval_steps_per_second": 8.86,
      "step": 173
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.00029865125240847784,
      "loss": 3.2585,
      "step": 180
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.0002967244701348747,
      "loss": 2.9694,
      "step": 190
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.00029479768786127165,
      "loss": 2.7459,
      "step": 200
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.0002928709055876686,
      "loss": 2.6486,
      "step": 210
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.0002909441233140655,
      "loss": 2.6249,
      "step": 220
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0002890173410404624,
      "loss": 2.5935,
      "step": 230
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.0002870905587668593,
      "loss": 2.422,
      "step": 240
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.0002851637764932562,
      "loss": 2.4574,
      "step": 250
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0002832369942196532,
      "loss": 2.427,
      "step": 260
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.00028131021194605007,
      "loss": 2.3682,
      "step": 270
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.000279383429672447,
      "loss": 2.2995,
      "step": 280
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.0002774566473988439,
      "loss": 2.1737,
      "step": 290
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.0002755298651252408,
      "loss": 2.1063,
      "step": 300
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.00027360308285163774,
      "loss": 2.0759,
      "step": 310
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0002716763005780347,
      "loss": 1.9768,
      "step": 320
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.00026974951830443155,
      "loss": 2.0376,
      "step": 330
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.0002678227360308285,
      "loss": 1.9243,
      "step": 340
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.38400135869565216,
      "eval_loss": 2.124117612838745,
      "eval_runtime": 166.2747,
      "eval_samples_per_second": 35.411,
      "eval_steps_per_second": 8.853,
      "step": 346
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.0002658959537572254,
      "loss": 1.8395,
      "step": 350
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00026396917148362235,
      "loss": 1.7052,
      "step": 360
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.00026204238921001923,
      "loss": 1.6084,
      "step": 370
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.00026011560693641616,
      "loss": 1.5298,
      "step": 380
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0002581888246628131,
      "loss": 1.5755,
      "step": 390
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00025626204238921,
      "loss": 1.4869,
      "step": 400
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.0002543352601156069,
      "loss": 1.5374,
      "step": 410
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.00025240847784200384,
      "loss": 1.448,
      "step": 420
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.00025048169556840077,
      "loss": 1.4246,
      "step": 430
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.00024855491329479765,
      "loss": 1.4545,
      "step": 440
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.0002466281310211946,
      "loss": 1.4417,
      "step": 450
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.00024470134874759146,
      "loss": 1.3385,
      "step": 460
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.00024277456647398842,
      "loss": 1.4229,
      "step": 470
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.00024084778420038535,
      "loss": 1.4225,
      "step": 480
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.00023892100192678226,
      "loss": 1.4259,
      "step": 490
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.00023699421965317916,
      "loss": 1.3656,
      "step": 500
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.00023506743737957607,
      "loss": 1.2923,
      "step": 510
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5489130434782609,
      "eval_loss": 1.5498348474502563,
      "eval_runtime": 166.2498,
      "eval_samples_per_second": 35.417,
      "eval_steps_per_second": 8.854,
      "step": 519
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.00023314065510597303,
      "loss": 1.3854,
      "step": 520
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.00023121387283236994,
      "loss": 1.1289,
      "step": 530
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.00022928709055876684,
      "loss": 0.921,
      "step": 540
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.00022736030828516375,
      "loss": 1.004,
      "step": 550
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.00022543352601156065,
      "loss": 1.0642,
      "step": 560
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.0002235067437379576,
      "loss": 1.0275,
      "step": 570
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.00022157996146435452,
      "loss": 0.9369,
      "step": 580
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.00021965317919075142,
      "loss": 0.9486,
      "step": 590
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.00021772639691714833,
      "loss": 1.0021,
      "step": 600
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.00021579961464354526,
      "loss": 0.8916,
      "step": 610
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.0002138728323699422,
      "loss": 0.9455,
      "step": 620
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.0002119460500963391,
      "loss": 0.9796,
      "step": 630
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.000210019267822736,
      "loss": 0.8681,
      "step": 640
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.00020809248554913294,
      "loss": 0.948,
      "step": 650
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.00020616570327552984,
      "loss": 0.9276,
      "step": 660
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.00020423892100192677,
      "loss": 0.847,
      "step": 670
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.00020231213872832368,
      "loss": 0.9261,
      "step": 680
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.0002003853564547206,
      "loss": 0.8659,
      "step": 690
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6126019021739131,
      "eval_loss": 1.4952939748764038,
      "eval_runtime": 165.9075,
      "eval_samples_per_second": 35.49,
      "eval_steps_per_second": 8.872,
      "step": 692
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.00019845857418111752,
      "loss": 0.6758,
      "step": 700
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.00019653179190751442,
      "loss": 0.5885,
      "step": 710
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.00019460500963391133,
      "loss": 0.6432,
      "step": 720
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.0001926782273603083,
      "loss": 0.6774,
      "step": 730
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.0001907514450867052,
      "loss": 0.7333,
      "step": 740
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.0001888246628131021,
      "loss": 0.6775,
      "step": 750
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.000186897880539499,
      "loss": 0.6461,
      "step": 760
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.0001849710982658959,
      "loss": 0.6192,
      "step": 770
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.00018304431599229287,
      "loss": 0.569,
      "step": 780
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.00018131021194605008,
      "loss": 0.6447,
      "step": 790
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.00017938342967244698,
      "loss": 0.556,
      "step": 800
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.0001774566473988439,
      "loss": 0.6883,
      "step": 810
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.00017552986512524085,
      "loss": 0.6146,
      "step": 820
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.00017360308285163775,
      "loss": 0.6318,
      "step": 830
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.00017167630057803466,
      "loss": 0.5785,
      "step": 840
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.00016974951830443156,
      "loss": 0.6383,
      "step": 850
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.0001678227360308285,
      "loss": 0.5539,
      "step": 860
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6925951086956522,
      "eval_loss": 1.2430508136749268,
      "eval_runtime": 166.7541,
      "eval_samples_per_second": 35.309,
      "eval_steps_per_second": 8.827,
      "step": 865
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.00016589595375722543,
      "loss": 0.5354,
      "step": 870
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.00016396917148362233,
      "loss": 0.4011,
      "step": 880
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00016204238921001924,
      "loss": 0.4009,
      "step": 890
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.00016011560693641617,
      "loss": 0.459,
      "step": 900
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.00015818882466281308,
      "loss": 0.3829,
      "step": 910
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.00015626204238921,
      "loss": 0.428,
      "step": 920
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.00015433526011560692,
      "loss": 0.4654,
      "step": 930
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.00015240847784200385,
      "loss": 0.4531,
      "step": 940
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.00015067437379576106,
      "loss": 0.4221,
      "step": 950
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.000148747591522158,
      "loss": 0.3927,
      "step": 960
    },
    {
      "epoch": 5.61,
      "learning_rate": 0.0001468208092485549,
      "loss": 0.4451,
      "step": 970
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.00014489402697495183,
      "loss": 0.4363,
      "step": 980
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.00014296724470134873,
      "loss": 0.4583,
      "step": 990
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.00014104046242774566,
      "loss": 0.3788,
      "step": 1000
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.00013911368015414257,
      "loss": 0.4333,
      "step": 1010
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.0001371868978805395,
      "loss": 0.3448,
      "step": 1020
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.0001352601156069364,
      "loss": 0.4101,
      "step": 1030
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7231657608695652,
      "eval_loss": 1.1443275213241577,
      "eval_runtime": 167.1514,
      "eval_samples_per_second": 35.226,
      "eval_steps_per_second": 8.806,
      "step": 1038
    },
    {
      "epoch": 6.01,
      "learning_rate": 0.0001333333333333333,
      "loss": 0.4535,
      "step": 1040
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.00013140655105973025,
      "loss": 0.3461,
      "step": 1050
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00012947976878612715,
      "loss": 0.2761,
      "step": 1060
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.00012755298651252408,
      "loss": 0.2911,
      "step": 1070
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.000125626204238921,
      "loss": 0.2985,
      "step": 1080
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.0001236994219653179,
      "loss": 0.2858,
      "step": 1090
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.00012177263969171483,
      "loss": 0.3162,
      "step": 1100
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.00011984585741811175,
      "loss": 0.3326,
      "step": 1110
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.00011791907514450866,
      "loss": 0.3024,
      "step": 1120
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.00011599229287090558,
      "loss": 0.2981,
      "step": 1130
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.00011406551059730249,
      "loss": 0.3112,
      "step": 1140
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.00011213872832369942,
      "loss": 0.2206,
      "step": 1150
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.00011021194605009633,
      "loss": 0.3052,
      "step": 1160
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.00010828516377649323,
      "loss": 0.2799,
      "step": 1170
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.00010635838150289017,
      "loss": 0.2793,
      "step": 1180
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.00010443159922928707,
      "loss": 0.2992,
      "step": 1190
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.000102504816955684,
      "loss": 0.3151,
      "step": 1200
    },
    {
      "epoch": 6.99,
      "learning_rate": 0.00010057803468208092,
      "loss": 0.2945,
      "step": 1210
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7544157608695652,
      "eval_loss": 1.0869863033294678,
      "eval_runtime": 167.4711,
      "eval_samples_per_second": 35.158,
      "eval_steps_per_second": 8.79,
      "step": 1211
    },
    {
      "epoch": 7.05,
      "learning_rate": 9.865125240847783e-05,
      "loss": 0.2053,
      "step": 1220
    },
    {
      "epoch": 7.11,
      "learning_rate": 9.672447013487476e-05,
      "loss": 0.1619,
      "step": 1230
    },
    {
      "epoch": 7.17,
      "learning_rate": 9.479768786127167e-05,
      "loss": 0.2176,
      "step": 1240
    },
    {
      "epoch": 7.22,
      "learning_rate": 9.28709055876686e-05,
      "loss": 0.1907,
      "step": 1250
    },
    {
      "epoch": 7.28,
      "learning_rate": 9.09441233140655e-05,
      "loss": 0.1517,
      "step": 1260
    },
    {
      "epoch": 7.34,
      "learning_rate": 8.901734104046241e-05,
      "loss": 0.1932,
      "step": 1270
    },
    {
      "epoch": 7.4,
      "learning_rate": 8.709055876685934e-05,
      "loss": 0.2175,
      "step": 1280
    },
    {
      "epoch": 7.46,
      "learning_rate": 8.516377649325625e-05,
      "loss": 0.1794,
      "step": 1290
    },
    {
      "epoch": 7.51,
      "learning_rate": 8.323699421965317e-05,
      "loss": 0.1934,
      "step": 1300
    },
    {
      "epoch": 7.57,
      "learning_rate": 8.131021194605009e-05,
      "loss": 0.2138,
      "step": 1310
    },
    {
      "epoch": 7.63,
      "learning_rate": 7.9383429672447e-05,
      "loss": 0.1451,
      "step": 1320
    },
    {
      "epoch": 7.69,
      "learning_rate": 7.745664739884392e-05,
      "loss": 0.1691,
      "step": 1330
    },
    {
      "epoch": 7.74,
      "learning_rate": 7.552986512524084e-05,
      "loss": 0.1672,
      "step": 1340
    },
    {
      "epoch": 7.8,
      "learning_rate": 7.360308285163776e-05,
      "loss": 0.2004,
      "step": 1350
    },
    {
      "epoch": 7.86,
      "learning_rate": 7.167630057803468e-05,
      "loss": 0.1925,
      "step": 1360
    },
    {
      "epoch": 7.92,
      "learning_rate": 6.974951830443159e-05,
      "loss": 0.1649,
      "step": 1370
    },
    {
      "epoch": 7.97,
      "learning_rate": 6.78227360308285e-05,
      "loss": 0.1552,
      "step": 1380
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7661345108695652,
      "eval_loss": 1.1080151796340942,
      "eval_runtime": 167.1864,
      "eval_samples_per_second": 35.218,
      "eval_steps_per_second": 8.805,
      "step": 1384
    },
    {
      "epoch": 8.03,
      "learning_rate": 6.589595375722542e-05,
      "loss": 0.1643,
      "step": 1390
    },
    {
      "epoch": 8.09,
      "learning_rate": 6.396917148362234e-05,
      "loss": 0.08,
      "step": 1400
    },
    {
      "epoch": 8.15,
      "learning_rate": 6.204238921001926e-05,
      "loss": 0.1022,
      "step": 1410
    },
    {
      "epoch": 8.21,
      "learning_rate": 6.0115606936416174e-05,
      "loss": 0.1466,
      "step": 1420
    },
    {
      "epoch": 8.27,
      "learning_rate": 5.81888246628131e-05,
      "loss": 0.1262,
      "step": 1430
    },
    {
      "epoch": 8.32,
      "learning_rate": 5.626204238921002e-05,
      "loss": 0.0895,
      "step": 1440
    },
    {
      "epoch": 8.38,
      "learning_rate": 5.433526011560694e-05,
      "loss": 0.1155,
      "step": 1450
    },
    {
      "epoch": 8.44,
      "learning_rate": 5.2408477842003843e-05,
      "loss": 0.1205,
      "step": 1460
    },
    {
      "epoch": 8.5,
      "learning_rate": 5.048169556840076e-05,
      "loss": 0.1196,
      "step": 1470
    },
    {
      "epoch": 8.55,
      "learning_rate": 4.855491329479768e-05,
      "loss": 0.1488,
      "step": 1480
    },
    {
      "epoch": 8.61,
      "learning_rate": 4.66281310211946e-05,
      "loss": 0.1263,
      "step": 1490
    },
    {
      "epoch": 8.67,
      "learning_rate": 4.470134874759151e-05,
      "loss": 0.1289,
      "step": 1500
    },
    {
      "epoch": 8.73,
      "learning_rate": 4.277456647398843e-05,
      "loss": 0.1628,
      "step": 1510
    },
    {
      "epoch": 8.78,
      "learning_rate": 4.084778420038535e-05,
      "loss": 0.1665,
      "step": 1520
    },
    {
      "epoch": 8.84,
      "learning_rate": 3.892100192678227e-05,
      "loss": 0.1234,
      "step": 1530
    },
    {
      "epoch": 8.9,
      "learning_rate": 3.699421965317919e-05,
      "loss": 0.1146,
      "step": 1540
    },
    {
      "epoch": 8.96,
      "learning_rate": 3.506743737957611e-05,
      "loss": 0.0968,
      "step": 1550
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7944972826086957,
      "eval_loss": 0.9835737347602844,
      "eval_runtime": 167.3463,
      "eval_samples_per_second": 35.185,
      "eval_steps_per_second": 8.796,
      "step": 1557
    },
    {
      "epoch": 9.02,
      "learning_rate": 3.314065510597302e-05,
      "loss": 0.1091,
      "step": 1560
    },
    {
      "epoch": 9.07,
      "learning_rate": 3.121387283236994e-05,
      "loss": 0.1036,
      "step": 1570
    },
    {
      "epoch": 9.13,
      "learning_rate": 2.9287090558766855e-05,
      "loss": 0.1183,
      "step": 1580
    },
    {
      "epoch": 9.19,
      "learning_rate": 2.7360308285163774e-05,
      "loss": 0.0903,
      "step": 1590
    },
    {
      "epoch": 9.25,
      "learning_rate": 2.543352601156069e-05,
      "loss": 0.0808,
      "step": 1600
    },
    {
      "epoch": 9.31,
      "learning_rate": 2.350674373795761e-05,
      "loss": 0.0749,
      "step": 1610
    },
    {
      "epoch": 9.36,
      "learning_rate": 2.1579961464354524e-05,
      "loss": 0.0523,
      "step": 1620
    },
    {
      "epoch": 9.42,
      "learning_rate": 1.9653179190751443e-05,
      "loss": 0.1112,
      "step": 1630
    },
    {
      "epoch": 9.48,
      "learning_rate": 1.7726396917148362e-05,
      "loss": 0.0949,
      "step": 1640
    },
    {
      "epoch": 9.54,
      "learning_rate": 1.5799614643545278e-05,
      "loss": 0.0858,
      "step": 1650
    },
    {
      "epoch": 9.59,
      "learning_rate": 1.3872832369942195e-05,
      "loss": 0.0603,
      "step": 1660
    },
    {
      "epoch": 9.65,
      "learning_rate": 1.1946050096339112e-05,
      "loss": 0.0718,
      "step": 1670
    },
    {
      "epoch": 9.71,
      "learning_rate": 1.0019267822736031e-05,
      "loss": 0.069,
      "step": 1680
    },
    {
      "epoch": 9.77,
      "learning_rate": 8.092485549132947e-06,
      "loss": 0.0748,
      "step": 1690
    },
    {
      "epoch": 9.82,
      "learning_rate": 6.165703275529864e-06,
      "loss": 0.0831,
      "step": 1700
    },
    {
      "epoch": 9.88,
      "learning_rate": 4.2389210019267816e-06,
      "loss": 0.0635,
      "step": 1710
    },
    {
      "epoch": 9.94,
      "learning_rate": 2.3121387283236993e-06,
      "loss": 0.0675,
      "step": 1720
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.853564547206165e-07,
      "loss": 0.0623,
      "step": 1730
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7992527173913043,
      "eval_loss": 1.0252065658569336,
      "eval_runtime": 167.8251,
      "eval_samples_per_second": 35.084,
      "eval_steps_per_second": 8.771,
      "step": 1730
    },
    {
      "epoch": 10.0,
      "step": 1730,
      "total_flos": 1.8408056270747849e+19,
      "train_loss": 1.0006333545797823,
      "train_runtime": 3643.6893,
      "train_samples_per_second": 60.911,
      "train_steps_per_second": 0.475
    }
  ],
  "max_steps": 1730,
  "num_train_epochs": 10,
  "total_flos": 1.8408056270747849e+19,
  "trial_name": null,
  "trial_params": null
}