{ "best_metric": 1.0, "best_model_checkpoint": "vit-base-patch16-224-in21k-face-recognition/checkpoint-1488", "epoch": 8.0, "global_step": 2976, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.026845637583892e-06, "loss": 2.214, "step": 10 }, { "epoch": 0.05, "learning_rate": 8.053691275167785e-06, "loss": 2.1532, "step": 20 }, { "epoch": 0.08, "learning_rate": 1.2080536912751678e-05, "loss": 2.0195, "step": 30 }, { "epoch": 0.11, "learning_rate": 1.610738255033557e-05, "loss": 1.7918, "step": 40 }, { "epoch": 0.13, "learning_rate": 2.0134228187919465e-05, "loss": 1.4794, "step": 50 }, { "epoch": 0.16, "learning_rate": 2.4161073825503356e-05, "loss": 1.1516, "step": 60 }, { "epoch": 0.19, "learning_rate": 2.8187919463087248e-05, "loss": 0.8457, "step": 70 }, { "epoch": 0.22, "learning_rate": 3.221476510067114e-05, "loss": 0.6145, "step": 80 }, { "epoch": 0.24, "learning_rate": 3.6241610738255034e-05, "loss": 0.446, "step": 90 }, { "epoch": 0.27, "learning_rate": 4.026845637583893e-05, "loss": 0.3573, "step": 100 }, { "epoch": 0.3, "learning_rate": 4.4295302013422824e-05, "loss": 0.289, "step": 110 }, { "epoch": 0.32, "learning_rate": 4.832214765100671e-05, "loss": 0.2559, "step": 120 }, { "epoch": 0.35, "learning_rate": 5.23489932885906e-05, "loss": 0.2186, "step": 130 }, { "epoch": 0.38, "learning_rate": 5.6375838926174495e-05, "loss": 0.1936, "step": 140 }, { "epoch": 0.4, "learning_rate": 6.04026845637584e-05, "loss": 0.181, "step": 150 }, { "epoch": 0.43, "learning_rate": 6.442953020134228e-05, "loss": 0.1621, "step": 160 }, { "epoch": 0.46, "learning_rate": 6.845637583892617e-05, "loss": 0.1518, "step": 170 }, { "epoch": 0.48, "learning_rate": 7.248322147651007e-05, "loss": 0.1361, "step": 180 }, { "epoch": 0.51, "learning_rate": 7.651006711409396e-05, "loss": 0.1268, "step": 190 }, { "epoch": 0.54, "learning_rate": 8.053691275167786e-05, "loss": 0.1159, "step": 200 }, { "epoch": 0.56, "learning_rate": 8.456375838926175e-05, "loss": 0.1088, "step": 210 }, { "epoch": 0.59, "learning_rate": 8.859060402684565e-05, "loss": 0.1015, "step": 220 }, { "epoch": 0.62, "learning_rate": 9.261744966442954e-05, "loss": 0.0947, "step": 230 }, { "epoch": 0.65, "learning_rate": 9.664429530201342e-05, "loss": 0.087, "step": 240 }, { "epoch": 0.67, "learning_rate": 0.00010067114093959731, "loss": 0.0823, "step": 250 }, { "epoch": 0.7, "learning_rate": 0.0001046979865771812, "loss": 0.0772, "step": 260 }, { "epoch": 0.73, "learning_rate": 0.0001087248322147651, "loss": 0.0694, "step": 270 }, { "epoch": 0.75, "learning_rate": 0.00011275167785234899, "loss": 0.0642, "step": 280 }, { "epoch": 0.78, "learning_rate": 0.00011677852348993289, "loss": 0.0592, "step": 290 }, { "epoch": 0.81, "learning_rate": 0.00011991038088125467, "loss": 0.0556, "step": 300 }, { "epoch": 0.83, "learning_rate": 0.000119462285287528, "loss": 0.0516, "step": 310 }, { "epoch": 0.86, "learning_rate": 0.00011901418969380135, "loss": 0.0679, "step": 320 }, { "epoch": 0.89, "learning_rate": 0.00011856609410007469, "loss": 0.0582, "step": 330 }, { "epoch": 0.91, "learning_rate": 0.00011811799850634802, "loss": 0.0487, "step": 340 }, { "epoch": 0.94, "learning_rate": 0.00011766990291262137, "loss": 0.0462, "step": 350 }, { "epoch": 0.97, "learning_rate": 0.0001172218073188947, "loss": 0.0411, "step": 360 }, { "epoch": 0.99, "learning_rate": 0.00011677371172516803, "loss": 0.0368, "step": 370 }, { "epoch": 1.0, "eval_accuracy": 0.999957997311828, "eval_loss": 0.034596964716911316, "eval_runtime": 326.4848, "eval_samples_per_second": 72.922, "eval_steps_per_second": 1.139, "step": 372 }, { "epoch": 1.02, "learning_rate": 0.00011632561613144138, "loss": 0.0335, "step": 380 }, { "epoch": 1.05, "learning_rate": 0.00011587752053771471, "loss": 0.0316, "step": 390 }, { "epoch": 1.08, "learning_rate": 0.00011542942494398806, "loss": 0.0303, "step": 400 }, { "epoch": 1.1, "learning_rate": 0.00011498132935026138, "loss": 0.0285, "step": 410 }, { "epoch": 1.13, "learning_rate": 0.00011453323375653473, "loss": 0.0273, "step": 420 }, { "epoch": 1.16, "learning_rate": 0.00011408513816280807, "loss": 0.0258, "step": 430 }, { "epoch": 1.18, "learning_rate": 0.00011363704256908141, "loss": 0.0248, "step": 440 }, { "epoch": 1.21, "learning_rate": 0.00011318894697535474, "loss": 0.0257, "step": 450 }, { "epoch": 1.24, "learning_rate": 0.00011274085138162807, "loss": 0.0251, "step": 460 }, { "epoch": 1.26, "learning_rate": 0.00011229275578790142, "loss": 0.0225, "step": 470 }, { "epoch": 1.29, "learning_rate": 0.00011184466019417477, "loss": 0.0212, "step": 480 }, { "epoch": 1.32, "learning_rate": 0.00011139656460044809, "loss": 0.0214, "step": 490 }, { "epoch": 1.34, "learning_rate": 0.00011094846900672144, "loss": 0.0197, "step": 500 }, { "epoch": 1.37, "learning_rate": 0.00011050037341299477, "loss": 0.0203, "step": 510 }, { "epoch": 1.4, "learning_rate": 0.00011005227781926812, "loss": 0.0183, "step": 520 }, { "epoch": 1.42, "learning_rate": 0.00010960418222554145, "loss": 0.0202, "step": 530 }, { "epoch": 1.45, "learning_rate": 0.00010915608663181478, "loss": 0.0258, "step": 540 }, { "epoch": 1.48, "learning_rate": 0.00010870799103808813, "loss": 0.0198, "step": 550 }, { "epoch": 1.51, "learning_rate": 0.00010825989544436148, "loss": 0.0194, "step": 560 }, { "epoch": 1.53, "learning_rate": 0.00010781179985063481, "loss": 0.0171, "step": 570 }, { "epoch": 1.56, "learning_rate": 0.00010736370425690814, "loss": 0.0152, "step": 580 }, { "epoch": 1.59, "learning_rate": 0.00010691560866318148, "loss": 0.0162, "step": 590 }, { "epoch": 1.61, "learning_rate": 0.00010646751306945482, "loss": 0.0143, "step": 600 }, { "epoch": 1.64, "learning_rate": 0.00010601941747572817, "loss": 0.014, "step": 610 }, { "epoch": 1.67, "learning_rate": 0.00010557132188200149, "loss": 0.0132, "step": 620 }, { "epoch": 1.69, "learning_rate": 0.00010512322628827484, "loss": 0.0127, "step": 630 }, { "epoch": 1.72, "learning_rate": 0.00010467513069454817, "loss": 0.0123, "step": 640 }, { "epoch": 1.75, "learning_rate": 0.00010422703510082152, "loss": 0.0124, "step": 650 }, { "epoch": 1.77, "learning_rate": 0.00010377893950709485, "loss": 0.0118, "step": 660 }, { "epoch": 1.8, "learning_rate": 0.00010333084391336818, "loss": 0.0128, "step": 670 }, { "epoch": 1.83, "learning_rate": 0.00010288274831964153, "loss": 0.0139, "step": 680 }, { "epoch": 1.85, "learning_rate": 0.00010243465272591486, "loss": 0.0158, "step": 690 }, { "epoch": 1.88, "learning_rate": 0.0001019865571321882, "loss": 0.0112, "step": 700 }, { "epoch": 1.91, "learning_rate": 0.00010153846153846155, "loss": 0.0104, "step": 710 }, { "epoch": 1.94, "learning_rate": 0.00010109036594473488, "loss": 0.0099, "step": 720 }, { "epoch": 1.96, "learning_rate": 0.00010064227035100823, "loss": 0.0095, "step": 730 }, { "epoch": 1.99, "learning_rate": 0.00010019417475728155, "loss": 0.0094, "step": 740 }, { "epoch": 2.0, "eval_accuracy": 0.999957997311828, "eval_loss": 0.009242160245776176, "eval_runtime": 315.5591, "eval_samples_per_second": 75.447, "eval_steps_per_second": 1.179, "step": 744 }, { "epoch": 2.02, "learning_rate": 9.974607916355489e-05, "loss": 0.009, "step": 750 }, { "epoch": 2.04, "learning_rate": 9.929798356982824e-05, "loss": 0.0089, "step": 760 }, { "epoch": 2.07, "learning_rate": 9.884988797610157e-05, "loss": 0.0086, "step": 770 }, { "epoch": 2.1, "learning_rate": 9.84017923823749e-05, "loss": 0.0086, "step": 780 }, { "epoch": 2.12, "learning_rate": 9.795369678864824e-05, "loss": 0.0082, "step": 790 }, { "epoch": 2.15, "learning_rate": 9.750560119492159e-05, "loss": 0.0081, "step": 800 }, { "epoch": 2.18, "learning_rate": 9.705750560119493e-05, "loss": 0.0081, "step": 810 }, { "epoch": 2.2, "learning_rate": 9.660941000746825e-05, "loss": 0.0078, "step": 820 }, { "epoch": 2.23, "learning_rate": 9.61613144137416e-05, "loss": 0.0077, "step": 830 }, { "epoch": 2.26, "learning_rate": 9.571321882001495e-05, "loss": 0.0074, "step": 840 }, { "epoch": 2.28, "learning_rate": 9.526512322628828e-05, "loss": 0.0073, "step": 850 }, { "epoch": 2.31, "learning_rate": 9.481702763256161e-05, "loss": 0.0096, "step": 860 }, { "epoch": 2.34, "learning_rate": 9.436893203883495e-05, "loss": 0.0078, "step": 870 }, { "epoch": 2.37, "learning_rate": 9.39208364451083e-05, "loss": 0.0073, "step": 880 }, { "epoch": 2.39, "learning_rate": 9.347274085138164e-05, "loss": 0.0069, "step": 890 }, { "epoch": 2.42, "learning_rate": 9.302464525765496e-05, "loss": 0.0067, "step": 900 }, { "epoch": 2.45, "learning_rate": 9.257654966392831e-05, "loss": 0.0066, "step": 910 }, { "epoch": 2.47, "learning_rate": 9.212845407020164e-05, "loss": 0.0064, "step": 920 }, { "epoch": 2.5, "learning_rate": 9.168035847647499e-05, "loss": 0.0063, "step": 930 }, { "epoch": 2.53, "learning_rate": 9.123226288274832e-05, "loss": 0.0061, "step": 940 }, { "epoch": 2.55, "learning_rate": 9.078416728902166e-05, "loss": 0.006, "step": 950 }, { "epoch": 2.58, "learning_rate": 9.0336071695295e-05, "loss": 0.0059, "step": 960 }, { "epoch": 2.61, "learning_rate": 8.988797610156834e-05, "loss": 0.0058, "step": 970 }, { "epoch": 2.63, "learning_rate": 8.943988050784167e-05, "loss": 0.0057, "step": 980 }, { "epoch": 2.66, "learning_rate": 8.899178491411502e-05, "loss": 0.0055, "step": 990 }, { "epoch": 2.69, "learning_rate": 8.854368932038835e-05, "loss": 0.0056, "step": 1000 }, { "epoch": 2.72, "learning_rate": 8.80955937266617e-05, "loss": 0.0055, "step": 1010 }, { "epoch": 2.74, "learning_rate": 8.764749813293502e-05, "loss": 0.0053, "step": 1020 }, { "epoch": 2.77, "learning_rate": 8.719940253920836e-05, "loss": 0.0052, "step": 1030 }, { "epoch": 2.8, "learning_rate": 8.675130694548171e-05, "loss": 0.0052, "step": 1040 }, { "epoch": 2.82, "learning_rate": 8.630321135175504e-05, "loss": 0.0051, "step": 1050 }, { "epoch": 2.85, "learning_rate": 8.585511575802838e-05, "loss": 0.0051, "step": 1060 }, { "epoch": 2.88, "learning_rate": 8.540702016430171e-05, "loss": 0.005, "step": 1070 }, { "epoch": 2.9, "learning_rate": 8.495892457057506e-05, "loss": 0.0052, "step": 1080 }, { "epoch": 2.93, "learning_rate": 8.45108289768484e-05, "loss": 0.0049, "step": 1090 }, { "epoch": 2.96, "learning_rate": 8.406273338312172e-05, "loss": 0.0048, "step": 1100 }, { "epoch": 2.98, "learning_rate": 8.361463778939507e-05, "loss": 0.0046, "step": 1110 }, { "epoch": 3.0, "eval_accuracy": 0.999957997311828, "eval_loss": 0.004723448771983385, "eval_runtime": 316.511, "eval_samples_per_second": 75.22, "eval_steps_per_second": 1.175, "step": 1116 }, { "epoch": 3.01, "learning_rate": 8.316654219566842e-05, "loss": 0.0046, "step": 1120 }, { "epoch": 3.04, "learning_rate": 8.271844660194175e-05, "loss": 0.0046, "step": 1130 }, { "epoch": 3.06, "learning_rate": 8.22703510082151e-05, "loss": 0.0045, "step": 1140 }, { "epoch": 3.09, "learning_rate": 8.182225541448842e-05, "loss": 0.0044, "step": 1150 }, { "epoch": 3.12, "learning_rate": 8.137415982076177e-05, "loss": 0.0044, "step": 1160 }, { "epoch": 3.15, "learning_rate": 8.092606422703511e-05, "loss": 0.0043, "step": 1170 }, { "epoch": 3.17, "learning_rate": 8.047796863330845e-05, "loss": 0.0042, "step": 1180 }, { "epoch": 3.2, "learning_rate": 8.002987303958178e-05, "loss": 0.0041, "step": 1190 }, { "epoch": 3.23, "learning_rate": 7.958177744585511e-05, "loss": 0.0041, "step": 1200 }, { "epoch": 3.25, "learning_rate": 7.913368185212846e-05, "loss": 0.004, "step": 1210 }, { "epoch": 3.28, "learning_rate": 7.86855862584018e-05, "loss": 0.004, "step": 1220 }, { "epoch": 3.31, "learning_rate": 7.823749066467513e-05, "loss": 0.004, "step": 1230 }, { "epoch": 3.33, "learning_rate": 7.778939507094847e-05, "loss": 0.0039, "step": 1240 }, { "epoch": 3.36, "learning_rate": 7.73412994772218e-05, "loss": 0.0039, "step": 1250 }, { "epoch": 3.39, "learning_rate": 7.689320388349515e-05, "loss": 0.0038, "step": 1260 }, { "epoch": 3.41, "learning_rate": 7.644510828976849e-05, "loss": 0.0037, "step": 1270 }, { "epoch": 3.44, "learning_rate": 7.599701269604182e-05, "loss": 0.0036, "step": 1280 }, { "epoch": 3.47, "learning_rate": 7.554891710231517e-05, "loss": 0.0036, "step": 1290 }, { "epoch": 3.49, "learning_rate": 7.51008215085885e-05, "loss": 0.0036, "step": 1300 }, { "epoch": 3.52, "learning_rate": 7.465272591486183e-05, "loss": 0.0036, "step": 1310 }, { "epoch": 3.55, "learning_rate": 7.420463032113518e-05, "loss": 0.0035, "step": 1320 }, { "epoch": 3.58, "learning_rate": 7.375653472740851e-05, "loss": 0.0034, "step": 1330 }, { "epoch": 3.6, "learning_rate": 7.330843913368186e-05, "loss": 0.0034, "step": 1340 }, { "epoch": 3.63, "learning_rate": 7.286034353995518e-05, "loss": 0.0036, "step": 1350 }, { "epoch": 3.66, "learning_rate": 7.241224794622853e-05, "loss": 0.0035, "step": 1360 }, { "epoch": 3.68, "learning_rate": 7.196415235250188e-05, "loss": 0.0034, "step": 1370 }, { "epoch": 3.71, "learning_rate": 7.151605675877521e-05, "loss": 0.0033, "step": 1380 }, { "epoch": 3.74, "learning_rate": 7.106796116504854e-05, "loss": 0.0033, "step": 1390 }, { "epoch": 3.76, "learning_rate": 7.061986557132189e-05, "loss": 0.0032, "step": 1400 }, { "epoch": 3.79, "learning_rate": 7.017176997759522e-05, "loss": 0.0032, "step": 1410 }, { "epoch": 3.82, "learning_rate": 6.972367438386857e-05, "loss": 0.0031, "step": 1420 }, { "epoch": 3.84, "learning_rate": 6.927557879014189e-05, "loss": 0.0031, "step": 1430 }, { "epoch": 3.87, "learning_rate": 6.882748319641524e-05, "loss": 0.0031, "step": 1440 }, { "epoch": 3.9, "learning_rate": 6.837938760268858e-05, "loss": 0.003, "step": 1450 }, { "epoch": 3.92, "learning_rate": 6.793129200896192e-05, "loss": 0.0031, "step": 1460 }, { "epoch": 3.95, "learning_rate": 6.748319641523525e-05, "loss": 0.003, "step": 1470 }, { "epoch": 3.98, "learning_rate": 6.703510082150858e-05, "loss": 0.0029, "step": 1480 }, { "epoch": 4.0, "eval_accuracy": 1.0, "eval_loss": 0.002898953389376402, "eval_runtime": 315.5752, "eval_samples_per_second": 75.443, "eval_steps_per_second": 1.179, "step": 1488 }, { "epoch": 4.01, "learning_rate": 6.658700522778193e-05, "loss": 0.003, "step": 1490 }, { "epoch": 4.03, "learning_rate": 6.613890963405528e-05, "loss": 0.0029, "step": 1500 }, { "epoch": 4.06, "learning_rate": 6.56908140403286e-05, "loss": 0.0029, "step": 1510 }, { "epoch": 4.09, "learning_rate": 6.524271844660194e-05, "loss": 0.0028, "step": 1520 }, { "epoch": 4.11, "learning_rate": 6.479462285287528e-05, "loss": 0.0028, "step": 1530 }, { "epoch": 4.14, "learning_rate": 6.434652725914862e-05, "loss": 0.0028, "step": 1540 }, { "epoch": 4.17, "learning_rate": 6.389843166542196e-05, "loss": 0.0028, "step": 1550 }, { "epoch": 4.19, "learning_rate": 6.345033607169529e-05, "loss": 0.0027, "step": 1560 }, { "epoch": 4.22, "learning_rate": 6.300224047796864e-05, "loss": 0.0027, "step": 1570 }, { "epoch": 4.25, "learning_rate": 6.255414488424197e-05, "loss": 0.0027, "step": 1580 }, { "epoch": 4.27, "learning_rate": 6.21060492905153e-05, "loss": 0.0027, "step": 1590 }, { "epoch": 4.3, "learning_rate": 6.165795369678865e-05, "loss": 0.0026, "step": 1600 }, { "epoch": 4.33, "learning_rate": 6.120985810306199e-05, "loss": 0.0026, "step": 1610 }, { "epoch": 4.35, "learning_rate": 6.076176250933533e-05, "loss": 0.0026, "step": 1620 }, { "epoch": 4.38, "learning_rate": 6.031366691560866e-05, "loss": 0.0026, "step": 1630 }, { "epoch": 4.41, "learning_rate": 5.9865571321882e-05, "loss": 0.0025, "step": 1640 }, { "epoch": 4.44, "learning_rate": 5.941747572815534e-05, "loss": 0.0025, "step": 1650 }, { "epoch": 4.46, "learning_rate": 5.896938013442868e-05, "loss": 0.0025, "step": 1660 }, { "epoch": 4.49, "learning_rate": 5.852128454070202e-05, "loss": 0.0025, "step": 1670 }, { "epoch": 4.52, "learning_rate": 5.807318894697535e-05, "loss": 0.0024, "step": 1680 }, { "epoch": 4.54, "learning_rate": 5.762509335324869e-05, "loss": 0.0024, "step": 1690 }, { "epoch": 4.57, "learning_rate": 5.717699775952203e-05, "loss": 0.0032, "step": 1700 }, { "epoch": 4.6, "learning_rate": 5.672890216579537e-05, "loss": 0.003, "step": 1710 }, { "epoch": 4.62, "learning_rate": 5.6280806572068713e-05, "loss": 0.0028, "step": 1720 }, { "epoch": 4.65, "learning_rate": 5.583271097834205e-05, "loss": 0.0055, "step": 1730 }, { "epoch": 4.68, "learning_rate": 5.538461538461539e-05, "loss": 0.0036, "step": 1740 }, { "epoch": 4.7, "learning_rate": 5.493651979088873e-05, "loss": 0.0029, "step": 1750 }, { "epoch": 4.73, "learning_rate": 5.448842419716207e-05, "loss": 0.0059, "step": 1760 }, { "epoch": 4.76, "learning_rate": 5.40403286034354e-05, "loss": 0.0042, "step": 1770 }, { "epoch": 4.78, "learning_rate": 5.359223300970874e-05, "loss": 0.0031, "step": 1780 }, { "epoch": 4.81, "learning_rate": 5.3144137415982074e-05, "loss": 0.0026, "step": 1790 }, { "epoch": 4.84, "learning_rate": 5.269604182225542e-05, "loss": 0.0025, "step": 1800 }, { "epoch": 4.87, "learning_rate": 5.2247946228528755e-05, "loss": 0.0025, "step": 1810 }, { "epoch": 4.89, "learning_rate": 5.1799850634802095e-05, "loss": 0.0023, "step": 1820 }, { "epoch": 4.92, "learning_rate": 5.135175504107543e-05, "loss": 0.0024, "step": 1830 }, { "epoch": 4.95, "learning_rate": 5.090365944734877e-05, "loss": 0.0023, "step": 1840 }, { "epoch": 4.97, "learning_rate": 5.045556385362211e-05, "loss": 0.0022, "step": 1850 }, { "epoch": 5.0, "learning_rate": 5.000746825989545e-05, "loss": 0.0022, "step": 1860 }, { "epoch": 5.0, "eval_accuracy": 0.9999159946236559, "eval_loss": 0.0023123060818761587, "eval_runtime": 314.9106, "eval_samples_per_second": 75.602, "eval_steps_per_second": 1.181, "step": 1860 }, { "epoch": 5.03, "learning_rate": 4.955937266616878e-05, "loss": 0.0023, "step": 1870 }, { "epoch": 5.05, "learning_rate": 4.911127707244212e-05, "loss": 0.0021, "step": 1880 }, { "epoch": 5.08, "learning_rate": 4.866318147871546e-05, "loss": 0.0044, "step": 1890 }, { "epoch": 5.11, "learning_rate": 4.82150858849888e-05, "loss": 0.0021, "step": 1900 }, { "epoch": 5.13, "learning_rate": 4.7766990291262136e-05, "loss": 0.0022, "step": 1910 }, { "epoch": 5.16, "learning_rate": 4.7318894697535476e-05, "loss": 0.0021, "step": 1920 }, { "epoch": 5.19, "learning_rate": 4.687079910380881e-05, "loss": 0.0021, "step": 1930 }, { "epoch": 5.22, "learning_rate": 4.642270351008216e-05, "loss": 0.002, "step": 1940 }, { "epoch": 5.24, "learning_rate": 4.597460791635549e-05, "loss": 0.0023, "step": 1950 }, { "epoch": 5.27, "learning_rate": 4.552651232262883e-05, "loss": 0.002, "step": 1960 }, { "epoch": 5.3, "learning_rate": 4.5078416728902164e-05, "loss": 0.002, "step": 1970 }, { "epoch": 5.32, "learning_rate": 4.4630321135175504e-05, "loss": 0.0019, "step": 1980 }, { "epoch": 5.35, "learning_rate": 4.4182225541448844e-05, "loss": 0.0019, "step": 1990 }, { "epoch": 5.38, "learning_rate": 4.3734129947722184e-05, "loss": 0.0019, "step": 2000 }, { "epoch": 5.4, "learning_rate": 4.328603435399552e-05, "loss": 0.0019, "step": 2010 }, { "epoch": 5.43, "learning_rate": 4.283793876026886e-05, "loss": 0.0019, "step": 2020 }, { "epoch": 5.46, "learning_rate": 4.23898431665422e-05, "loss": 0.0019, "step": 2030 }, { "epoch": 5.48, "learning_rate": 4.194174757281554e-05, "loss": 0.0019, "step": 2040 }, { "epoch": 5.51, "learning_rate": 4.149365197908887e-05, "loss": 0.0019, "step": 2050 }, { "epoch": 5.54, "learning_rate": 4.104555638536221e-05, "loss": 0.0019, "step": 2060 }, { "epoch": 5.56, "learning_rate": 4.0597460791635545e-05, "loss": 0.0019, "step": 2070 }, { "epoch": 5.59, "learning_rate": 4.014936519790889e-05, "loss": 0.0019, "step": 2080 }, { "epoch": 5.62, "learning_rate": 3.9701269604182226e-05, "loss": 0.0018, "step": 2090 }, { "epoch": 5.65, "learning_rate": 3.9253174010455566e-05, "loss": 0.0018, "step": 2100 }, { "epoch": 5.67, "learning_rate": 3.88050784167289e-05, "loss": 0.0018, "step": 2110 }, { "epoch": 5.7, "learning_rate": 3.835698282300224e-05, "loss": 0.0018, "step": 2120 }, { "epoch": 5.73, "learning_rate": 3.790888722927558e-05, "loss": 0.0018, "step": 2130 }, { "epoch": 5.75, "learning_rate": 3.746079163554892e-05, "loss": 0.0018, "step": 2140 }, { "epoch": 5.78, "learning_rate": 3.701269604182225e-05, "loss": 0.0018, "step": 2150 }, { "epoch": 5.81, "learning_rate": 3.656460044809559e-05, "loss": 0.0017, "step": 2160 }, { "epoch": 5.83, "learning_rate": 3.6116504854368933e-05, "loss": 0.0018, "step": 2170 }, { "epoch": 5.86, "learning_rate": 3.5668409260642274e-05, "loss": 0.0017, "step": 2180 }, { "epoch": 5.89, "learning_rate": 3.522031366691561e-05, "loss": 0.0017, "step": 2190 }, { "epoch": 5.91, "learning_rate": 3.477221807318895e-05, "loss": 0.0017, "step": 2200 }, { "epoch": 5.94, "learning_rate": 3.432412247946228e-05, "loss": 0.0017, "step": 2210 }, { "epoch": 5.97, "learning_rate": 3.387602688573563e-05, "loss": 0.0017, "step": 2220 }, { "epoch": 5.99, "learning_rate": 3.342793129200896e-05, "loss": 0.0017, "step": 2230 }, { "epoch": 6.0, "eval_accuracy": 1.0, "eval_loss": 0.0016880695475265384, "eval_runtime": 314.9291, "eval_samples_per_second": 75.598, "eval_steps_per_second": 1.181, "step": 2232 }, { "epoch": 6.02, "learning_rate": 3.29798356982823e-05, "loss": 0.0017, "step": 2240 }, { "epoch": 6.05, "learning_rate": 3.2531740104555635e-05, "loss": 0.0017, "step": 2250 }, { "epoch": 6.08, "learning_rate": 3.2083644510828975e-05, "loss": 0.0016, "step": 2260 }, { "epoch": 6.1, "learning_rate": 3.1635548917102315e-05, "loss": 0.0017, "step": 2270 }, { "epoch": 6.13, "learning_rate": 3.1187453323375655e-05, "loss": 0.0016, "step": 2280 }, { "epoch": 6.16, "learning_rate": 3.0739357729648995e-05, "loss": 0.0016, "step": 2290 }, { "epoch": 6.18, "learning_rate": 3.0291262135922332e-05, "loss": 0.0016, "step": 2300 }, { "epoch": 6.21, "learning_rate": 2.984316654219567e-05, "loss": 0.0016, "step": 2310 }, { "epoch": 6.24, "learning_rate": 2.9395070948469006e-05, "loss": 0.0016, "step": 2320 }, { "epoch": 6.26, "learning_rate": 2.8946975354742346e-05, "loss": 0.0016, "step": 2330 }, { "epoch": 6.29, "learning_rate": 2.8498879761015683e-05, "loss": 0.0016, "step": 2340 }, { "epoch": 6.32, "learning_rate": 2.8050784167289023e-05, "loss": 0.0016, "step": 2350 }, { "epoch": 6.34, "learning_rate": 2.760268857356236e-05, "loss": 0.0016, "step": 2360 }, { "epoch": 6.37, "learning_rate": 2.71545929798357e-05, "loss": 0.0016, "step": 2370 }, { "epoch": 6.4, "learning_rate": 2.6706497386109037e-05, "loss": 0.0016, "step": 2380 }, { "epoch": 6.42, "learning_rate": 2.6258401792382373e-05, "loss": 0.0016, "step": 2390 }, { "epoch": 6.45, "learning_rate": 2.5810306198655713e-05, "loss": 0.0016, "step": 2400 }, { "epoch": 6.48, "learning_rate": 2.536221060492905e-05, "loss": 0.0016, "step": 2410 }, { "epoch": 6.51, "learning_rate": 2.491411501120239e-05, "loss": 0.0016, "step": 2420 }, { "epoch": 6.53, "learning_rate": 2.4466019417475727e-05, "loss": 0.0015, "step": 2430 }, { "epoch": 6.56, "learning_rate": 2.4017923823749067e-05, "loss": 0.0015, "step": 2440 }, { "epoch": 6.59, "learning_rate": 2.3569828230022404e-05, "loss": 0.0015, "step": 2450 }, { "epoch": 6.61, "learning_rate": 2.312173263629574e-05, "loss": 0.0015, "step": 2460 }, { "epoch": 6.64, "learning_rate": 2.267363704256908e-05, "loss": 0.0015, "step": 2470 }, { "epoch": 6.67, "learning_rate": 2.2225541448842418e-05, "loss": 0.0015, "step": 2480 }, { "epoch": 6.69, "learning_rate": 2.1777445855115758e-05, "loss": 0.0015, "step": 2490 }, { "epoch": 6.72, "learning_rate": 2.1329350261389095e-05, "loss": 0.0015, "step": 2500 }, { "epoch": 6.75, "learning_rate": 2.0881254667662435e-05, "loss": 0.0015, "step": 2510 }, { "epoch": 6.77, "learning_rate": 2.0433159073935772e-05, "loss": 0.0015, "step": 2520 }, { "epoch": 6.8, "learning_rate": 1.998506348020911e-05, "loss": 0.0015, "step": 2530 }, { "epoch": 6.83, "learning_rate": 1.953696788648245e-05, "loss": 0.0015, "step": 2540 }, { "epoch": 6.85, "learning_rate": 1.9088872292755786e-05, "loss": 0.0015, "step": 2550 }, { "epoch": 6.88, "learning_rate": 1.8640776699029126e-05, "loss": 0.0015, "step": 2560 }, { "epoch": 6.91, "learning_rate": 1.8192681105302466e-05, "loss": 0.0015, "step": 2570 }, { "epoch": 6.94, "learning_rate": 1.7744585511575806e-05, "loss": 0.0015, "step": 2580 }, { "epoch": 6.96, "learning_rate": 1.7296489917849143e-05, "loss": 0.0015, "step": 2590 }, { "epoch": 6.99, "learning_rate": 1.684839432412248e-05, "loss": 0.0015, "step": 2600 }, { "epoch": 7.0, "eval_accuracy": 1.0, "eval_loss": 0.001458011451177299, "eval_runtime": 317.9295, "eval_samples_per_second": 74.885, "eval_steps_per_second": 1.17, "step": 2604 }, { "epoch": 7.02, "learning_rate": 1.640029873039582e-05, "loss": 0.0015, "step": 2610 }, { "epoch": 7.04, "learning_rate": 1.5952203136669157e-05, "loss": 0.0015, "step": 2620 }, { "epoch": 7.07, "learning_rate": 1.5504107542942497e-05, "loss": 0.0014, "step": 2630 }, { "epoch": 7.1, "learning_rate": 1.5056011949215834e-05, "loss": 0.0014, "step": 2640 }, { "epoch": 7.12, "learning_rate": 1.460791635548917e-05, "loss": 0.0014, "step": 2650 }, { "epoch": 7.15, "learning_rate": 1.4159820761762509e-05, "loss": 0.0014, "step": 2660 }, { "epoch": 7.18, "learning_rate": 1.3711725168035847e-05, "loss": 0.0014, "step": 2670 }, { "epoch": 7.2, "learning_rate": 1.3263629574309186e-05, "loss": 0.0014, "step": 2680 }, { "epoch": 7.23, "learning_rate": 1.2815533980582524e-05, "loss": 0.0014, "step": 2690 }, { "epoch": 7.26, "learning_rate": 1.2367438386855863e-05, "loss": 0.0014, "step": 2700 }, { "epoch": 7.28, "learning_rate": 1.19193427931292e-05, "loss": 0.0014, "step": 2710 }, { "epoch": 7.31, "learning_rate": 1.147124719940254e-05, "loss": 0.0014, "step": 2720 }, { "epoch": 7.34, "learning_rate": 1.1023151605675878e-05, "loss": 0.0014, "step": 2730 }, { "epoch": 7.37, "learning_rate": 1.0575056011949217e-05, "loss": 0.0014, "step": 2740 }, { "epoch": 7.39, "learning_rate": 1.0126960418222555e-05, "loss": 0.0014, "step": 2750 }, { "epoch": 7.42, "learning_rate": 9.678864824495894e-06, "loss": 0.0014, "step": 2760 }, { "epoch": 7.45, "learning_rate": 9.230769230769232e-06, "loss": 0.0014, "step": 2770 }, { "epoch": 7.47, "learning_rate": 8.782673637042569e-06, "loss": 0.0014, "step": 2780 }, { "epoch": 7.5, "learning_rate": 8.334578043315908e-06, "loss": 0.0014, "step": 2790 }, { "epoch": 7.53, "learning_rate": 7.886482449589246e-06, "loss": 0.0014, "step": 2800 }, { "epoch": 7.55, "learning_rate": 7.4383868558625845e-06, "loss": 0.0014, "step": 2810 }, { "epoch": 7.58, "learning_rate": 6.990291262135923e-06, "loss": 0.0014, "step": 2820 }, { "epoch": 7.61, "learning_rate": 6.542195668409261e-06, "loss": 0.0014, "step": 2830 }, { "epoch": 7.63, "learning_rate": 6.094100074682599e-06, "loss": 0.0014, "step": 2840 }, { "epoch": 7.66, "learning_rate": 5.646004480955938e-06, "loss": 0.0014, "step": 2850 }, { "epoch": 7.69, "learning_rate": 5.197908887229276e-06, "loss": 0.0014, "step": 2860 }, { "epoch": 7.72, "learning_rate": 4.749813293502614e-06, "loss": 0.0014, "step": 2870 }, { "epoch": 7.74, "learning_rate": 4.301717699775952e-06, "loss": 0.0014, "step": 2880 }, { "epoch": 7.77, "learning_rate": 3.853622106049291e-06, "loss": 0.0014, "step": 2890 }, { "epoch": 7.8, "learning_rate": 3.4055265123226292e-06, "loss": 0.0014, "step": 2900 }, { "epoch": 7.82, "learning_rate": 2.9574309185959673e-06, "loss": 0.0014, "step": 2910 }, { "epoch": 7.85, "learning_rate": 2.5093353248693058e-06, "loss": 0.0014, "step": 2920 }, { "epoch": 7.88, "learning_rate": 2.061239731142644e-06, "loss": 0.0014, "step": 2930 }, { "epoch": 7.9, "learning_rate": 1.613144137415982e-06, "loss": 0.0014, "step": 2940 }, { "epoch": 7.93, "learning_rate": 1.1650485436893204e-06, "loss": 0.0014, "step": 2950 }, { "epoch": 7.96, "learning_rate": 7.169529499626587e-07, "loss": 0.0014, "step": 2960 }, { "epoch": 7.98, "learning_rate": 2.68857356235997e-07, "loss": 0.0014, "step": 2970 }, { "epoch": 8.0, "eval_accuracy": 0.999957997311828, "eval_loss": 0.0014714967692270875, "eval_runtime": 317.4979, "eval_samples_per_second": 74.986, "eval_steps_per_second": 1.172, "step": 2976 }, { "epoch": 8.0, "step": 2976, "total_flos": 5.9038961296526475e+19, "train_loss": 0.05853422665912207, "train_runtime": 16155.4674, "train_samples_per_second": 47.156, "train_steps_per_second": 0.184 } ], "max_steps": 2976, "num_train_epochs": 8, "total_flos": 5.9038961296526475e+19, "trial_name": null, "trial_params": null }