{ "best_metric": 0.3434308171272278, "best_model_checkpoint": "./vit-base-beans/checkpoint-3760", "epoch": 1.9810326659641728, "global_step": 3760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00019947312961011594, "loss": 2.0892, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019894625922023182, "loss": 1.6471, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.00019841938883034775, "loss": 1.407, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.00019789251844046365, "loss": 1.2934, "step": 40 }, { "epoch": 0.02, "eval_accuracy": 0.554004214963119, "eval_loss": 1.2484745979309082, "eval_runtime": 40.9693, "eval_samples_per_second": 92.655, "eval_steps_per_second": 11.594, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.00019736564805057958, "loss": 1.3421, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.00019683877766069548, "loss": 1.1489, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.0001963119072708114, "loss": 1.1359, "step": 70 }, { "epoch": 0.04, "learning_rate": 0.00019578503688092732, "loss": 1.0804, "step": 80 }, { "epoch": 0.04, "eval_accuracy": 0.5076396206533192, "eval_loss": 1.2709373235702515, "eval_runtime": 41.0324, "eval_samples_per_second": 92.512, "eval_steps_per_second": 11.576, "step": 80 }, { "epoch": 0.05, "learning_rate": 0.0001952581664910432, "loss": 1.281, "step": 90 }, { "epoch": 0.05, "learning_rate": 0.00019473129610115912, "loss": 1.1561, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.00019420442571127503, "loss": 0.9883, "step": 110 }, { "epoch": 0.06, "learning_rate": 0.00019367755532139096, "loss": 1.0225, "step": 120 }, { "epoch": 0.06, "eval_accuracy": 0.5835089567966281, "eval_loss": 1.1064503192901611, "eval_runtime": 41.5232, "eval_samples_per_second": 91.419, "eval_steps_per_second": 11.439, "step": 120 }, { "epoch": 0.07, "learning_rate": 0.00019315068493150686, "loss": 1.0751, "step": 130 }, { "epoch": 0.07, "learning_rate": 0.00019262381454162277, "loss": 1.2585, "step": 140 }, { "epoch": 0.08, "learning_rate": 0.0001920969441517387, "loss": 1.0325, "step": 150 }, { "epoch": 0.08, "learning_rate": 0.00019157007376185457, "loss": 1.1999, "step": 160 }, { "epoch": 0.08, "eval_accuracy": 0.6240779768177028, "eval_loss": 0.998985767364502, "eval_runtime": 41.8244, "eval_samples_per_second": 90.76, "eval_steps_per_second": 11.357, "step": 160 }, { "epoch": 0.09, "learning_rate": 0.0001910432033719705, "loss": 1.0211, "step": 170 }, { "epoch": 0.09, "learning_rate": 0.00019051633298208643, "loss": 1.0205, "step": 180 }, { "epoch": 0.1, "learning_rate": 0.0001899894625922023, "loss": 1.0047, "step": 190 }, { "epoch": 0.11, "learning_rate": 0.00018946259220231824, "loss": 0.9969, "step": 200 }, { "epoch": 0.11, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.9371430277824402, "eval_runtime": 41.6306, "eval_samples_per_second": 91.183, "eval_steps_per_second": 11.41, "step": 200 }, { "epoch": 0.11, "learning_rate": 0.00018893572181243414, "loss": 1.025, "step": 210 }, { "epoch": 0.12, "learning_rate": 0.00018840885142255007, "loss": 1.0511, "step": 220 }, { "epoch": 0.12, "learning_rate": 0.00018788198103266598, "loss": 0.8609, "step": 230 }, { "epoch": 0.13, "learning_rate": 0.00018735511064278188, "loss": 0.8846, "step": 240 }, { "epoch": 0.13, "eval_accuracy": 0.6620126448893572, "eval_loss": 0.9217966794967651, "eval_runtime": 41.7497, "eval_samples_per_second": 90.923, "eval_steps_per_second": 11.377, "step": 240 }, { "epoch": 0.13, "learning_rate": 0.0001868282402528978, "loss": 1.0542, "step": 250 }, { "epoch": 0.14, "learning_rate": 0.0001863013698630137, "loss": 0.8937, "step": 260 }, { "epoch": 0.14, "learning_rate": 0.00018577449947312962, "loss": 0.9493, "step": 270 }, { "epoch": 0.15, "learning_rate": 0.00018524762908324552, "loss": 0.9374, "step": 280 }, { "epoch": 0.15, "eval_accuracy": 0.6704425711275026, "eval_loss": 0.8844485282897949, "eval_runtime": 41.7229, "eval_samples_per_second": 90.981, "eval_steps_per_second": 11.385, "step": 280 }, { "epoch": 0.15, "learning_rate": 0.00018472075869336145, "loss": 0.93, "step": 290 }, { "epoch": 0.16, "learning_rate": 0.00018419388830347736, "loss": 0.9324, "step": 300 }, { "epoch": 0.16, "learning_rate": 0.00018366701791359326, "loss": 0.9806, "step": 310 }, { "epoch": 0.17, "learning_rate": 0.0001831401475237092, "loss": 0.8979, "step": 320 }, { "epoch": 0.17, "eval_accuracy": 0.6862486828240253, "eval_loss": 0.8492920994758606, "eval_runtime": 42.0264, "eval_samples_per_second": 90.324, "eval_steps_per_second": 11.302, "step": 320 }, { "epoch": 0.17, "learning_rate": 0.00018261327713382507, "loss": 0.7995, "step": 330 }, { "epoch": 0.18, "learning_rate": 0.000182086406743941, "loss": 0.8829, "step": 340 }, { "epoch": 0.18, "learning_rate": 0.00018155953635405693, "loss": 0.8794, "step": 350 }, { "epoch": 0.19, "learning_rate": 0.00018103266596417283, "loss": 1.0743, "step": 360 }, { "epoch": 0.19, "eval_accuracy": 0.6680716543730242, "eval_loss": 0.910214900970459, "eval_runtime": 41.8529, "eval_samples_per_second": 90.699, "eval_steps_per_second": 11.349, "step": 360 }, { "epoch": 0.19, "learning_rate": 0.00018050579557428873, "loss": 0.9903, "step": 370 }, { "epoch": 0.2, "learning_rate": 0.00017997892518440464, "loss": 0.9447, "step": 380 }, { "epoch": 0.21, "learning_rate": 0.00017945205479452057, "loss": 0.9659, "step": 390 }, { "epoch": 0.21, "learning_rate": 0.00017892518440463647, "loss": 0.8767, "step": 400 }, { "epoch": 0.21, "eval_accuracy": 0.6290832455216017, "eval_loss": 0.9642680287361145, "eval_runtime": 41.7862, "eval_samples_per_second": 90.843, "eval_steps_per_second": 11.367, "step": 400 }, { "epoch": 0.22, "learning_rate": 0.00017839831401475237, "loss": 1.0593, "step": 410 }, { "epoch": 0.22, "learning_rate": 0.0001778714436248683, "loss": 0.8566, "step": 420 }, { "epoch": 0.23, "learning_rate": 0.0001773445732349842, "loss": 0.8882, "step": 430 }, { "epoch": 0.23, "learning_rate": 0.0001768177028451001, "loss": 0.9381, "step": 440 }, { "epoch": 0.23, "eval_accuracy": 0.6383034773445733, "eval_loss": 0.9608025550842285, "eval_runtime": 41.7191, "eval_samples_per_second": 90.99, "eval_steps_per_second": 11.386, "step": 440 }, { "epoch": 0.24, "learning_rate": 0.00017629083245521601, "loss": 0.9577, "step": 450 }, { "epoch": 0.24, "learning_rate": 0.00017576396206533195, "loss": 0.8785, "step": 460 }, { "epoch": 0.25, "learning_rate": 0.00017523709167544785, "loss": 0.7606, "step": 470 }, { "epoch": 0.25, "learning_rate": 0.00017471022128556375, "loss": 1.1457, "step": 480 }, { "epoch": 0.25, "eval_accuracy": 0.6654373024236038, "eval_loss": 0.8898976445198059, "eval_runtime": 41.808, "eval_samples_per_second": 90.796, "eval_steps_per_second": 11.361, "step": 480 }, { "epoch": 0.26, "learning_rate": 0.00017418335089567968, "loss": 0.9, "step": 490 }, { "epoch": 0.26, "learning_rate": 0.00017365648050579559, "loss": 0.7624, "step": 500 }, { "epoch": 0.27, "learning_rate": 0.0001731296101159115, "loss": 0.819, "step": 510 }, { "epoch": 0.27, "learning_rate": 0.00017260273972602742, "loss": 0.8516, "step": 520 }, { "epoch": 0.27, "eval_accuracy": 0.6791359325605901, "eval_loss": 0.8746893405914307, "eval_runtime": 41.7582, "eval_samples_per_second": 90.904, "eval_steps_per_second": 11.375, "step": 520 }, { "epoch": 0.28, "learning_rate": 0.00017207586933614332, "loss": 0.9307, "step": 530 }, { "epoch": 0.28, "learning_rate": 0.00017154899894625923, "loss": 0.8867, "step": 540 }, { "epoch": 0.29, "learning_rate": 0.00017102212855637513, "loss": 0.8658, "step": 550 }, { "epoch": 0.3, "learning_rate": 0.00017049525816649106, "loss": 0.8935, "step": 560 }, { "epoch": 0.3, "eval_accuracy": 0.6699157007376185, "eval_loss": 0.8650425672531128, "eval_runtime": 42.0967, "eval_samples_per_second": 90.173, "eval_steps_per_second": 11.284, "step": 560 }, { "epoch": 0.3, "learning_rate": 0.00016996838777660696, "loss": 0.958, "step": 570 }, { "epoch": 0.31, "learning_rate": 0.00016944151738672287, "loss": 0.8805, "step": 580 }, { "epoch": 0.31, "learning_rate": 0.0001689146469968388, "loss": 0.9066, "step": 590 }, { "epoch": 0.32, "learning_rate": 0.0001683877766069547, "loss": 0.8468, "step": 600 }, { "epoch": 0.32, "eval_accuracy": 0.7268177028451, "eval_loss": 0.7727631330490112, "eval_runtime": 41.8106, "eval_samples_per_second": 90.79, "eval_steps_per_second": 11.361, "step": 600 }, { "epoch": 0.32, "learning_rate": 0.0001678609062170706, "loss": 0.797, "step": 610 }, { "epoch": 0.33, "learning_rate": 0.0001673340358271865, "loss": 0.8564, "step": 620 }, { "epoch": 0.33, "learning_rate": 0.00016680716543730244, "loss": 0.7341, "step": 630 }, { "epoch": 0.34, "learning_rate": 0.00016628029504741834, "loss": 0.829, "step": 640 }, { "epoch": 0.34, "eval_accuracy": 0.6978398314014752, "eval_loss": 0.8096127510070801, "eval_runtime": 42.0874, "eval_samples_per_second": 90.193, "eval_steps_per_second": 11.286, "step": 640 }, { "epoch": 0.34, "learning_rate": 0.00016575342465753425, "loss": 0.7701, "step": 650 }, { "epoch": 0.35, "learning_rate": 0.00016522655426765018, "loss": 0.763, "step": 660 }, { "epoch": 0.35, "learning_rate": 0.00016469968387776608, "loss": 0.7975, "step": 670 }, { "epoch": 0.36, "learning_rate": 0.00016417281348788198, "loss": 0.7611, "step": 680 }, { "epoch": 0.36, "eval_accuracy": 0.7168071654373024, "eval_loss": 0.774563193321228, "eval_runtime": 41.9532, "eval_samples_per_second": 90.482, "eval_steps_per_second": 11.322, "step": 680 }, { "epoch": 0.36, "learning_rate": 0.0001636459430979979, "loss": 0.8112, "step": 690 }, { "epoch": 0.37, "learning_rate": 0.00016311907270811382, "loss": 0.7897, "step": 700 }, { "epoch": 0.37, "learning_rate": 0.00016259220231822972, "loss": 0.8332, "step": 710 }, { "epoch": 0.38, "learning_rate": 0.00016206533192834562, "loss": 0.8072, "step": 720 }, { "epoch": 0.38, "eval_accuracy": 0.7067966280295047, "eval_loss": 0.802047848701477, "eval_runtime": 41.8602, "eval_samples_per_second": 90.683, "eval_steps_per_second": 11.347, "step": 720 }, { "epoch": 0.38, "learning_rate": 0.00016153846153846155, "loss": 0.7557, "step": 730 }, { "epoch": 0.39, "learning_rate": 0.00016101159114857746, "loss": 0.8949, "step": 740 }, { "epoch": 0.4, "learning_rate": 0.00016048472075869336, "loss": 0.7729, "step": 750 }, { "epoch": 0.4, "learning_rate": 0.0001599578503688093, "loss": 0.8402, "step": 760 }, { "epoch": 0.4, "eval_accuracy": 0.6983667017913593, "eval_loss": 0.8181519508361816, "eval_runtime": 42.0502, "eval_samples_per_second": 90.273, "eval_steps_per_second": 11.296, "step": 760 }, { "epoch": 0.41, "learning_rate": 0.0001594309799789252, "loss": 0.869, "step": 770 }, { "epoch": 0.41, "learning_rate": 0.0001589041095890411, "loss": 0.8356, "step": 780 }, { "epoch": 0.42, "learning_rate": 0.000158377239199157, "loss": 0.9184, "step": 790 }, { "epoch": 0.42, "learning_rate": 0.00015785036880927293, "loss": 0.6367, "step": 800 }, { "epoch": 0.42, "eval_accuracy": 0.7397260273972602, "eval_loss": 0.7236610054969788, "eval_runtime": 41.5948, "eval_samples_per_second": 91.261, "eval_steps_per_second": 11.42, "step": 800 }, { "epoch": 0.43, "learning_rate": 0.00015732349841938883, "loss": 0.7656, "step": 810 }, { "epoch": 0.43, "learning_rate": 0.00015679662802950474, "loss": 0.8507, "step": 820 }, { "epoch": 0.44, "learning_rate": 0.00015626975763962067, "loss": 0.7213, "step": 830 }, { "epoch": 0.44, "learning_rate": 0.00015574288724973657, "loss": 0.7094, "step": 840 }, { "epoch": 0.44, "eval_accuracy": 0.7336670179135932, "eval_loss": 0.75114905834198, "eval_runtime": 41.1659, "eval_samples_per_second": 92.212, "eval_steps_per_second": 11.539, "step": 840 }, { "epoch": 0.45, "learning_rate": 0.00015521601685985248, "loss": 0.7631, "step": 850 }, { "epoch": 0.45, "learning_rate": 0.0001546891464699684, "loss": 0.7045, "step": 860 }, { "epoch": 0.46, "learning_rate": 0.0001541622760800843, "loss": 0.7582, "step": 870 }, { "epoch": 0.46, "learning_rate": 0.0001536354056902002, "loss": 0.8905, "step": 880 }, { "epoch": 0.46, "eval_accuracy": 0.7489462592202318, "eval_loss": 0.6889460682868958, "eval_runtime": 41.1832, "eval_samples_per_second": 92.173, "eval_steps_per_second": 11.534, "step": 880 }, { "epoch": 0.47, "learning_rate": 0.00015310853530031612, "loss": 0.7429, "step": 890 }, { "epoch": 0.47, "learning_rate": 0.00015258166491043205, "loss": 0.7355, "step": 900 }, { "epoch": 0.48, "learning_rate": 0.00015205479452054795, "loss": 0.7525, "step": 910 }, { "epoch": 0.48, "learning_rate": 0.00015152792413066385, "loss": 0.8386, "step": 920 }, { "epoch": 0.48, "eval_accuracy": 0.7178609062170705, "eval_loss": 0.7636825442314148, "eval_runtime": 41.2051, "eval_samples_per_second": 92.124, "eval_steps_per_second": 11.528, "step": 920 }, { "epoch": 0.49, "learning_rate": 0.00015100105374077978, "loss": 0.7451, "step": 930 }, { "epoch": 0.5, "learning_rate": 0.0001504741833508957, "loss": 0.6861, "step": 940 }, { "epoch": 0.5, "learning_rate": 0.0001499473129610116, "loss": 0.7938, "step": 950 }, { "epoch": 0.51, "learning_rate": 0.0001494204425711275, "loss": 0.7564, "step": 960 }, { "epoch": 0.51, "eval_accuracy": 0.7365648050579557, "eval_loss": 0.7086778283119202, "eval_runtime": 41.133, "eval_samples_per_second": 92.286, "eval_steps_per_second": 11.548, "step": 960 }, { "epoch": 0.51, "learning_rate": 0.00014889357218124342, "loss": 0.7699, "step": 970 }, { "epoch": 0.52, "learning_rate": 0.00014836670179135933, "loss": 0.754, "step": 980 }, { "epoch": 0.52, "learning_rate": 0.00014783983140147523, "loss": 0.7126, "step": 990 }, { "epoch": 0.53, "learning_rate": 0.00014731296101159116, "loss": 0.7927, "step": 1000 }, { "epoch": 0.53, "eval_accuracy": 0.7307692307692307, "eval_loss": 0.7288175225257874, "eval_runtime": 40.9893, "eval_samples_per_second": 92.609, "eval_steps_per_second": 11.588, "step": 1000 }, { "epoch": 0.53, "learning_rate": 0.00014678609062170707, "loss": 0.8823, "step": 1010 }, { "epoch": 0.54, "learning_rate": 0.00014625922023182297, "loss": 0.6232, "step": 1020 }, { "epoch": 0.54, "learning_rate": 0.0001457323498419389, "loss": 0.7342, "step": 1030 }, { "epoch": 0.55, "learning_rate": 0.0001452054794520548, "loss": 0.7456, "step": 1040 }, { "epoch": 0.55, "eval_accuracy": 0.7494731296101159, "eval_loss": 0.7018606662750244, "eval_runtime": 41.1744, "eval_samples_per_second": 92.193, "eval_steps_per_second": 11.536, "step": 1040 }, { "epoch": 0.55, "learning_rate": 0.00014467860906217073, "loss": 0.7618, "step": 1050 }, { "epoch": 0.56, "learning_rate": 0.0001441517386722866, "loss": 0.6762, "step": 1060 }, { "epoch": 0.56, "learning_rate": 0.00014362486828240254, "loss": 0.7244, "step": 1070 }, { "epoch": 0.57, "learning_rate": 0.00014309799789251844, "loss": 0.6604, "step": 1080 }, { "epoch": 0.57, "eval_accuracy": 0.7386722866174921, "eval_loss": 0.737483561038971, "eval_runtime": 41.1001, "eval_samples_per_second": 92.36, "eval_steps_per_second": 11.557, "step": 1080 }, { "epoch": 0.57, "learning_rate": 0.00014257112750263435, "loss": 0.8146, "step": 1090 }, { "epoch": 0.58, "learning_rate": 0.00014204425711275028, "loss": 0.7946, "step": 1100 }, { "epoch": 0.58, "learning_rate": 0.00014151738672286618, "loss": 0.8021, "step": 1110 }, { "epoch": 0.59, "learning_rate": 0.0001409905163329821, "loss": 0.6272, "step": 1120 }, { "epoch": 0.59, "eval_accuracy": 0.755795574288725, "eval_loss": 0.6697063446044922, "eval_runtime": 41.0445, "eval_samples_per_second": 92.485, "eval_steps_per_second": 11.573, "step": 1120 }, { "epoch": 0.6, "learning_rate": 0.000140463645943098, "loss": 0.7822, "step": 1130 }, { "epoch": 0.6, "learning_rate": 0.00013993677555321392, "loss": 0.5635, "step": 1140 }, { "epoch": 0.61, "learning_rate": 0.00013940990516332982, "loss": 0.8475, "step": 1150 }, { "epoch": 0.61, "learning_rate": 0.00013888303477344572, "loss": 0.6899, "step": 1160 }, { "epoch": 0.61, "eval_accuracy": 0.7510537407797682, "eval_loss": 0.6927923560142517, "eval_runtime": 41.0153, "eval_samples_per_second": 92.551, "eval_steps_per_second": 11.581, "step": 1160 }, { "epoch": 0.62, "learning_rate": 0.00013835616438356166, "loss": 0.6789, "step": 1170 }, { "epoch": 0.62, "learning_rate": 0.00013782929399367756, "loss": 0.7409, "step": 1180 }, { "epoch": 0.63, "learning_rate": 0.0001373024236037935, "loss": 0.6629, "step": 1190 }, { "epoch": 0.63, "learning_rate": 0.0001367755532139094, "loss": 0.7612, "step": 1200 }, { "epoch": 0.63, "eval_accuracy": 0.7726554267650158, "eval_loss": 0.6404809355735779, "eval_runtime": 40.9053, "eval_samples_per_second": 92.8, "eval_steps_per_second": 11.612, "step": 1200 }, { "epoch": 0.64, "learning_rate": 0.0001362486828240253, "loss": 0.727, "step": 1210 }, { "epoch": 0.64, "learning_rate": 0.00013572181243414123, "loss": 0.6921, "step": 1220 }, { "epoch": 0.65, "learning_rate": 0.0001351949420442571, "loss": 0.5298, "step": 1230 }, { "epoch": 0.65, "learning_rate": 0.00013466807165437303, "loss": 0.5398, "step": 1240 }, { "epoch": 0.65, "eval_accuracy": 0.7673867228661749, "eval_loss": 0.6329030394554138, "eval_runtime": 41.1422, "eval_samples_per_second": 92.265, "eval_steps_per_second": 11.545, "step": 1240 }, { "epoch": 0.66, "learning_rate": 0.00013414120126448894, "loss": 0.6673, "step": 1250 }, { "epoch": 0.66, "learning_rate": 0.00013361433087460484, "loss": 0.682, "step": 1260 }, { "epoch": 0.67, "learning_rate": 0.00013308746048472077, "loss": 0.6457, "step": 1270 }, { "epoch": 0.67, "learning_rate": 0.00013256059009483667, "loss": 0.5942, "step": 1280 }, { "epoch": 0.67, "eval_accuracy": 0.7481559536354057, "eval_loss": 0.6839689016342163, "eval_runtime": 41.0621, "eval_samples_per_second": 92.445, "eval_steps_per_second": 11.568, "step": 1280 }, { "epoch": 0.68, "learning_rate": 0.0001320337197049526, "loss": 0.5086, "step": 1290 }, { "epoch": 0.68, "learning_rate": 0.00013150684931506848, "loss": 0.6658, "step": 1300 }, { "epoch": 0.69, "learning_rate": 0.0001309799789251844, "loss": 0.7157, "step": 1310 }, { "epoch": 0.7, "learning_rate": 0.00013045310853530031, "loss": 0.5924, "step": 1320 }, { "epoch": 0.7, "eval_accuracy": 0.7795047418335089, "eval_loss": 0.6245933175086975, "eval_runtime": 40.9868, "eval_samples_per_second": 92.615, "eval_steps_per_second": 11.589, "step": 1320 }, { "epoch": 0.7, "learning_rate": 0.00012992623814541622, "loss": 0.6697, "step": 1330 }, { "epoch": 0.71, "learning_rate": 0.00012939936775553215, "loss": 0.8167, "step": 1340 }, { "epoch": 0.71, "learning_rate": 0.00012887249736564805, "loss": 0.641, "step": 1350 }, { "epoch": 0.72, "learning_rate": 0.00012834562697576398, "loss": 0.7035, "step": 1360 }, { "epoch": 0.72, "eval_accuracy": 0.7710748155953635, "eval_loss": 0.6422901153564453, "eval_runtime": 41.0116, "eval_samples_per_second": 92.559, "eval_steps_per_second": 11.582, "step": 1360 }, { "epoch": 0.72, "learning_rate": 0.00012781875658587989, "loss": 0.6054, "step": 1370 }, { "epoch": 0.73, "learning_rate": 0.0001272918861959958, "loss": 0.5799, "step": 1380 }, { "epoch": 0.73, "learning_rate": 0.00012676501580611172, "loss": 0.6575, "step": 1390 }, { "epoch": 0.74, "learning_rate": 0.0001262381454162276, "loss": 0.7114, "step": 1400 }, { "epoch": 0.74, "eval_accuracy": 0.7755532139093783, "eval_loss": 0.6289725303649902, "eval_runtime": 41.0218, "eval_samples_per_second": 92.536, "eval_steps_per_second": 11.579, "step": 1400 }, { "epoch": 0.74, "learning_rate": 0.00012571127502634353, "loss": 0.6659, "step": 1410 }, { "epoch": 0.75, "learning_rate": 0.00012518440463645943, "loss": 0.5519, "step": 1420 }, { "epoch": 0.75, "learning_rate": 0.00012465753424657536, "loss": 0.7071, "step": 1430 }, { "epoch": 0.76, "learning_rate": 0.00012413066385669126, "loss": 0.477, "step": 1440 }, { "epoch": 0.76, "eval_accuracy": 0.7895152792413066, "eval_loss": 0.5919615030288696, "eval_runtime": 41.0316, "eval_samples_per_second": 92.514, "eval_steps_per_second": 11.576, "step": 1440 }, { "epoch": 0.76, "learning_rate": 0.00012360379346680717, "loss": 0.5786, "step": 1450 }, { "epoch": 0.77, "learning_rate": 0.0001230769230769231, "loss": 0.624, "step": 1460 }, { "epoch": 0.77, "learning_rate": 0.00012255005268703897, "loss": 0.5879, "step": 1470 }, { "epoch": 0.78, "learning_rate": 0.0001220231822971549, "loss": 0.6203, "step": 1480 }, { "epoch": 0.78, "eval_accuracy": 0.7881981032665965, "eval_loss": 0.6011049747467041, "eval_runtime": 41.3051, "eval_samples_per_second": 91.902, "eval_steps_per_second": 11.5, "step": 1480 }, { "epoch": 0.79, "learning_rate": 0.00012149631190727082, "loss": 0.6142, "step": 1490 }, { "epoch": 0.79, "learning_rate": 0.00012096944151738674, "loss": 0.7426, "step": 1500 }, { "epoch": 0.8, "learning_rate": 0.00012044257112750264, "loss": 0.5851, "step": 1510 }, { "epoch": 0.8, "learning_rate": 0.00011991570073761856, "loss": 0.4557, "step": 1520 }, { "epoch": 0.8, "eval_accuracy": 0.773972602739726, "eval_loss": 0.6244290471076965, "eval_runtime": 41.0912, "eval_samples_per_second": 92.38, "eval_steps_per_second": 11.56, "step": 1520 }, { "epoch": 0.81, "learning_rate": 0.00011938883034773448, "loss": 0.7113, "step": 1530 }, { "epoch": 0.81, "learning_rate": 0.00011886195995785037, "loss": 0.4499, "step": 1540 }, { "epoch": 0.82, "learning_rate": 0.00011833508956796628, "loss": 0.7979, "step": 1550 }, { "epoch": 0.82, "learning_rate": 0.0001178082191780822, "loss": 0.6389, "step": 1560 }, { "epoch": 0.82, "eval_accuracy": 0.7968914646996839, "eval_loss": 0.5629897117614746, "eval_runtime": 41.1832, "eval_samples_per_second": 92.174, "eval_steps_per_second": 11.534, "step": 1560 }, { "epoch": 0.83, "learning_rate": 0.00011728134878819812, "loss": 0.5088, "step": 1570 }, { "epoch": 0.83, "learning_rate": 0.00011675447839831402, "loss": 0.5226, "step": 1580 }, { "epoch": 0.84, "learning_rate": 0.00011622760800842994, "loss": 0.7143, "step": 1590 }, { "epoch": 0.84, "learning_rate": 0.00011570073761854585, "loss": 0.6855, "step": 1600 }, { "epoch": 0.84, "eval_accuracy": 0.7966280295047419, "eval_loss": 0.570618748664856, "eval_runtime": 41.1893, "eval_samples_per_second": 92.16, "eval_steps_per_second": 11.532, "step": 1600 }, { "epoch": 0.85, "learning_rate": 0.00011517386722866174, "loss": 0.5177, "step": 1610 }, { "epoch": 0.85, "learning_rate": 0.00011464699683877766, "loss": 0.6428, "step": 1620 }, { "epoch": 0.86, "learning_rate": 0.00011412012644889358, "loss": 0.6207, "step": 1630 }, { "epoch": 0.86, "learning_rate": 0.00011359325605900948, "loss": 0.5935, "step": 1640 }, { "epoch": 0.86, "eval_accuracy": 0.8071654373024236, "eval_loss": 0.553897500038147, "eval_runtime": 41.1107, "eval_samples_per_second": 92.336, "eval_steps_per_second": 11.554, "step": 1640 }, { "epoch": 0.87, "learning_rate": 0.0001130663856691254, "loss": 0.5959, "step": 1650 }, { "epoch": 0.87, "learning_rate": 0.00011253951527924131, "loss": 0.6435, "step": 1660 }, { "epoch": 0.88, "learning_rate": 0.00011201264488935723, "loss": 0.6994, "step": 1670 }, { "epoch": 0.89, "learning_rate": 0.00011148577449947313, "loss": 0.6779, "step": 1680 }, { "epoch": 0.89, "eval_accuracy": 0.7829293993677555, "eval_loss": 0.5908519625663757, "eval_runtime": 40.8752, "eval_samples_per_second": 92.868, "eval_steps_per_second": 11.621, "step": 1680 }, { "epoch": 0.89, "learning_rate": 0.00011095890410958905, "loss": 0.6209, "step": 1690 }, { "epoch": 0.9, "learning_rate": 0.00011043203371970497, "loss": 0.4842, "step": 1700 }, { "epoch": 0.9, "learning_rate": 0.00010990516332982086, "loss": 0.6179, "step": 1710 }, { "epoch": 0.91, "learning_rate": 0.00010937829293993678, "loss": 0.5032, "step": 1720 }, { "epoch": 0.91, "eval_accuracy": 0.8174394099051633, "eval_loss": 0.5368214249610901, "eval_runtime": 40.9272, "eval_samples_per_second": 92.75, "eval_steps_per_second": 11.606, "step": 1720 }, { "epoch": 0.91, "learning_rate": 0.00010885142255005269, "loss": 0.4929, "step": 1730 }, { "epoch": 0.92, "learning_rate": 0.00010832455216016861, "loss": 0.5158, "step": 1740 }, { "epoch": 0.92, "learning_rate": 0.00010779768177028451, "loss": 0.5903, "step": 1750 }, { "epoch": 0.93, "learning_rate": 0.00010727081138040043, "loss": 0.5604, "step": 1760 }, { "epoch": 0.93, "eval_accuracy": 0.773709167544784, "eval_loss": 0.6411083936691284, "eval_runtime": 40.9594, "eval_samples_per_second": 92.677, "eval_steps_per_second": 11.597, "step": 1760 }, { "epoch": 0.93, "learning_rate": 0.00010674394099051635, "loss": 0.7767, "step": 1770 }, { "epoch": 0.94, "learning_rate": 0.00010621707060063224, "loss": 0.6115, "step": 1780 }, { "epoch": 0.94, "learning_rate": 0.00010569020021074815, "loss": 0.49, "step": 1790 }, { "epoch": 0.95, "learning_rate": 0.00010516332982086407, "loss": 0.5398, "step": 1800 }, { "epoch": 0.95, "eval_accuracy": 0.7642255005268704, "eval_loss": 0.6474949717521667, "eval_runtime": 41.1724, "eval_samples_per_second": 92.198, "eval_steps_per_second": 11.537, "step": 1800 }, { "epoch": 0.95, "learning_rate": 0.00010463645943097999, "loss": 0.7346, "step": 1810 }, { "epoch": 0.96, "learning_rate": 0.00010410958904109589, "loss": 0.5711, "step": 1820 }, { "epoch": 0.96, "learning_rate": 0.00010358271865121181, "loss": 0.4447, "step": 1830 }, { "epoch": 0.97, "learning_rate": 0.00010305584826132772, "loss": 0.5243, "step": 1840 }, { "epoch": 0.97, "eval_accuracy": 0.7829293993677555, "eval_loss": 0.5976884365081787, "eval_runtime": 41.1462, "eval_samples_per_second": 92.256, "eval_steps_per_second": 11.544, "step": 1840 }, { "epoch": 0.97, "learning_rate": 0.00010252897787144363, "loss": 0.6316, "step": 1850 }, { "epoch": 0.98, "learning_rate": 0.00010200210748155954, "loss": 0.4889, "step": 1860 }, { "epoch": 0.99, "learning_rate": 0.00010147523709167546, "loss": 0.5548, "step": 1870 }, { "epoch": 0.99, "learning_rate": 0.00010094836670179138, "loss": 0.555, "step": 1880 }, { "epoch": 0.99, "eval_accuracy": 0.8076923076923077, "eval_loss": 0.5374019742012024, "eval_runtime": 41.2791, "eval_samples_per_second": 91.959, "eval_steps_per_second": 11.507, "step": 1880 }, { "epoch": 1.0, "learning_rate": 0.00010042149631190727, "loss": 0.5253, "step": 1890 }, { "epoch": 1.0, "learning_rate": 9.989462592202319e-05, "loss": 0.4858, "step": 1900 }, { "epoch": 1.01, "learning_rate": 9.936775553213909e-05, "loss": 0.4391, "step": 1910 }, { "epoch": 1.01, "learning_rate": 9.884088514225502e-05, "loss": 0.3991, "step": 1920 }, { "epoch": 1.01, "eval_accuracy": 0.8079557428872497, "eval_loss": 0.5598599910736084, "eval_runtime": 41.1706, "eval_samples_per_second": 92.202, "eval_steps_per_second": 11.537, "step": 1920 }, { "epoch": 1.02, "learning_rate": 9.831401475237092e-05, "loss": 0.4727, "step": 1930 }, { "epoch": 1.02, "learning_rate": 9.778714436248684e-05, "loss": 0.4029, "step": 1940 }, { "epoch": 1.03, "learning_rate": 9.726027397260274e-05, "loss": 0.4803, "step": 1950 }, { "epoch": 1.03, "learning_rate": 9.673340358271865e-05, "loss": 0.418, "step": 1960 }, { "epoch": 1.03, "eval_accuracy": 0.8308746048472075, "eval_loss": 0.5048983693122864, "eval_runtime": 41.4141, "eval_samples_per_second": 91.66, "eval_steps_per_second": 11.47, "step": 1960 }, { "epoch": 1.04, "learning_rate": 9.620653319283456e-05, "loss": 0.3603, "step": 1970 }, { "epoch": 1.04, "learning_rate": 9.567966280295048e-05, "loss": 0.3883, "step": 1980 }, { "epoch": 1.05, "learning_rate": 9.51527924130664e-05, "loss": 0.3467, "step": 1990 }, { "epoch": 1.05, "learning_rate": 9.46259220231823e-05, "loss": 0.4145, "step": 2000 }, { "epoch": 1.05, "eval_accuracy": 0.8116438356164384, "eval_loss": 0.5375648140907288, "eval_runtime": 41.1926, "eval_samples_per_second": 92.152, "eval_steps_per_second": 11.531, "step": 2000 }, { "epoch": 1.06, "learning_rate": 9.409905163329822e-05, "loss": 0.4242, "step": 2010 }, { "epoch": 1.06, "learning_rate": 9.357218124341412e-05, "loss": 0.3996, "step": 2020 }, { "epoch": 1.07, "learning_rate": 9.304531085353004e-05, "loss": 0.2548, "step": 2030 }, { "epoch": 1.07, "learning_rate": 9.251844046364595e-05, "loss": 0.4141, "step": 2040 }, { "epoch": 1.07, "eval_accuracy": 0.8229715489989463, "eval_loss": 0.5164880752563477, "eval_runtime": 41.0208, "eval_samples_per_second": 92.538, "eval_steps_per_second": 11.579, "step": 2040 }, { "epoch": 1.08, "learning_rate": 9.199157007376186e-05, "loss": 0.3818, "step": 2050 }, { "epoch": 1.09, "learning_rate": 9.146469968387778e-05, "loss": 0.3252, "step": 2060 }, { "epoch": 1.09, "learning_rate": 9.093782929399368e-05, "loss": 0.3749, "step": 2070 }, { "epoch": 1.1, "learning_rate": 9.041095890410958e-05, "loss": 0.3729, "step": 2080 }, { "epoch": 1.1, "eval_accuracy": 0.8303477344573235, "eval_loss": 0.5023428201675415, "eval_runtime": 41.3017, "eval_samples_per_second": 91.909, "eval_steps_per_second": 11.501, "step": 2080 }, { "epoch": 1.1, "learning_rate": 8.988408851422551e-05, "loss": 0.4618, "step": 2090 }, { "epoch": 1.11, "learning_rate": 8.935721812434142e-05, "loss": 0.3952, "step": 2100 }, { "epoch": 1.11, "learning_rate": 8.883034773445733e-05, "loss": 0.3239, "step": 2110 }, { "epoch": 1.12, "learning_rate": 8.830347734457324e-05, "loss": 0.4594, "step": 2120 }, { "epoch": 1.12, "eval_accuracy": 0.8374604847207587, "eval_loss": 0.4935886263847351, "eval_runtime": 42.0509, "eval_samples_per_second": 90.271, "eval_steps_per_second": 11.296, "step": 2120 }, { "epoch": 1.12, "learning_rate": 8.777660695468915e-05, "loss": 0.4036, "step": 2130 }, { "epoch": 1.13, "learning_rate": 8.724973656480506e-05, "loss": 0.2812, "step": 2140 }, { "epoch": 1.13, "learning_rate": 8.672286617492097e-05, "loss": 0.3308, "step": 2150 }, { "epoch": 1.14, "learning_rate": 8.619599578503689e-05, "loss": 0.3766, "step": 2160 }, { "epoch": 1.14, "eval_accuracy": 0.833245521601686, "eval_loss": 0.5025116801261902, "eval_runtime": 41.7352, "eval_samples_per_second": 90.954, "eval_steps_per_second": 11.381, "step": 2160 }, { "epoch": 1.14, "learning_rate": 8.56691253951528e-05, "loss": 0.2861, "step": 2170 }, { "epoch": 1.15, "learning_rate": 8.514225500526871e-05, "loss": 0.3525, "step": 2180 }, { "epoch": 1.15, "learning_rate": 8.461538461538461e-05, "loss": 0.4213, "step": 2190 }, { "epoch": 1.16, "learning_rate": 8.408851422550053e-05, "loss": 0.3628, "step": 2200 }, { "epoch": 1.16, "eval_accuracy": 0.8285036880927292, "eval_loss": 0.5210418105125427, "eval_runtime": 41.7139, "eval_samples_per_second": 91.001, "eval_steps_per_second": 11.387, "step": 2200 }, { "epoch": 1.16, "learning_rate": 8.356164383561645e-05, "loss": 0.5404, "step": 2210 }, { "epoch": 1.17, "learning_rate": 8.303477344573235e-05, "loss": 0.383, "step": 2220 }, { "epoch": 1.17, "learning_rate": 8.250790305584827e-05, "loss": 0.3332, "step": 2230 }, { "epoch": 1.18, "learning_rate": 8.198103266596417e-05, "loss": 0.4392, "step": 2240 }, { "epoch": 1.18, "eval_accuracy": 0.8387776606954689, "eval_loss": 0.49416255950927734, "eval_runtime": 41.5117, "eval_samples_per_second": 91.444, "eval_steps_per_second": 11.443, "step": 2240 }, { "epoch": 1.19, "learning_rate": 8.145416227608009e-05, "loss": 0.3811, "step": 2250 }, { "epoch": 1.19, "learning_rate": 8.0927291886196e-05, "loss": 0.3708, "step": 2260 }, { "epoch": 1.2, "learning_rate": 8.040042149631191e-05, "loss": 0.5215, "step": 2270 }, { "epoch": 1.2, "learning_rate": 7.987355110642783e-05, "loss": 0.4257, "step": 2280 }, { "epoch": 1.2, "eval_accuracy": 0.8300842992623815, "eval_loss": 0.49942925572395325, "eval_runtime": 41.6212, "eval_samples_per_second": 91.203, "eval_steps_per_second": 11.412, "step": 2280 }, { "epoch": 1.21, "learning_rate": 7.934668071654373e-05, "loss": 0.4234, "step": 2290 }, { "epoch": 1.21, "learning_rate": 7.881981032665965e-05, "loss": 0.3984, "step": 2300 }, { "epoch": 1.22, "learning_rate": 7.829293993677555e-05, "loss": 0.4514, "step": 2310 }, { "epoch": 1.22, "learning_rate": 7.776606954689147e-05, "loss": 0.3442, "step": 2320 }, { "epoch": 1.22, "eval_accuracy": 0.8403582718651211, "eval_loss": 0.4866703748703003, "eval_runtime": 41.5299, "eval_samples_per_second": 91.404, "eval_steps_per_second": 11.438, "step": 2320 }, { "epoch": 1.23, "learning_rate": 7.723919915700738e-05, "loss": 0.3327, "step": 2330 }, { "epoch": 1.23, "learning_rate": 7.671232876712329e-05, "loss": 0.4527, "step": 2340 }, { "epoch": 1.24, "learning_rate": 7.61854583772392e-05, "loss": 0.3862, "step": 2350 }, { "epoch": 1.24, "learning_rate": 7.565858798735511e-05, "loss": 0.4008, "step": 2360 }, { "epoch": 1.24, "eval_accuracy": 0.8163856691253951, "eval_loss": 0.5492302775382996, "eval_runtime": 41.4705, "eval_samples_per_second": 91.535, "eval_steps_per_second": 11.454, "step": 2360 }, { "epoch": 1.25, "learning_rate": 7.513171759747102e-05, "loss": 0.4259, "step": 2370 }, { "epoch": 1.25, "learning_rate": 7.460484720758694e-05, "loss": 0.3364, "step": 2380 }, { "epoch": 1.26, "learning_rate": 7.407797681770284e-05, "loss": 0.3355, "step": 2390 }, { "epoch": 1.26, "learning_rate": 7.355110642781876e-05, "loss": 0.3541, "step": 2400 }, { "epoch": 1.26, "eval_accuracy": 0.8335089567966281, "eval_loss": 0.5003746151924133, "eval_runtime": 41.6532, "eval_samples_per_second": 91.134, "eval_steps_per_second": 11.404, "step": 2400 }, { "epoch": 1.27, "learning_rate": 7.302423603793467e-05, "loss": 0.3951, "step": 2410 }, { "epoch": 1.28, "learning_rate": 7.249736564805058e-05, "loss": 0.3318, "step": 2420 }, { "epoch": 1.28, "learning_rate": 7.19704952581665e-05, "loss": 0.3423, "step": 2430 }, { "epoch": 1.29, "learning_rate": 7.144362486828242e-05, "loss": 0.3842, "step": 2440 }, { "epoch": 1.29, "eval_accuracy": 0.8435194942044257, "eval_loss": 0.46031510829925537, "eval_runtime": 41.7276, "eval_samples_per_second": 90.971, "eval_steps_per_second": 11.383, "step": 2440 }, { "epoch": 1.29, "learning_rate": 7.091675447839832e-05, "loss": 0.4708, "step": 2450 }, { "epoch": 1.3, "learning_rate": 7.038988408851422e-05, "loss": 0.2436, "step": 2460 }, { "epoch": 1.3, "learning_rate": 6.986301369863014e-05, "loss": 0.5203, "step": 2470 }, { "epoch": 1.31, "learning_rate": 6.933614330874604e-05, "loss": 0.3398, "step": 2480 }, { "epoch": 1.31, "eval_accuracy": 0.8519494204425712, "eval_loss": 0.44302287697792053, "eval_runtime": 41.6802, "eval_samples_per_second": 91.074, "eval_steps_per_second": 11.396, "step": 2480 }, { "epoch": 1.31, "learning_rate": 6.880927291886196e-05, "loss": 0.2589, "step": 2490 }, { "epoch": 1.32, "learning_rate": 6.828240252897788e-05, "loss": 0.3447, "step": 2500 }, { "epoch": 1.32, "learning_rate": 6.77555321390938e-05, "loss": 0.2995, "step": 2510 }, { "epoch": 1.33, "learning_rate": 6.72286617492097e-05, "loss": 0.3823, "step": 2520 }, { "epoch": 1.33, "eval_accuracy": 0.8466807165437302, "eval_loss": 0.45392054319381714, "eval_runtime": 41.6585, "eval_samples_per_second": 91.122, "eval_steps_per_second": 11.402, "step": 2520 }, { "epoch": 1.33, "learning_rate": 6.67017913593256e-05, "loss": 0.3361, "step": 2530 }, { "epoch": 1.34, "learning_rate": 6.617492096944152e-05, "loss": 0.354, "step": 2540 }, { "epoch": 1.34, "learning_rate": 6.564805057955743e-05, "loss": 0.3558, "step": 2550 }, { "epoch": 1.35, "learning_rate": 6.512118018967335e-05, "loss": 0.452, "step": 2560 }, { "epoch": 1.35, "eval_accuracy": 0.8482613277133825, "eval_loss": 0.4561702609062195, "eval_runtime": 41.5457, "eval_samples_per_second": 91.369, "eval_steps_per_second": 11.433, "step": 2560 }, { "epoch": 1.35, "learning_rate": 6.459430979978925e-05, "loss": 0.3694, "step": 2570 }, { "epoch": 1.36, "learning_rate": 6.406743940990516e-05, "loss": 0.3642, "step": 2580 }, { "epoch": 1.36, "learning_rate": 6.354056902002108e-05, "loss": 0.3627, "step": 2590 }, { "epoch": 1.37, "learning_rate": 6.301369863013699e-05, "loss": 0.3121, "step": 2600 }, { "epoch": 1.37, "eval_accuracy": 0.8548472075869337, "eval_loss": 0.4360343813896179, "eval_runtime": 41.6262, "eval_samples_per_second": 91.193, "eval_steps_per_second": 11.411, "step": 2600 }, { "epoch": 1.38, "learning_rate": 6.248682824025291e-05, "loss": 0.3276, "step": 2610 }, { "epoch": 1.38, "learning_rate": 6.195995785036881e-05, "loss": 0.2538, "step": 2620 }, { "epoch": 1.39, "learning_rate": 6.143308746048473e-05, "loss": 0.2862, "step": 2630 }, { "epoch": 1.39, "learning_rate": 6.090621707060063e-05, "loss": 0.3032, "step": 2640 }, { "epoch": 1.39, "eval_accuracy": 0.8535300316122234, "eval_loss": 0.4355548620223999, "eval_runtime": 41.6726, "eval_samples_per_second": 91.091, "eval_steps_per_second": 11.398, "step": 2640 }, { "epoch": 1.4, "learning_rate": 6.037934668071654e-05, "loss": 0.3316, "step": 2650 }, { "epoch": 1.4, "learning_rate": 5.985247629083246e-05, "loss": 0.3128, "step": 2660 }, { "epoch": 1.41, "learning_rate": 5.932560590094837e-05, "loss": 0.438, "step": 2670 }, { "epoch": 1.41, "learning_rate": 5.879873551106429e-05, "loss": 0.361, "step": 2680 }, { "epoch": 1.41, "eval_accuracy": 0.8530031612223393, "eval_loss": 0.4372209906578064, "eval_runtime": 41.6096, "eval_samples_per_second": 91.229, "eval_steps_per_second": 11.416, "step": 2680 }, { "epoch": 1.42, "learning_rate": 5.827186512118019e-05, "loss": 0.4102, "step": 2690 }, { "epoch": 1.42, "learning_rate": 5.774499473129611e-05, "loss": 0.3263, "step": 2700 }, { "epoch": 1.43, "learning_rate": 5.721812434141202e-05, "loss": 0.2175, "step": 2710 }, { "epoch": 1.43, "learning_rate": 5.669125395152792e-05, "loss": 0.5349, "step": 2720 }, { "epoch": 1.43, "eval_accuracy": 0.8532665964172813, "eval_loss": 0.4406832754611969, "eval_runtime": 41.5463, "eval_samples_per_second": 91.368, "eval_steps_per_second": 11.433, "step": 2720 }, { "epoch": 1.44, "learning_rate": 5.616438356164384e-05, "loss": 0.3344, "step": 2730 }, { "epoch": 1.44, "learning_rate": 5.563751317175975e-05, "loss": 0.4089, "step": 2740 }, { "epoch": 1.45, "learning_rate": 5.5110642781875665e-05, "loss": 0.4002, "step": 2750 }, { "epoch": 1.45, "learning_rate": 5.4583772391991575e-05, "loss": 0.2898, "step": 2760 }, { "epoch": 1.45, "eval_accuracy": 0.8606427818756586, "eval_loss": 0.41055828332901, "eval_runtime": 41.7124, "eval_samples_per_second": 91.004, "eval_steps_per_second": 11.388, "step": 2760 }, { "epoch": 1.46, "learning_rate": 5.405690200210748e-05, "loss": 0.4475, "step": 2770 }, { "epoch": 1.46, "learning_rate": 5.3530031612223395e-05, "loss": 0.3652, "step": 2780 }, { "epoch": 1.47, "learning_rate": 5.3003161222339306e-05, "loss": 0.3678, "step": 2790 }, { "epoch": 1.48, "learning_rate": 5.247629083245522e-05, "loss": 0.2751, "step": 2800 }, { "epoch": 1.48, "eval_accuracy": 0.8648577449947313, "eval_loss": 0.4102562963962555, "eval_runtime": 41.6322, "eval_samples_per_second": 91.179, "eval_steps_per_second": 11.409, "step": 2800 }, { "epoch": 1.48, "learning_rate": 5.1949420442571126e-05, "loss": 0.4149, "step": 2810 }, { "epoch": 1.49, "learning_rate": 5.142255005268704e-05, "loss": 0.2141, "step": 2820 }, { "epoch": 1.49, "learning_rate": 5.089567966280295e-05, "loss": 0.331, "step": 2830 }, { "epoch": 1.5, "learning_rate": 5.036880927291886e-05, "loss": 0.2966, "step": 2840 }, { "epoch": 1.5, "eval_accuracy": 0.8635405690200211, "eval_loss": 0.4106617271900177, "eval_runtime": 41.6805, "eval_samples_per_second": 91.074, "eval_steps_per_second": 11.396, "step": 2840 }, { "epoch": 1.5, "learning_rate": 4.984193888303478e-05, "loss": 0.3219, "step": 2850 }, { "epoch": 1.51, "learning_rate": 4.9315068493150684e-05, "loss": 0.3621, "step": 2860 }, { "epoch": 1.51, "learning_rate": 4.8788198103266594e-05, "loss": 0.3827, "step": 2870 }, { "epoch": 1.52, "learning_rate": 4.826132771338251e-05, "loss": 0.2146, "step": 2880 }, { "epoch": 1.52, "eval_accuracy": 0.8619599578503688, "eval_loss": 0.42587053775787354, "eval_runtime": 41.6661, "eval_samples_per_second": 91.105, "eval_steps_per_second": 11.4, "step": 2880 }, { "epoch": 1.52, "learning_rate": 4.773445732349842e-05, "loss": 0.2426, "step": 2890 }, { "epoch": 1.53, "learning_rate": 4.720758693361433e-05, "loss": 0.2925, "step": 2900 }, { "epoch": 1.53, "learning_rate": 4.668071654373025e-05, "loss": 0.308, "step": 2910 }, { "epoch": 1.54, "learning_rate": 4.615384615384616e-05, "loss": 0.4042, "step": 2920 }, { "epoch": 1.54, "eval_accuracy": 0.8714436248682824, "eval_loss": 0.39998331665992737, "eval_runtime": 41.7183, "eval_samples_per_second": 90.991, "eval_steps_per_second": 11.386, "step": 2920 }, { "epoch": 1.54, "learning_rate": 4.562697576396207e-05, "loss": 0.4615, "step": 2930 }, { "epoch": 1.55, "learning_rate": 4.510010537407798e-05, "loss": 0.3321, "step": 2940 }, { "epoch": 1.55, "learning_rate": 4.457323498419389e-05, "loss": 0.2821, "step": 2950 }, { "epoch": 1.56, "learning_rate": 4.4046364594309806e-05, "loss": 0.2715, "step": 2960 }, { "epoch": 1.56, "eval_accuracy": 0.8695995785036881, "eval_loss": 0.40408244729042053, "eval_runtime": 41.2952, "eval_samples_per_second": 91.923, "eval_steps_per_second": 11.503, "step": 2960 }, { "epoch": 1.56, "learning_rate": 4.3519494204425716e-05, "loss": 0.3723, "step": 2970 }, { "epoch": 1.57, "learning_rate": 4.2992623814541626e-05, "loss": 0.2885, "step": 2980 }, { "epoch": 1.58, "learning_rate": 4.2465753424657536e-05, "loss": 0.253, "step": 2990 }, { "epoch": 1.58, "learning_rate": 4.1938883034773446e-05, "loss": 0.2795, "step": 3000 }, { "epoch": 1.58, "eval_accuracy": 0.869072708113804, "eval_loss": 0.4010108709335327, "eval_runtime": 41.3963, "eval_samples_per_second": 91.699, "eval_steps_per_second": 11.474, "step": 3000 }, { "epoch": 1.59, "learning_rate": 4.1412012644889356e-05, "loss": 0.2582, "step": 3010 }, { "epoch": 1.59, "learning_rate": 4.088514225500527e-05, "loss": 0.3652, "step": 3020 }, { "epoch": 1.6, "learning_rate": 4.0358271865121184e-05, "loss": 0.404, "step": 3030 }, { "epoch": 1.6, "learning_rate": 3.9831401475237094e-05, "loss": 0.2104, "step": 3040 }, { "epoch": 1.6, "eval_accuracy": 0.8756585879873551, "eval_loss": 0.38390201330184937, "eval_runtime": 41.4547, "eval_samples_per_second": 91.57, "eval_steps_per_second": 11.458, "step": 3040 }, { "epoch": 1.61, "learning_rate": 3.9304531085353004e-05, "loss": 0.2561, "step": 3050 }, { "epoch": 1.61, "learning_rate": 3.8777660695468914e-05, "loss": 0.328, "step": 3060 }, { "epoch": 1.62, "learning_rate": 3.8250790305584824e-05, "loss": 0.2671, "step": 3070 }, { "epoch": 1.62, "learning_rate": 3.772391991570074e-05, "loss": 0.2486, "step": 3080 }, { "epoch": 1.62, "eval_accuracy": 0.8759220231822972, "eval_loss": 0.38206353783607483, "eval_runtime": 41.5068, "eval_samples_per_second": 91.455, "eval_steps_per_second": 11.444, "step": 3080 }, { "epoch": 1.63, "learning_rate": 3.719704952581665e-05, "loss": 0.3262, "step": 3090 }, { "epoch": 1.63, "learning_rate": 3.667017913593256e-05, "loss": 0.3114, "step": 3100 }, { "epoch": 1.64, "learning_rate": 3.614330874604848e-05, "loss": 0.3031, "step": 3110 }, { "epoch": 1.64, "learning_rate": 3.561643835616438e-05, "loss": 0.3005, "step": 3120 }, { "epoch": 1.64, "eval_accuracy": 0.8695995785036881, "eval_loss": 0.3960082530975342, "eval_runtime": 41.7318, "eval_samples_per_second": 90.962, "eval_steps_per_second": 11.382, "step": 3120 }, { "epoch": 1.65, "learning_rate": 3.50895679662803e-05, "loss": 0.3876, "step": 3130 }, { "epoch": 1.65, "learning_rate": 3.456269757639621e-05, "loss": 0.3107, "step": 3140 }, { "epoch": 1.66, "learning_rate": 3.403582718651212e-05, "loss": 0.2807, "step": 3150 }, { "epoch": 1.66, "learning_rate": 3.350895679662803e-05, "loss": 0.2839, "step": 3160 }, { "epoch": 1.66, "eval_accuracy": 0.8746048472075869, "eval_loss": 0.38452914357185364, "eval_runtime": 41.6884, "eval_samples_per_second": 91.056, "eval_steps_per_second": 11.394, "step": 3160 }, { "epoch": 1.67, "learning_rate": 3.2982086406743946e-05, "loss": 0.3845, "step": 3170 }, { "epoch": 1.68, "learning_rate": 3.245521601685985e-05, "loss": 0.2933, "step": 3180 }, { "epoch": 1.68, "learning_rate": 3.1928345626975767e-05, "loss": 0.3032, "step": 3190 }, { "epoch": 1.69, "learning_rate": 3.140147523709168e-05, "loss": 0.3101, "step": 3200 }, { "epoch": 1.69, "eval_accuracy": 0.8777660695468915, "eval_loss": 0.3834909200668335, "eval_runtime": 41.5606, "eval_samples_per_second": 91.337, "eval_steps_per_second": 11.429, "step": 3200 }, { "epoch": 1.69, "learning_rate": 3.087460484720759e-05, "loss": 0.3474, "step": 3210 }, { "epoch": 1.7, "learning_rate": 3.03477344573235e-05, "loss": 0.3775, "step": 3220 }, { "epoch": 1.7, "learning_rate": 2.9820864067439414e-05, "loss": 0.3458, "step": 3230 }, { "epoch": 1.71, "learning_rate": 2.929399367755532e-05, "loss": 0.1596, "step": 3240 }, { "epoch": 1.71, "eval_accuracy": 0.8822444678609063, "eval_loss": 0.3663616478443146, "eval_runtime": 41.6501, "eval_samples_per_second": 91.14, "eval_steps_per_second": 11.405, "step": 3240 }, { "epoch": 1.71, "learning_rate": 2.8767123287671234e-05, "loss": 0.2665, "step": 3250 }, { "epoch": 1.72, "learning_rate": 2.8240252897787145e-05, "loss": 0.3029, "step": 3260 }, { "epoch": 1.72, "learning_rate": 2.7713382507903058e-05, "loss": 0.3974, "step": 3270 }, { "epoch": 1.73, "learning_rate": 2.7186512118018968e-05, "loss": 0.3662, "step": 3280 }, { "epoch": 1.73, "eval_accuracy": 0.8819810326659642, "eval_loss": 0.3698595464229584, "eval_runtime": 41.5938, "eval_samples_per_second": 91.264, "eval_steps_per_second": 11.42, "step": 3280 }, { "epoch": 1.73, "learning_rate": 2.6659641728134882e-05, "loss": 0.304, "step": 3290 }, { "epoch": 1.74, "learning_rate": 2.613277133825079e-05, "loss": 0.2496, "step": 3300 }, { "epoch": 1.74, "learning_rate": 2.5605900948366702e-05, "loss": 0.3154, "step": 3310 }, { "epoch": 1.75, "learning_rate": 2.5079030558482612e-05, "loss": 0.406, "step": 3320 }, { "epoch": 1.75, "eval_accuracy": 0.8788198103266597, "eval_loss": 0.3647627532482147, "eval_runtime": 41.6136, "eval_samples_per_second": 91.22, "eval_steps_per_second": 11.415, "step": 3320 }, { "epoch": 1.75, "learning_rate": 2.4552160168598526e-05, "loss": 0.2383, "step": 3330 }, { "epoch": 1.76, "learning_rate": 2.402528977871444e-05, "loss": 0.2272, "step": 3340 }, { "epoch": 1.77, "learning_rate": 2.3498419388830346e-05, "loss": 0.3772, "step": 3350 }, { "epoch": 1.77, "learning_rate": 2.297154899894626e-05, "loss": 0.3417, "step": 3360 }, { "epoch": 1.77, "eval_accuracy": 0.8806638566912539, "eval_loss": 0.37239569425582886, "eval_runtime": 41.7989, "eval_samples_per_second": 90.816, "eval_steps_per_second": 11.364, "step": 3360 }, { "epoch": 1.78, "learning_rate": 2.2444678609062173e-05, "loss": 0.2708, "step": 3370 }, { "epoch": 1.78, "learning_rate": 2.1917808219178083e-05, "loss": 0.2455, "step": 3380 }, { "epoch": 1.79, "learning_rate": 2.1390937829293994e-05, "loss": 0.3778, "step": 3390 }, { "epoch": 1.79, "learning_rate": 2.0864067439409907e-05, "loss": 0.2489, "step": 3400 }, { "epoch": 1.79, "eval_accuracy": 0.8825079030558483, "eval_loss": 0.361176073551178, "eval_runtime": 41.7201, "eval_samples_per_second": 90.987, "eval_steps_per_second": 11.385, "step": 3400 }, { "epoch": 1.8, "learning_rate": 2.0337197049525817e-05, "loss": 0.3192, "step": 3410 }, { "epoch": 1.8, "learning_rate": 1.9810326659641728e-05, "loss": 0.2914, "step": 3420 }, { "epoch": 1.81, "learning_rate": 1.928345626975764e-05, "loss": 0.2141, "step": 3430 }, { "epoch": 1.81, "learning_rate": 1.8756585879873555e-05, "loss": 0.2464, "step": 3440 }, { "epoch": 1.81, "eval_accuracy": 0.8825079030558483, "eval_loss": 0.36530667543411255, "eval_runtime": 41.819, "eval_samples_per_second": 90.772, "eval_steps_per_second": 11.358, "step": 3440 }, { "epoch": 1.82, "learning_rate": 1.822971548998946e-05, "loss": 0.1882, "step": 3450 }, { "epoch": 1.82, "learning_rate": 1.7702845100105375e-05, "loss": 0.252, "step": 3460 }, { "epoch": 1.83, "learning_rate": 1.717597471022129e-05, "loss": 0.3904, "step": 3470 }, { "epoch": 1.83, "learning_rate": 1.66491043203372e-05, "loss": 0.2391, "step": 3480 }, { "epoch": 1.83, "eval_accuracy": 0.881190727081138, "eval_loss": 0.3640024662017822, "eval_runtime": 41.67, "eval_samples_per_second": 91.097, "eval_steps_per_second": 11.399, "step": 3480 }, { "epoch": 1.84, "learning_rate": 1.612223393045311e-05, "loss": 0.3707, "step": 3490 }, { "epoch": 1.84, "learning_rate": 1.5595363540569022e-05, "loss": 0.3237, "step": 3500 }, { "epoch": 1.85, "learning_rate": 1.5068493150684931e-05, "loss": 0.2514, "step": 3510 }, { "epoch": 1.85, "learning_rate": 1.4541622760800844e-05, "loss": 0.4291, "step": 3520 }, { "epoch": 1.85, "eval_accuracy": 0.8861959957850368, "eval_loss": 0.3543952703475952, "eval_runtime": 41.457, "eval_samples_per_second": 91.565, "eval_steps_per_second": 11.458, "step": 3520 }, { "epoch": 1.86, "learning_rate": 1.4014752370916756e-05, "loss": 0.2714, "step": 3530 }, { "epoch": 1.87, "learning_rate": 1.3487881981032666e-05, "loss": 0.4164, "step": 3540 }, { "epoch": 1.87, "learning_rate": 1.2961011591148578e-05, "loss": 0.2214, "step": 3550 }, { "epoch": 1.88, "learning_rate": 1.2434141201264489e-05, "loss": 0.259, "step": 3560 }, { "epoch": 1.88, "eval_accuracy": 0.8896206533192834, "eval_loss": 0.3500049412250519, "eval_runtime": 41.6053, "eval_samples_per_second": 91.238, "eval_steps_per_second": 11.417, "step": 3560 }, { "epoch": 1.88, "learning_rate": 1.1907270811380402e-05, "loss": 0.252, "step": 3570 }, { "epoch": 1.89, "learning_rate": 1.1380400421496312e-05, "loss": 0.2847, "step": 3580 }, { "epoch": 1.89, "learning_rate": 1.0853530031612224e-05, "loss": 0.2444, "step": 3590 }, { "epoch": 1.9, "learning_rate": 1.0326659641728136e-05, "loss": 0.1871, "step": 3600 }, { "epoch": 1.9, "eval_accuracy": 0.8864594309799789, "eval_loss": 0.35397401452064514, "eval_runtime": 41.3976, "eval_samples_per_second": 91.696, "eval_steps_per_second": 11.474, "step": 3600 }, { "epoch": 1.9, "learning_rate": 9.799789251844046e-06, "loss": 0.1985, "step": 3610 }, { "epoch": 1.91, "learning_rate": 9.272918861959958e-06, "loss": 0.2478, "step": 3620 }, { "epoch": 1.91, "learning_rate": 8.74604847207587e-06, "loss": 0.2614, "step": 3630 }, { "epoch": 1.92, "learning_rate": 8.21917808219178e-06, "loss": 0.2337, "step": 3640 }, { "epoch": 1.92, "eval_accuracy": 0.8872497365648051, "eval_loss": 0.3516286015510559, "eval_runtime": 41.3744, "eval_samples_per_second": 91.748, "eval_steps_per_second": 11.481, "step": 3640 }, { "epoch": 1.92, "learning_rate": 7.692307692307694e-06, "loss": 0.2885, "step": 3650 }, { "epoch": 1.93, "learning_rate": 7.165437302423604e-06, "loss": 0.2682, "step": 3660 }, { "epoch": 1.93, "learning_rate": 6.638566912539515e-06, "loss": 0.1558, "step": 3670 }, { "epoch": 1.94, "learning_rate": 6.1116965226554275e-06, "loss": 0.2336, "step": 3680 }, { "epoch": 1.94, "eval_accuracy": 0.8890937829293993, "eval_loss": 0.3470406234264374, "eval_runtime": 41.6183, "eval_samples_per_second": 91.21, "eval_steps_per_second": 11.413, "step": 3680 }, { "epoch": 1.94, "learning_rate": 5.5848261327713385e-06, "loss": 0.2797, "step": 3690 }, { "epoch": 1.95, "learning_rate": 5.05795574288725e-06, "loss": 0.2202, "step": 3700 }, { "epoch": 1.95, "learning_rate": 4.531085353003161e-06, "loss": 0.2526, "step": 3710 }, { "epoch": 1.96, "learning_rate": 4.004214963119072e-06, "loss": 0.2401, "step": 3720 }, { "epoch": 1.96, "eval_accuracy": 0.8901475237091675, "eval_loss": 0.3446514308452606, "eval_runtime": 41.3853, "eval_samples_per_second": 91.723, "eval_steps_per_second": 11.477, "step": 3720 }, { "epoch": 1.97, "learning_rate": 3.4773445732349843e-06, "loss": 0.1877, "step": 3730 }, { "epoch": 1.97, "learning_rate": 2.9504741833508957e-06, "loss": 0.163, "step": 3740 }, { "epoch": 1.98, "learning_rate": 2.423603793466807e-06, "loss": 0.2202, "step": 3750 }, { "epoch": 1.98, "learning_rate": 1.8967334035827188e-06, "loss": 0.2327, "step": 3760 }, { "epoch": 1.98, "eval_accuracy": 0.8904109589041096, "eval_loss": 0.3434308171272278, "eval_runtime": 41.3779, "eval_samples_per_second": 91.74, "eval_steps_per_second": 11.48, "step": 3760 } ], "max_steps": 3796, "num_train_epochs": 2, "total_flos": 4.662174864046436e+18, "trial_name": null, "trial_params": null }