{ "best_metric": 0.8912871287128713, "best_model_checkpoint": "food101_outputs/checkpoint-2960", "epoch": 5.0, "global_step": 2960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019864864864864865, "loss": 4.4083, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.0001972972972972973, "loss": 3.884, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00019594594594594594, "loss": 3.4068, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00019459459459459462, "loss": 2.9784, "step": 80 }, { "epoch": 0.17, "learning_rate": 0.00019324324324324326, "loss": 2.6549, "step": 100 }, { "epoch": 0.2, "learning_rate": 0.0001918918918918919, "loss": 2.3437, "step": 120 }, { "epoch": 0.24, "learning_rate": 0.00019054054054054055, "loss": 2.1107, "step": 140 }, { "epoch": 0.27, "learning_rate": 0.0001891891891891892, "loss": 1.8803, "step": 160 }, { "epoch": 0.3, "learning_rate": 0.00018783783783783784, "loss": 1.7025, "step": 180 }, { "epoch": 0.34, "learning_rate": 0.0001864864864864865, "loss": 1.5403, "step": 200 }, { "epoch": 0.37, "learning_rate": 0.00018513513513513513, "loss": 1.48, "step": 220 }, { "epoch": 0.41, "learning_rate": 0.0001837837837837838, "loss": 1.358, "step": 240 }, { "epoch": 0.44, "learning_rate": 0.00018243243243243245, "loss": 1.2943, "step": 260 }, { "epoch": 0.47, "learning_rate": 0.0001810810810810811, "loss": 1.2301, "step": 280 }, { "epoch": 0.51, "learning_rate": 0.00017972972972972974, "loss": 1.1578, "step": 300 }, { "epoch": 0.54, "learning_rate": 0.00017837837837837839, "loss": 1.0811, "step": 320 }, { "epoch": 0.57, "learning_rate": 0.00017702702702702703, "loss": 1.0662, "step": 340 }, { "epoch": 0.61, "learning_rate": 0.00017567567567567568, "loss": 1.0146, "step": 360 }, { "epoch": 0.64, "learning_rate": 0.00017432432432432432, "loss": 0.9584, "step": 380 }, { "epoch": 0.68, "learning_rate": 0.000172972972972973, "loss": 0.973, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.00017162162162162164, "loss": 0.9817, "step": 420 }, { "epoch": 0.74, "learning_rate": 0.00017027027027027028, "loss": 0.9552, "step": 440 }, { "epoch": 0.78, "learning_rate": 0.00016891891891891893, "loss": 0.916, "step": 460 }, { "epoch": 0.81, "learning_rate": 0.00016756756756756757, "loss": 0.8896, "step": 480 }, { "epoch": 0.84, "learning_rate": 0.00016621621621621622, "loss": 0.8855, "step": 500 }, { "epoch": 0.88, "learning_rate": 0.00016486486486486486, "loss": 0.8823, "step": 520 }, { "epoch": 0.91, "learning_rate": 0.0001635135135135135, "loss": 0.8059, "step": 540 }, { "epoch": 0.95, "learning_rate": 0.00016216216216216218, "loss": 0.8323, "step": 560 }, { "epoch": 0.98, "learning_rate": 0.00016081081081081083, "loss": 0.8271, "step": 580 }, { "epoch": 1.0, "eval_accuracy": 0.8561584158415841, "eval_loss": 0.6070069074630737, "eval_runtime": 142.5311, "eval_samples_per_second": 177.154, "eval_steps_per_second": 1.389, "step": 592 }, { "epoch": 1.01, "learning_rate": 0.00015945945945945947, "loss": 0.6876, "step": 600 }, { "epoch": 1.05, "learning_rate": 0.00015810810810810812, "loss": 0.4771, "step": 620 }, { "epoch": 1.08, "learning_rate": 0.00015675675675675676, "loss": 0.4998, "step": 640 }, { "epoch": 1.11, "learning_rate": 0.0001554054054054054, "loss": 0.4753, "step": 660 }, { "epoch": 1.15, "learning_rate": 0.00015405405405405405, "loss": 0.5197, "step": 680 }, { "epoch": 1.18, "learning_rate": 0.0001527027027027027, "loss": 0.527, "step": 700 }, { "epoch": 1.22, "learning_rate": 0.00015135135135135137, "loss": 0.5371, "step": 720 }, { "epoch": 1.25, "learning_rate": 0.00015000000000000001, "loss": 0.4992, "step": 740 }, { "epoch": 1.28, "learning_rate": 0.00014864864864864866, "loss": 0.4728, "step": 760 }, { "epoch": 1.32, "learning_rate": 0.0001472972972972973, "loss": 0.5185, "step": 780 }, { "epoch": 1.35, "learning_rate": 0.00014594594594594595, "loss": 0.5071, "step": 800 }, { "epoch": 1.39, "learning_rate": 0.00014459459459459462, "loss": 0.4728, "step": 820 }, { "epoch": 1.42, "learning_rate": 0.00014324324324324324, "loss": 0.4731, "step": 840 }, { "epoch": 1.45, "learning_rate": 0.00014189189189189188, "loss": 0.5211, "step": 860 }, { "epoch": 1.49, "learning_rate": 0.00014054054054054056, "loss": 0.4949, "step": 880 }, { "epoch": 1.52, "learning_rate": 0.0001391891891891892, "loss": 0.4847, "step": 900 }, { "epoch": 1.55, "learning_rate": 0.00013783783783783785, "loss": 0.4626, "step": 920 }, { "epoch": 1.59, "learning_rate": 0.0001364864864864865, "loss": 0.456, "step": 940 }, { "epoch": 1.62, "learning_rate": 0.00013513513513513514, "loss": 0.4938, "step": 960 }, { "epoch": 1.66, "learning_rate": 0.0001337837837837838, "loss": 0.4846, "step": 980 }, { "epoch": 1.69, "learning_rate": 0.00013243243243243243, "loss": 0.4576, "step": 1000 }, { "epoch": 1.72, "learning_rate": 0.00013108108108108107, "loss": 0.4766, "step": 1020 }, { "epoch": 1.76, "learning_rate": 0.00012972972972972974, "loss": 0.4741, "step": 1040 }, { "epoch": 1.79, "learning_rate": 0.0001283783783783784, "loss": 0.4225, "step": 1060 }, { "epoch": 1.82, "learning_rate": 0.00012702702702702703, "loss": 0.4201, "step": 1080 }, { "epoch": 1.86, "learning_rate": 0.00012567567567567568, "loss": 0.4327, "step": 1100 }, { "epoch": 1.89, "learning_rate": 0.00012432432432432433, "loss": 0.4771, "step": 1120 }, { "epoch": 1.93, "learning_rate": 0.000122972972972973, "loss": 0.4473, "step": 1140 }, { "epoch": 1.96, "learning_rate": 0.00012162162162162163, "loss": 0.4569, "step": 1160 }, { "epoch": 1.99, "learning_rate": 0.00012027027027027027, "loss": 0.4376, "step": 1180 }, { "epoch": 2.0, "eval_accuracy": 0.8691485148514851, "eval_loss": 0.4947212338447571, "eval_runtime": 107.3521, "eval_samples_per_second": 235.207, "eval_steps_per_second": 1.844, "step": 1184 }, { "epoch": 2.03, "learning_rate": 0.00011891891891891893, "loss": 0.2526, "step": 1200 }, { "epoch": 2.06, "learning_rate": 0.00011756756756756758, "loss": 0.2468, "step": 1220 }, { "epoch": 2.09, "learning_rate": 0.00011621621621621621, "loss": 0.2298, "step": 1240 }, { "epoch": 2.13, "learning_rate": 0.00011486486486486487, "loss": 0.2294, "step": 1260 }, { "epoch": 2.16, "learning_rate": 0.00011351351351351351, "loss": 0.2408, "step": 1280 }, { "epoch": 2.2, "learning_rate": 0.00011216216216216217, "loss": 0.2245, "step": 1300 }, { "epoch": 2.23, "learning_rate": 0.00011081081081081082, "loss": 0.2391, "step": 1320 }, { "epoch": 2.26, "learning_rate": 0.00010945945945945946, "loss": 0.241, "step": 1340 }, { "epoch": 2.3, "learning_rate": 0.00010810810810810812, "loss": 0.2197, "step": 1360 }, { "epoch": 2.33, "learning_rate": 0.00010675675675675677, "loss": 0.2467, "step": 1380 }, { "epoch": 2.36, "learning_rate": 0.0001054054054054054, "loss": 0.2397, "step": 1400 }, { "epoch": 2.4, "learning_rate": 0.00010405405405405406, "loss": 0.2308, "step": 1420 }, { "epoch": 2.43, "learning_rate": 0.0001027027027027027, "loss": 0.25, "step": 1440 }, { "epoch": 2.47, "learning_rate": 0.00010135135135135136, "loss": 0.2291, "step": 1460 }, { "epoch": 2.5, "learning_rate": 0.0001, "loss": 0.2496, "step": 1480 }, { "epoch": 2.53, "learning_rate": 9.864864864864865e-05, "loss": 0.2322, "step": 1500 }, { "epoch": 2.57, "learning_rate": 9.729729729729731e-05, "loss": 0.2266, "step": 1520 }, { "epoch": 2.6, "learning_rate": 9.594594594594595e-05, "loss": 0.2201, "step": 1540 }, { "epoch": 2.64, "learning_rate": 9.45945945945946e-05, "loss": 0.2497, "step": 1560 }, { "epoch": 2.67, "learning_rate": 9.324324324324324e-05, "loss": 0.2276, "step": 1580 }, { "epoch": 2.7, "learning_rate": 9.18918918918919e-05, "loss": 0.1945, "step": 1600 }, { "epoch": 2.74, "learning_rate": 9.054054054054055e-05, "loss": 0.2174, "step": 1620 }, { "epoch": 2.77, "learning_rate": 8.918918918918919e-05, "loss": 0.2423, "step": 1640 }, { "epoch": 2.8, "learning_rate": 8.783783783783784e-05, "loss": 0.2242, "step": 1660 }, { "epoch": 2.84, "learning_rate": 8.64864864864865e-05, "loss": 0.2383, "step": 1680 }, { "epoch": 2.87, "learning_rate": 8.513513513513514e-05, "loss": 0.2582, "step": 1700 }, { "epoch": 2.91, "learning_rate": 8.378378378378379e-05, "loss": 0.2125, "step": 1720 }, { "epoch": 2.94, "learning_rate": 8.243243243243243e-05, "loss": 0.2307, "step": 1740 }, { "epoch": 2.97, "learning_rate": 8.108108108108109e-05, "loss": 0.2089, "step": 1760 }, { "epoch": 3.0, "eval_accuracy": 0.8746930693069307, "eval_loss": 0.48760801553726196, "eval_runtime": 106.972, "eval_samples_per_second": 236.043, "eval_steps_per_second": 1.851, "step": 1776 }, { "epoch": 3.01, "learning_rate": 7.972972972972974e-05, "loss": 0.1821, "step": 1780 }, { "epoch": 3.04, "learning_rate": 7.837837837837838e-05, "loss": 0.1332, "step": 1800 }, { "epoch": 3.07, "learning_rate": 7.702702702702703e-05, "loss": 0.116, "step": 1820 }, { "epoch": 3.11, "learning_rate": 7.567567567567568e-05, "loss": 0.119, "step": 1840 }, { "epoch": 3.14, "learning_rate": 7.432432432432433e-05, "loss": 0.1222, "step": 1860 }, { "epoch": 3.18, "learning_rate": 7.297297297297297e-05, "loss": 0.118, "step": 1880 }, { "epoch": 3.21, "learning_rate": 7.162162162162162e-05, "loss": 0.1078, "step": 1900 }, { "epoch": 3.24, "learning_rate": 7.027027027027028e-05, "loss": 0.0982, "step": 1920 }, { "epoch": 3.28, "learning_rate": 6.891891891891892e-05, "loss": 0.1056, "step": 1940 }, { "epoch": 3.31, "learning_rate": 6.756756756756757e-05, "loss": 0.099, "step": 1960 }, { "epoch": 3.34, "learning_rate": 6.621621621621621e-05, "loss": 0.0961, "step": 1980 }, { "epoch": 3.38, "learning_rate": 6.486486486486487e-05, "loss": 0.1051, "step": 2000 }, { "epoch": 3.41, "learning_rate": 6.358108108108109e-05, "loss": 0.1161, "step": 2020 }, { "epoch": 3.45, "learning_rate": 6.222972972972973e-05, "loss": 0.0919, "step": 2040 }, { "epoch": 3.48, "learning_rate": 6.087837837837839e-05, "loss": 0.1181, "step": 2060 }, { "epoch": 3.51, "learning_rate": 5.952702702702703e-05, "loss": 0.1215, "step": 2080 }, { "epoch": 3.55, "learning_rate": 5.817567567567568e-05, "loss": 0.0959, "step": 2100 }, { "epoch": 3.58, "learning_rate": 5.682432432432433e-05, "loss": 0.0866, "step": 2120 }, { "epoch": 3.61, "learning_rate": 5.547297297297298e-05, "loss": 0.117, "step": 2140 }, { "epoch": 3.65, "learning_rate": 5.412162162162162e-05, "loss": 0.1063, "step": 2160 }, { "epoch": 3.68, "learning_rate": 5.277027027027027e-05, "loss": 0.0993, "step": 2180 }, { "epoch": 3.72, "learning_rate": 5.1418918918918925e-05, "loss": 0.1057, "step": 2200 }, { "epoch": 3.75, "learning_rate": 5.006756756756758e-05, "loss": 0.1194, "step": 2220 }, { "epoch": 3.78, "learning_rate": 4.871621621621622e-05, "loss": 0.0929, "step": 2240 }, { "epoch": 3.82, "learning_rate": 4.736486486486487e-05, "loss": 0.08, "step": 2260 }, { "epoch": 3.85, "learning_rate": 4.601351351351352e-05, "loss": 0.1133, "step": 2280 }, { "epoch": 3.89, "learning_rate": 4.4662162162162164e-05, "loss": 0.0996, "step": 2300 }, { "epoch": 3.92, "learning_rate": 4.3310810810810816e-05, "loss": 0.0992, "step": 2320 }, { "epoch": 3.95, "learning_rate": 4.195945945945946e-05, "loss": 0.0941, "step": 2340 }, { "epoch": 3.99, "learning_rate": 4.060810810810811e-05, "loss": 0.0882, "step": 2360 }, { "epoch": 4.0, "eval_accuracy": 0.8856633663366337, "eval_loss": 0.463856965303421, "eval_runtime": 107.2753, "eval_samples_per_second": 235.376, "eval_steps_per_second": 1.846, "step": 2368 }, { "epoch": 4.02, "learning_rate": 3.925675675675676e-05, "loss": 0.0796, "step": 2380 }, { "epoch": 4.05, "learning_rate": 3.790540540540541e-05, "loss": 0.0353, "step": 2400 }, { "epoch": 4.09, "learning_rate": 3.6554054054054055e-05, "loss": 0.0536, "step": 2420 }, { "epoch": 4.12, "learning_rate": 3.520270270270271e-05, "loss": 0.0564, "step": 2440 }, { "epoch": 4.16, "learning_rate": 3.385135135135135e-05, "loss": 0.0506, "step": 2460 }, { "epoch": 4.19, "learning_rate": 3.2500000000000004e-05, "loss": 0.0547, "step": 2480 }, { "epoch": 4.22, "learning_rate": 3.114864864864865e-05, "loss": 0.0462, "step": 2500 }, { "epoch": 4.26, "learning_rate": 2.97972972972973e-05, "loss": 0.0501, "step": 2520 }, { "epoch": 4.29, "learning_rate": 2.8445945945945946e-05, "loss": 0.0588, "step": 2540 }, { "epoch": 4.32, "learning_rate": 2.7094594594594598e-05, "loss": 0.0303, "step": 2560 }, { "epoch": 4.36, "learning_rate": 2.5743243243243243e-05, "loss": 0.0411, "step": 2580 }, { "epoch": 4.39, "learning_rate": 2.4391891891891895e-05, "loss": 0.0406, "step": 2600 }, { "epoch": 4.43, "learning_rate": 2.3040540540540543e-05, "loss": 0.0378, "step": 2620 }, { "epoch": 4.46, "learning_rate": 2.1689189189189192e-05, "loss": 0.0391, "step": 2640 }, { "epoch": 4.49, "learning_rate": 2.033783783783784e-05, "loss": 0.038, "step": 2660 }, { "epoch": 4.53, "learning_rate": 1.898648648648649e-05, "loss": 0.0443, "step": 2680 }, { "epoch": 4.56, "learning_rate": 1.7635135135135137e-05, "loss": 0.0391, "step": 2700 }, { "epoch": 4.59, "learning_rate": 1.6283783783783786e-05, "loss": 0.0439, "step": 2720 }, { "epoch": 4.63, "learning_rate": 1.4932432432432433e-05, "loss": 0.0532, "step": 2740 }, { "epoch": 4.66, "learning_rate": 1.3581081081081081e-05, "loss": 0.0395, "step": 2760 }, { "epoch": 4.7, "learning_rate": 1.222972972972973e-05, "loss": 0.0458, "step": 2780 }, { "epoch": 4.73, "learning_rate": 1.0878378378378378e-05, "loss": 0.0588, "step": 2800 }, { "epoch": 4.76, "learning_rate": 9.527027027027027e-06, "loss": 0.0465, "step": 2820 }, { "epoch": 4.8, "learning_rate": 8.175675675675675e-06, "loss": 0.036, "step": 2840 }, { "epoch": 4.83, "learning_rate": 6.8243243243243244e-06, "loss": 0.0437, "step": 2860 }, { "epoch": 4.86, "learning_rate": 5.472972972972974e-06, "loss": 0.0487, "step": 2880 }, { "epoch": 4.9, "learning_rate": 4.121621621621622e-06, "loss": 0.0273, "step": 2900 }, { "epoch": 4.93, "learning_rate": 2.7702702702702708e-06, "loss": 0.0392, "step": 2920 }, { "epoch": 4.97, "learning_rate": 1.418918918918919e-06, "loss": 0.0458, "step": 2940 }, { "epoch": 5.0, "learning_rate": 6.756756756756757e-08, "loss": 0.0452, "step": 2960 }, { "epoch": 5.0, "eval_accuracy": 0.8912871287128713, "eval_loss": 0.45006364583969116, "eval_runtime": 106.893, "eval_samples_per_second": 236.218, "eval_steps_per_second": 1.852, "step": 2960 }, { "epoch": 5.0, "step": 2960, "total_flos": 0.0, "train_loss": 0.48942885282071863, "train_runtime": 2221.2523, "train_samples_per_second": 170.512, "train_steps_per_second": 1.333 } ], "max_steps": 2960, "num_train_epochs": 5, "total_flos": 0.0, "trial_name": null, "trial_params": null }