{ "best_metric": 0.19176600873470306, "best_model_checkpoint": "finetuned-indian-food/checkpoint-1300", "epoch": 4.0, "global_step": 1332, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0001984984984984985, "loss": 2.8856, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019699699699699701, "loss": 2.5941, "step": 20 }, { "epoch": 0.09, "learning_rate": 0.0001954954954954955, "loss": 2.1576, "step": 30 }, { "epoch": 0.12, "learning_rate": 0.00019399399399399402, "loss": 1.9221, "step": 40 }, { "epoch": 0.15, "learning_rate": 0.0001924924924924925, "loss": 1.7671, "step": 50 }, { "epoch": 0.18, "learning_rate": 0.000190990990990991, "loss": 1.4461, "step": 60 }, { "epoch": 0.21, "learning_rate": 0.0001894894894894895, "loss": 1.3917, "step": 70 }, { "epoch": 0.24, "learning_rate": 0.000187987987987988, "loss": 1.3666, "step": 80 }, { "epoch": 0.27, "learning_rate": 0.0001864864864864865, "loss": 1.1615, "step": 90 }, { "epoch": 0.3, "learning_rate": 0.000184984984984985, "loss": 1.0175, "step": 100 }, { "epoch": 0.3, "eval_accuracy": 0.8629117959617428, "eval_loss": 0.9246562719345093, "eval_runtime": 35.8495, "eval_samples_per_second": 26.249, "eval_steps_per_second": 3.292, "step": 100 }, { "epoch": 0.33, "learning_rate": 0.0001834834834834835, "loss": 1.044, "step": 110 }, { "epoch": 0.36, "learning_rate": 0.000181981981981982, "loss": 0.9848, "step": 120 }, { "epoch": 0.39, "learning_rate": 0.0001804804804804805, "loss": 0.7889, "step": 130 }, { "epoch": 0.42, "learning_rate": 0.00017897897897897898, "loss": 0.921, "step": 140 }, { "epoch": 0.45, "learning_rate": 0.0001774774774774775, "loss": 0.7709, "step": 150 }, { "epoch": 0.48, "learning_rate": 0.000175975975975976, "loss": 0.9096, "step": 160 }, { "epoch": 0.51, "learning_rate": 0.0001744744744744745, "loss": 0.7293, "step": 170 }, { "epoch": 0.54, "learning_rate": 0.000172972972972973, "loss": 0.6419, "step": 180 }, { "epoch": 0.57, "learning_rate": 0.00017147147147147148, "loss": 0.7251, "step": 190 }, { "epoch": 0.6, "learning_rate": 0.00016996996996997, "loss": 0.7418, "step": 200 }, { "epoch": 0.6, "eval_accuracy": 0.8990435706695006, "eval_loss": 0.5535812377929688, "eval_runtime": 33.962, "eval_samples_per_second": 27.707, "eval_steps_per_second": 3.474, "step": 200 }, { "epoch": 0.63, "learning_rate": 0.00016846846846846846, "loss": 0.7307, "step": 210 }, { "epoch": 0.66, "learning_rate": 0.00016696696696696697, "loss": 0.7721, "step": 220 }, { "epoch": 0.69, "learning_rate": 0.00016546546546546546, "loss": 0.6073, "step": 230 }, { "epoch": 0.72, "learning_rate": 0.00016396396396396395, "loss": 0.6946, "step": 240 }, { "epoch": 0.75, "learning_rate": 0.00016246246246246247, "loss": 0.7824, "step": 250 }, { "epoch": 0.78, "learning_rate": 0.00016096096096096096, "loss": 0.5175, "step": 260 }, { "epoch": 0.81, "learning_rate": 0.00015945945945945947, "loss": 0.7959, "step": 270 }, { "epoch": 0.84, "learning_rate": 0.00015795795795795796, "loss": 0.5273, "step": 280 }, { "epoch": 0.87, "learning_rate": 0.00015645645645645645, "loss": 0.6327, "step": 290 }, { "epoch": 0.9, "learning_rate": 0.00015495495495495496, "loss": 0.6652, "step": 300 }, { "epoch": 0.9, "eval_accuracy": 0.9181721572794899, "eval_loss": 0.4035964906215668, "eval_runtime": 33.8609, "eval_samples_per_second": 27.79, "eval_steps_per_second": 3.485, "step": 300 }, { "epoch": 0.93, "learning_rate": 0.00015345345345345345, "loss": 0.6688, "step": 310 }, { "epoch": 0.96, "learning_rate": 0.00015195195195195194, "loss": 0.5384, "step": 320 }, { "epoch": 0.99, "learning_rate": 0.00015045045045045046, "loss": 0.5232, "step": 330 }, { "epoch": 1.02, "learning_rate": 0.00014894894894894895, "loss": 0.5407, "step": 340 }, { "epoch": 1.05, "learning_rate": 0.00014744744744744746, "loss": 0.512, "step": 350 }, { "epoch": 1.08, "learning_rate": 0.00014594594594594595, "loss": 0.4422, "step": 360 }, { "epoch": 1.11, "learning_rate": 0.00014444444444444444, "loss": 0.6211, "step": 370 }, { "epoch": 1.14, "learning_rate": 0.00014294294294294295, "loss": 0.3153, "step": 380 }, { "epoch": 1.17, "learning_rate": 0.00014144144144144144, "loss": 0.4897, "step": 390 }, { "epoch": 1.2, "learning_rate": 0.00013993993993993996, "loss": 0.5959, "step": 400 }, { "epoch": 1.2, "eval_accuracy": 0.89798087141339, "eval_loss": 0.40219178795814514, "eval_runtime": 34.8649, "eval_samples_per_second": 26.99, "eval_steps_per_second": 3.384, "step": 400 }, { "epoch": 1.23, "learning_rate": 0.00013843843843843845, "loss": 0.4482, "step": 410 }, { "epoch": 1.26, "learning_rate": 0.00013693693693693693, "loss": 0.3453, "step": 420 }, { "epoch": 1.29, "learning_rate": 0.00013543543543543545, "loss": 0.4837, "step": 430 }, { "epoch": 1.32, "learning_rate": 0.00013393393393393394, "loss": 0.403, "step": 440 }, { "epoch": 1.35, "learning_rate": 0.00013243243243243243, "loss": 0.5089, "step": 450 }, { "epoch": 1.38, "learning_rate": 0.00013093093093093094, "loss": 0.4843, "step": 460 }, { "epoch": 1.41, "learning_rate": 0.00012942942942942943, "loss": 0.3155, "step": 470 }, { "epoch": 1.44, "learning_rate": 0.00012792792792792795, "loss": 0.4345, "step": 480 }, { "epoch": 1.47, "learning_rate": 0.00012642642642642644, "loss": 0.4005, "step": 490 }, { "epoch": 1.5, "learning_rate": 0.00012492492492492492, "loss": 0.4478, "step": 500 }, { "epoch": 1.5, "eval_accuracy": 0.9287991498405951, "eval_loss": 0.3246866464614868, "eval_runtime": 33.5066, "eval_samples_per_second": 28.084, "eval_steps_per_second": 3.522, "step": 500 }, { "epoch": 1.53, "learning_rate": 0.00012342342342342344, "loss": 0.4507, "step": 510 }, { "epoch": 1.56, "learning_rate": 0.00012192192192192193, "loss": 0.5392, "step": 520 }, { "epoch": 1.59, "learning_rate": 0.00012042042042042043, "loss": 0.2738, "step": 530 }, { "epoch": 1.62, "learning_rate": 0.00011891891891891893, "loss": 0.566, "step": 540 }, { "epoch": 1.65, "learning_rate": 0.00011741741741741743, "loss": 0.5345, "step": 550 }, { "epoch": 1.68, "learning_rate": 0.00011591591591591592, "loss": 0.443, "step": 560 }, { "epoch": 1.71, "learning_rate": 0.00011441441441441443, "loss": 0.346, "step": 570 }, { "epoch": 1.74, "learning_rate": 0.00011291291291291293, "loss": 0.3985, "step": 580 }, { "epoch": 1.77, "learning_rate": 0.00011141141141141143, "loss": 0.3706, "step": 590 }, { "epoch": 1.8, "learning_rate": 0.00011006006006006006, "loss": 0.4717, "step": 600 }, { "epoch": 1.8, "eval_accuracy": 0.926673751328374, "eval_loss": 0.30190205574035645, "eval_runtime": 33.6807, "eval_samples_per_second": 27.939, "eval_steps_per_second": 3.503, "step": 600 }, { "epoch": 1.83, "learning_rate": 0.00010855855855855856, "loss": 0.3582, "step": 610 }, { "epoch": 1.86, "learning_rate": 0.00010705705705705707, "loss": 0.3629, "step": 620 }, { "epoch": 1.89, "learning_rate": 0.00010555555555555557, "loss": 0.3866, "step": 630 }, { "epoch": 1.92, "learning_rate": 0.00010405405405405406, "loss": 0.3155, "step": 640 }, { "epoch": 1.95, "learning_rate": 0.00010255255255255256, "loss": 0.4311, "step": 650 }, { "epoch": 1.98, "learning_rate": 0.00010105105105105106, "loss": 0.2544, "step": 660 }, { "epoch": 2.01, "learning_rate": 9.954954954954956e-05, "loss": 0.3617, "step": 670 }, { "epoch": 2.04, "learning_rate": 9.804804804804806e-05, "loss": 0.3127, "step": 680 }, { "epoch": 2.07, "learning_rate": 9.654654654654654e-05, "loss": 0.2718, "step": 690 }, { "epoch": 2.1, "learning_rate": 9.504504504504504e-05, "loss": 0.34, "step": 700 }, { "epoch": 2.1, "eval_accuracy": 0.9351753453772582, "eval_loss": 0.25940415263175964, "eval_runtime": 35.0686, "eval_samples_per_second": 26.833, "eval_steps_per_second": 3.365, "step": 700 }, { "epoch": 2.13, "learning_rate": 9.354354354354354e-05, "loss": 0.3881, "step": 710 }, { "epoch": 2.16, "learning_rate": 9.204204204204205e-05, "loss": 0.3528, "step": 720 }, { "epoch": 2.19, "learning_rate": 9.054054054054055e-05, "loss": 0.3053, "step": 730 }, { "epoch": 2.22, "learning_rate": 8.903903903903904e-05, "loss": 0.223, "step": 740 }, { "epoch": 2.25, "learning_rate": 8.753753753753754e-05, "loss": 0.2268, "step": 750 }, { "epoch": 2.28, "learning_rate": 8.603603603603604e-05, "loss": 0.4058, "step": 760 }, { "epoch": 2.31, "learning_rate": 8.453453453453454e-05, "loss": 0.3018, "step": 770 }, { "epoch": 2.34, "learning_rate": 8.303303303303303e-05, "loss": 0.2973, "step": 780 }, { "epoch": 2.37, "learning_rate": 8.153153153153153e-05, "loss": 0.2607, "step": 790 }, { "epoch": 2.4, "learning_rate": 8.003003003003004e-05, "loss": 0.3518, "step": 800 }, { "epoch": 2.4, "eval_accuracy": 0.9351753453772582, "eval_loss": 0.2507326304912567, "eval_runtime": 34.9575, "eval_samples_per_second": 26.918, "eval_steps_per_second": 3.376, "step": 800 }, { "epoch": 2.43, "learning_rate": 7.852852852852854e-05, "loss": 0.2251, "step": 810 }, { "epoch": 2.46, "learning_rate": 7.702702702702703e-05, "loss": 0.2747, "step": 820 }, { "epoch": 2.49, "learning_rate": 7.552552552552553e-05, "loss": 0.2653, "step": 830 }, { "epoch": 2.52, "learning_rate": 7.402402402402403e-05, "loss": 0.2361, "step": 840 }, { "epoch": 2.55, "learning_rate": 7.252252252252253e-05, "loss": 0.2548, "step": 850 }, { "epoch": 2.58, "learning_rate": 7.102102102102103e-05, "loss": 0.392, "step": 860 }, { "epoch": 2.61, "learning_rate": 6.951951951951952e-05, "loss": 0.3041, "step": 870 }, { "epoch": 2.64, "learning_rate": 6.801801801801802e-05, "loss": 0.3341, "step": 880 }, { "epoch": 2.67, "learning_rate": 6.651651651651653e-05, "loss": 0.2462, "step": 890 }, { "epoch": 2.7, "learning_rate": 6.501501501501502e-05, "loss": 0.3352, "step": 900 }, { "epoch": 2.7, "eval_accuracy": 0.9426142401700319, "eval_loss": 0.2483620047569275, "eval_runtime": 33.9353, "eval_samples_per_second": 27.729, "eval_steps_per_second": 3.477, "step": 900 }, { "epoch": 2.73, "learning_rate": 6.351351351351352e-05, "loss": 0.2314, "step": 910 }, { "epoch": 2.76, "learning_rate": 6.2012012012012e-05, "loss": 0.2739, "step": 920 }, { "epoch": 2.79, "learning_rate": 6.051051051051051e-05, "loss": 0.2905, "step": 930 }, { "epoch": 2.82, "learning_rate": 5.900900900900901e-05, "loss": 0.2607, "step": 940 }, { "epoch": 2.85, "learning_rate": 5.7507507507507506e-05, "loss": 0.334, "step": 950 }, { "epoch": 2.88, "learning_rate": 5.600600600600601e-05, "loss": 0.3202, "step": 960 }, { "epoch": 2.91, "learning_rate": 5.45045045045045e-05, "loss": 0.3326, "step": 970 }, { "epoch": 2.94, "learning_rate": 5.3003003003003005e-05, "loss": 0.2717, "step": 980 }, { "epoch": 2.97, "learning_rate": 5.15015015015015e-05, "loss": 0.259, "step": 990 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 0.2493, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.9394261424017003, "eval_loss": 0.22659793496131897, "eval_runtime": 33.5582, "eval_samples_per_second": 28.041, "eval_steps_per_second": 3.516, "step": 1000 }, { "epoch": 3.03, "learning_rate": 4.8498498498498504e-05, "loss": 0.2026, "step": 1010 }, { "epoch": 3.06, "learning_rate": 4.6996996996997e-05, "loss": 0.189, "step": 1020 }, { "epoch": 3.09, "learning_rate": 4.54954954954955e-05, "loss": 0.1097, "step": 1030 }, { "epoch": 3.12, "learning_rate": 4.3993993993994e-05, "loss": 0.1605, "step": 1040 }, { "epoch": 3.15, "learning_rate": 4.24924924924925e-05, "loss": 0.2681, "step": 1050 }, { "epoch": 3.18, "learning_rate": 4.099099099099099e-05, "loss": 0.2872, "step": 1060 }, { "epoch": 3.21, "learning_rate": 3.948948948948949e-05, "loss": 0.3481, "step": 1070 }, { "epoch": 3.24, "learning_rate": 3.7987987987987985e-05, "loss": 0.1615, "step": 1080 }, { "epoch": 3.27, "learning_rate": 3.648648648648649e-05, "loss": 0.1447, "step": 1090 }, { "epoch": 3.3, "learning_rate": 3.498498498498499e-05, "loss": 0.2034, "step": 1100 }, { "epoch": 3.3, "eval_accuracy": 0.9479277364505845, "eval_loss": 0.20114077627658844, "eval_runtime": 34.4276, "eval_samples_per_second": 27.333, "eval_steps_per_second": 3.427, "step": 1100 }, { "epoch": 3.33, "learning_rate": 3.3483483483483485e-05, "loss": 0.204, "step": 1110 }, { "epoch": 3.36, "learning_rate": 3.198198198198199e-05, "loss": 0.18, "step": 1120 }, { "epoch": 3.39, "learning_rate": 3.0480480480480482e-05, "loss": 0.1657, "step": 1130 }, { "epoch": 3.42, "learning_rate": 2.897897897897898e-05, "loss": 0.1932, "step": 1140 }, { "epoch": 3.45, "learning_rate": 2.7477477477477483e-05, "loss": 0.1524, "step": 1150 }, { "epoch": 3.48, "learning_rate": 2.5975975975975975e-05, "loss": 0.1144, "step": 1160 }, { "epoch": 3.51, "learning_rate": 2.4474474474474477e-05, "loss": 0.1424, "step": 1170 }, { "epoch": 3.54, "learning_rate": 2.2972972972972976e-05, "loss": 0.2142, "step": 1180 }, { "epoch": 3.57, "learning_rate": 2.1621621621621624e-05, "loss": 0.1448, "step": 1190 }, { "epoch": 3.6, "learning_rate": 2.012012012012012e-05, "loss": 0.1753, "step": 1200 }, { "epoch": 3.6, "eval_accuracy": 0.944739638682253, "eval_loss": 0.20892110466957092, "eval_runtime": 33.5408, "eval_samples_per_second": 28.055, "eval_steps_per_second": 3.518, "step": 1200 }, { "epoch": 3.63, "learning_rate": 1.8618618618618618e-05, "loss": 0.2085, "step": 1210 }, { "epoch": 3.66, "learning_rate": 1.7117117117117117e-05, "loss": 0.2179, "step": 1220 }, { "epoch": 3.69, "learning_rate": 1.5615615615615616e-05, "loss": 0.1755, "step": 1230 }, { "epoch": 3.72, "learning_rate": 1.4114114114114116e-05, "loss": 0.1905, "step": 1240 }, { "epoch": 3.75, "learning_rate": 1.2612612612612611e-05, "loss": 0.2246, "step": 1250 }, { "epoch": 3.78, "learning_rate": 1.1111111111111112e-05, "loss": 0.1495, "step": 1260 }, { "epoch": 3.81, "learning_rate": 9.60960960960961e-06, "loss": 0.2038, "step": 1270 }, { "epoch": 3.84, "learning_rate": 8.108108108108109e-06, "loss": 0.2163, "step": 1280 }, { "epoch": 3.87, "learning_rate": 6.606606606606607e-06, "loss": 0.152, "step": 1290 }, { "epoch": 3.9, "learning_rate": 5.105105105105106e-06, "loss": 0.1614, "step": 1300 }, { "epoch": 3.9, "eval_accuracy": 0.9543039319872476, "eval_loss": 0.19176600873470306, "eval_runtime": 33.687, "eval_samples_per_second": 27.934, "eval_steps_per_second": 3.503, "step": 1300 }, { "epoch": 3.93, "learning_rate": 3.603603603603604e-06, "loss": 0.1276, "step": 1310 }, { "epoch": 3.96, "learning_rate": 2.102102102102102e-06, "loss": 0.2218, "step": 1320 }, { "epoch": 3.99, "learning_rate": 6.006006006006006e-07, "loss": 0.1194, "step": 1330 }, { "epoch": 4.0, "step": 1332, "total_flos": 1.651775717862015e+18, "train_loss": 0.4876434194433081, "train_runtime": 1561.5053, "train_samples_per_second": 13.648, "train_steps_per_second": 0.853 } ], "max_steps": 1332, "num_train_epochs": 4, "total_flos": 1.651775717862015e+18, "trial_name": null, "trial_params": null }