{ "best_metric": 0.020224373787641525, "best_model_checkpoint": "./cats_vs_dogs_outputs/checkpoint-1555", "epoch": 5.0, "global_step": 1555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019871382636655948, "loss": 0.3131, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019742765273311899, "loss": 0.1692, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00019614147909967846, "loss": 0.0963, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.00019485530546623796, "loss": 0.0968, "step": 40 }, { "epoch": 0.16, "learning_rate": 0.00019356913183279743, "loss": 0.1045, "step": 50 }, { "epoch": 0.19, "learning_rate": 0.00019228295819935694, "loss": 0.0935, "step": 60 }, { "epoch": 0.23, "learning_rate": 0.0001909967845659164, "loss": 0.1173, "step": 70 }, { "epoch": 0.26, "learning_rate": 0.00018971061093247588, "loss": 0.0906, "step": 80 }, { "epoch": 0.29, "learning_rate": 0.00018842443729903539, "loss": 0.1101, "step": 90 }, { "epoch": 0.32, "learning_rate": 0.00018713826366559486, "loss": 0.1224, "step": 100 }, { "epoch": 0.35, "learning_rate": 0.00018585209003215436, "loss": 0.0655, "step": 110 }, { "epoch": 0.39, "learning_rate": 0.00018456591639871384, "loss": 0.1049, "step": 120 }, { "epoch": 0.42, "learning_rate": 0.0001832797427652733, "loss": 0.1104, "step": 130 }, { "epoch": 0.45, "learning_rate": 0.0001819935691318328, "loss": 0.0567, "step": 140 }, { "epoch": 0.48, "learning_rate": 0.00018070739549839229, "loss": 0.0609, "step": 150 }, { "epoch": 0.51, "learning_rate": 0.0001794212218649518, "loss": 0.0684, "step": 160 }, { "epoch": 0.55, "learning_rate": 0.00017813504823151126, "loss": 0.086, "step": 170 }, { "epoch": 0.58, "learning_rate": 0.00017684887459807076, "loss": 0.0708, "step": 180 }, { "epoch": 0.61, "learning_rate": 0.00017556270096463024, "loss": 0.0693, "step": 190 }, { "epoch": 0.64, "learning_rate": 0.0001742765273311897, "loss": 0.0814, "step": 200 }, { "epoch": 0.68, "learning_rate": 0.0001729903536977492, "loss": 0.1044, "step": 210 }, { "epoch": 0.71, "learning_rate": 0.0001717041800643087, "loss": 0.0896, "step": 220 }, { "epoch": 0.74, "learning_rate": 0.0001704180064308682, "loss": 0.1199, "step": 230 }, { "epoch": 0.77, "learning_rate": 0.00016913183279742766, "loss": 0.0773, "step": 240 }, { "epoch": 0.8, "learning_rate": 0.00016784565916398716, "loss": 0.0845, "step": 250 }, { "epoch": 0.84, "learning_rate": 0.00016655948553054664, "loss": 0.087, "step": 260 }, { "epoch": 0.87, "learning_rate": 0.0001652733118971061, "loss": 0.0836, "step": 270 }, { "epoch": 0.9, "learning_rate": 0.0001639871382636656, "loss": 0.0781, "step": 280 }, { "epoch": 0.93, "learning_rate": 0.0001627009646302251, "loss": 0.0764, "step": 290 }, { "epoch": 0.96, "learning_rate": 0.0001614147909967846, "loss": 0.0576, "step": 300 }, { "epoch": 1.0, "learning_rate": 0.00016012861736334406, "loss": 0.064, "step": 310 }, { "epoch": 1.0, "eval_accuracy": 0.9849088838268792, "eval_loss": 0.048329003155231476, "eval_runtime": 24.3841, "eval_samples_per_second": 144.028, "eval_steps_per_second": 2.256, "step": 311 }, { "epoch": 1.03, "learning_rate": 0.00015884244372990354, "loss": 0.0519, "step": 320 }, { "epoch": 1.06, "learning_rate": 0.00015755627009646304, "loss": 0.0666, "step": 330 }, { "epoch": 1.09, "learning_rate": 0.0001562700964630225, "loss": 0.0706, "step": 340 }, { "epoch": 1.13, "learning_rate": 0.00015498392282958201, "loss": 0.0632, "step": 350 }, { "epoch": 1.16, "learning_rate": 0.0001536977491961415, "loss": 0.0836, "step": 360 }, { "epoch": 1.19, "learning_rate": 0.000152411575562701, "loss": 0.0873, "step": 370 }, { "epoch": 1.22, "learning_rate": 0.00015112540192926046, "loss": 0.0711, "step": 380 }, { "epoch": 1.25, "learning_rate": 0.00014983922829581994, "loss": 0.0694, "step": 390 }, { "epoch": 1.29, "learning_rate": 0.00014855305466237944, "loss": 0.0803, "step": 400 }, { "epoch": 1.32, "learning_rate": 0.00014726688102893891, "loss": 0.0544, "step": 410 }, { "epoch": 1.35, "learning_rate": 0.00014598070739549841, "loss": 0.0673, "step": 420 }, { "epoch": 1.38, "learning_rate": 0.0001446945337620579, "loss": 0.0751, "step": 430 }, { "epoch": 1.41, "learning_rate": 0.0001434083601286174, "loss": 0.0567, "step": 440 }, { "epoch": 1.45, "learning_rate": 0.00014212218649517686, "loss": 0.044, "step": 450 }, { "epoch": 1.48, "learning_rate": 0.00014083601286173634, "loss": 0.0577, "step": 460 }, { "epoch": 1.51, "learning_rate": 0.00013954983922829584, "loss": 0.0451, "step": 470 }, { "epoch": 1.54, "learning_rate": 0.00013826366559485531, "loss": 0.0803, "step": 480 }, { "epoch": 1.58, "learning_rate": 0.00013697749196141482, "loss": 0.0481, "step": 490 }, { "epoch": 1.61, "learning_rate": 0.0001356913183279743, "loss": 0.0716, "step": 500 }, { "epoch": 1.64, "learning_rate": 0.00013440514469453376, "loss": 0.0568, "step": 510 }, { "epoch": 1.67, "learning_rate": 0.00013311897106109327, "loss": 0.0724, "step": 520 }, { "epoch": 1.7, "learning_rate": 0.00013183279742765274, "loss": 0.072, "step": 530 }, { "epoch": 1.74, "learning_rate": 0.00013054662379421224, "loss": 0.0456, "step": 540 }, { "epoch": 1.77, "learning_rate": 0.00012926045016077172, "loss": 0.0548, "step": 550 }, { "epoch": 1.8, "learning_rate": 0.00012797427652733122, "loss": 0.0566, "step": 560 }, { "epoch": 1.83, "learning_rate": 0.0001266881028938907, "loss": 0.0665, "step": 570 }, { "epoch": 1.86, "learning_rate": 0.00012540192926045017, "loss": 0.0553, "step": 580 }, { "epoch": 1.9, "learning_rate": 0.00012411575562700967, "loss": 0.058, "step": 590 }, { "epoch": 1.93, "learning_rate": 0.00012282958199356914, "loss": 0.0532, "step": 600 }, { "epoch": 1.96, "learning_rate": 0.00012154340836012863, "loss": 0.0381, "step": 610 }, { "epoch": 1.99, "learning_rate": 0.0001202572347266881, "loss": 0.0622, "step": 620 }, { "epoch": 2.0, "eval_accuracy": 0.9903189066059226, "eval_loss": 0.027488160878419876, "eval_runtime": 24.3864, "eval_samples_per_second": 144.015, "eval_steps_per_second": 2.255, "step": 622 }, { "epoch": 2.03, "learning_rate": 0.0001189710610932476, "loss": 0.0454, "step": 630 }, { "epoch": 2.06, "learning_rate": 0.00011768488745980708, "loss": 0.0383, "step": 640 }, { "epoch": 2.09, "learning_rate": 0.00011639871382636655, "loss": 0.0545, "step": 650 }, { "epoch": 2.12, "learning_rate": 0.00011511254019292605, "loss": 0.063, "step": 660 }, { "epoch": 2.15, "learning_rate": 0.00011382636655948553, "loss": 0.0717, "step": 670 }, { "epoch": 2.19, "learning_rate": 0.00011254019292604503, "loss": 0.0545, "step": 680 }, { "epoch": 2.22, "learning_rate": 0.0001112540192926045, "loss": 0.0576, "step": 690 }, { "epoch": 2.25, "learning_rate": 0.00010996784565916398, "loss": 0.0656, "step": 700 }, { "epoch": 2.28, "learning_rate": 0.00010868167202572348, "loss": 0.0762, "step": 710 }, { "epoch": 2.32, "learning_rate": 0.00010739549839228295, "loss": 0.0555, "step": 720 }, { "epoch": 2.35, "learning_rate": 0.00010610932475884245, "loss": 0.0492, "step": 730 }, { "epoch": 2.38, "learning_rate": 0.00010482315112540193, "loss": 0.042, "step": 740 }, { "epoch": 2.41, "learning_rate": 0.00010353697749196143, "loss": 0.0361, "step": 750 }, { "epoch": 2.44, "learning_rate": 0.0001022508038585209, "loss": 0.0522, "step": 760 }, { "epoch": 2.48, "learning_rate": 0.00010096463022508038, "loss": 0.0577, "step": 770 }, { "epoch": 2.51, "learning_rate": 9.967845659163988e-05, "loss": 0.0339, "step": 780 }, { "epoch": 2.54, "learning_rate": 9.839228295819937e-05, "loss": 0.0439, "step": 790 }, { "epoch": 2.57, "learning_rate": 9.710610932475884e-05, "loss": 0.0422, "step": 800 }, { "epoch": 2.6, "learning_rate": 9.581993569131833e-05, "loss": 0.03, "step": 810 }, { "epoch": 2.64, "learning_rate": 9.453376205787782e-05, "loss": 0.0637, "step": 820 }, { "epoch": 2.67, "learning_rate": 9.32475884244373e-05, "loss": 0.0629, "step": 830 }, { "epoch": 2.7, "learning_rate": 9.19614147909968e-05, "loss": 0.0371, "step": 840 }, { "epoch": 2.73, "learning_rate": 9.067524115755628e-05, "loss": 0.0419, "step": 850 }, { "epoch": 2.77, "learning_rate": 8.938906752411576e-05, "loss": 0.0479, "step": 860 }, { "epoch": 2.8, "learning_rate": 8.810289389067524e-05, "loss": 0.0392, "step": 870 }, { "epoch": 2.83, "learning_rate": 8.681672025723473e-05, "loss": 0.0383, "step": 880 }, { "epoch": 2.86, "learning_rate": 8.553054662379422e-05, "loss": 0.0449, "step": 890 }, { "epoch": 2.89, "learning_rate": 8.42443729903537e-05, "loss": 0.0481, "step": 900 }, { "epoch": 2.93, "learning_rate": 8.29581993569132e-05, "loss": 0.0292, "step": 910 }, { "epoch": 2.96, "learning_rate": 8.167202572347268e-05, "loss": 0.04, "step": 920 }, { "epoch": 2.99, "learning_rate": 8.038585209003216e-05, "loss": 0.0366, "step": 930 }, { "epoch": 3.0, "eval_accuracy": 0.9917425968109339, "eval_loss": 0.026212546974420547, "eval_runtime": 24.1897, "eval_samples_per_second": 145.186, "eval_steps_per_second": 2.274, "step": 933 }, { "epoch": 3.02, "learning_rate": 7.909967845659164e-05, "loss": 0.0577, "step": 940 }, { "epoch": 3.05, "learning_rate": 7.781350482315113e-05, "loss": 0.049, "step": 950 }, { "epoch": 3.09, "learning_rate": 7.652733118971062e-05, "loss": 0.0366, "step": 960 }, { "epoch": 3.12, "learning_rate": 7.524115755627011e-05, "loss": 0.0286, "step": 970 }, { "epoch": 3.15, "learning_rate": 7.39549839228296e-05, "loss": 0.038, "step": 980 }, { "epoch": 3.18, "learning_rate": 7.266881028938907e-05, "loss": 0.0386, "step": 990 }, { "epoch": 3.22, "learning_rate": 7.138263665594856e-05, "loss": 0.0287, "step": 1000 }, { "epoch": 3.25, "learning_rate": 7.009646302250804e-05, "loss": 0.0284, "step": 1010 }, { "epoch": 3.28, "learning_rate": 6.881028938906753e-05, "loss": 0.0267, "step": 1020 }, { "epoch": 3.31, "learning_rate": 6.752411575562702e-05, "loss": 0.0305, "step": 1030 }, { "epoch": 3.34, "learning_rate": 6.623794212218651e-05, "loss": 0.0438, "step": 1040 }, { "epoch": 3.38, "learning_rate": 6.495176848874598e-05, "loss": 0.0265, "step": 1050 }, { "epoch": 3.41, "learning_rate": 6.366559485530547e-05, "loss": 0.0346, "step": 1060 }, { "epoch": 3.44, "learning_rate": 6.237942122186496e-05, "loss": 0.0468, "step": 1070 }, { "epoch": 3.47, "learning_rate": 6.109324758842445e-05, "loss": 0.0343, "step": 1080 }, { "epoch": 3.5, "learning_rate": 5.980707395498393e-05, "loss": 0.0664, "step": 1090 }, { "epoch": 3.54, "learning_rate": 5.8520900321543414e-05, "loss": 0.0587, "step": 1100 }, { "epoch": 3.57, "learning_rate": 5.72347266881029e-05, "loss": 0.0322, "step": 1110 }, { "epoch": 3.6, "learning_rate": 5.5948553054662377e-05, "loss": 0.0323, "step": 1120 }, { "epoch": 3.63, "learning_rate": 5.4662379421221864e-05, "loss": 0.033, "step": 1130 }, { "epoch": 3.67, "learning_rate": 5.337620578778135e-05, "loss": 0.0343, "step": 1140 }, { "epoch": 3.7, "learning_rate": 5.209003215434084e-05, "loss": 0.0463, "step": 1150 }, { "epoch": 3.73, "learning_rate": 5.080385852090033e-05, "loss": 0.0249, "step": 1160 }, { "epoch": 3.76, "learning_rate": 4.951768488745981e-05, "loss": 0.0391, "step": 1170 }, { "epoch": 3.79, "learning_rate": 4.8231511254019296e-05, "loss": 0.0371, "step": 1180 }, { "epoch": 3.83, "learning_rate": 4.6945337620578784e-05, "loss": 0.0288, "step": 1190 }, { "epoch": 3.86, "learning_rate": 4.5659163987138265e-05, "loss": 0.0239, "step": 1200 }, { "epoch": 3.89, "learning_rate": 4.437299035369775e-05, "loss": 0.0154, "step": 1210 }, { "epoch": 3.92, "learning_rate": 4.308681672025724e-05, "loss": 0.0213, "step": 1220 }, { "epoch": 3.95, "learning_rate": 4.180064308681672e-05, "loss": 0.0543, "step": 1230 }, { "epoch": 3.99, "learning_rate": 4.051446945337621e-05, "loss": 0.0294, "step": 1240 }, { "epoch": 4.0, "eval_accuracy": 0.9931662870159453, "eval_loss": 0.02194945700466633, "eval_runtime": 24.0412, "eval_samples_per_second": 146.083, "eval_steps_per_second": 2.288, "step": 1244 }, { "epoch": 4.02, "learning_rate": 3.92282958199357e-05, "loss": 0.0325, "step": 1250 }, { "epoch": 4.05, "learning_rate": 3.794212218649518e-05, "loss": 0.02, "step": 1260 }, { "epoch": 4.08, "learning_rate": 3.6655948553054666e-05, "loss": 0.0284, "step": 1270 }, { "epoch": 4.12, "learning_rate": 3.5369774919614154e-05, "loss": 0.0266, "step": 1280 }, { "epoch": 4.15, "learning_rate": 3.4083601286173635e-05, "loss": 0.0232, "step": 1290 }, { "epoch": 4.18, "learning_rate": 3.279742765273312e-05, "loss": 0.0193, "step": 1300 }, { "epoch": 4.21, "learning_rate": 3.151125401929261e-05, "loss": 0.0286, "step": 1310 }, { "epoch": 4.24, "learning_rate": 3.0225080385852088e-05, "loss": 0.0464, "step": 1320 }, { "epoch": 4.28, "learning_rate": 2.8938906752411576e-05, "loss": 0.0234, "step": 1330 }, { "epoch": 4.31, "learning_rate": 2.7652733118971064e-05, "loss": 0.0331, "step": 1340 }, { "epoch": 4.34, "learning_rate": 2.6366559485530545e-05, "loss": 0.0243, "step": 1350 }, { "epoch": 4.37, "learning_rate": 2.5080385852090032e-05, "loss": 0.0161, "step": 1360 }, { "epoch": 4.41, "learning_rate": 2.379421221864952e-05, "loss": 0.0228, "step": 1370 }, { "epoch": 4.44, "learning_rate": 2.2508038585209005e-05, "loss": 0.0127, "step": 1380 }, { "epoch": 4.47, "learning_rate": 2.122186495176849e-05, "loss": 0.0281, "step": 1390 }, { "epoch": 4.5, "learning_rate": 1.9935691318327977e-05, "loss": 0.0369, "step": 1400 }, { "epoch": 4.53, "learning_rate": 1.864951768488746e-05, "loss": 0.0205, "step": 1410 }, { "epoch": 4.57, "learning_rate": 1.736334405144695e-05, "loss": 0.0263, "step": 1420 }, { "epoch": 4.6, "learning_rate": 1.6077170418006433e-05, "loss": 0.0106, "step": 1430 }, { "epoch": 4.63, "learning_rate": 1.4790996784565916e-05, "loss": 0.0215, "step": 1440 }, { "epoch": 4.66, "learning_rate": 1.3504823151125404e-05, "loss": 0.0251, "step": 1450 }, { "epoch": 4.69, "learning_rate": 1.2218649517684888e-05, "loss": 0.0177, "step": 1460 }, { "epoch": 4.73, "learning_rate": 1.0932475884244374e-05, "loss": 0.0224, "step": 1470 }, { "epoch": 4.76, "learning_rate": 9.646302250803859e-06, "loss": 0.0242, "step": 1480 }, { "epoch": 4.79, "learning_rate": 8.360128617363345e-06, "loss": 0.0188, "step": 1490 }, { "epoch": 4.82, "learning_rate": 7.07395498392283e-06, "loss": 0.0226, "step": 1500 }, { "epoch": 4.86, "learning_rate": 5.787781350482315e-06, "loss": 0.0417, "step": 1510 }, { "epoch": 4.89, "learning_rate": 4.501607717041801e-06, "loss": 0.0228, "step": 1520 }, { "epoch": 4.92, "learning_rate": 3.215434083601286e-06, "loss": 0.0159, "step": 1530 }, { "epoch": 4.95, "learning_rate": 1.929260450160772e-06, "loss": 0.0277, "step": 1540 }, { "epoch": 4.98, "learning_rate": 6.430868167202573e-07, "loss": 0.0161, "step": 1550 }, { "epoch": 5.0, "eval_accuracy": 0.9934510250569476, "eval_loss": 0.020224373787641525, "eval_runtime": 24.2393, "eval_samples_per_second": 144.888, "eval_steps_per_second": 2.269, "step": 1555 }, { "epoch": 5.0, "step": 1555, "total_flos": 0.0, "train_loss": 0.05366519431784222, "train_runtime": 840.8866, "train_samples_per_second": 118.316, "train_steps_per_second": 1.849 } ], "max_steps": 1555, "num_train_epochs": 5, "total_flos": 0.0, "trial_name": null, "trial_params": null }