{ "best_metric": 0.619316577911377, "best_model_checkpoint": "./vit-base-beans/checkpoint-400", "epoch": 3.883495145631068, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 0.00019805825242718447, "loss": 2.2781, "step": 10 }, { "epoch": 0.19, "learning_rate": 0.00019611650485436895, "loss": 2.0539, "step": 20 }, { "epoch": 0.24, "eval_accuracy": 0.6347826086956522, "eval_loss": 1.684175968170166, "eval_runtime": 199.797, "eval_samples_per_second": 2.302, "eval_steps_per_second": 0.29, "step": 25 }, { "epoch": 0.29, "learning_rate": 0.0001941747572815534, "loss": 1.7246, "step": 30 }, { "epoch": 0.39, "learning_rate": 0.00019223300970873787, "loss": 1.6027, "step": 40 }, { "epoch": 0.49, "learning_rate": 0.00019029126213592236, "loss": 1.4159, "step": 50 }, { "epoch": 0.49, "eval_accuracy": 0.6934782608695652, "eval_loss": 1.2154264450073242, "eval_runtime": 14.461, "eval_samples_per_second": 31.81, "eval_steps_per_second": 4.011, "step": 50 }, { "epoch": 0.58, "learning_rate": 0.00018834951456310681, "loss": 1.243, "step": 60 }, { "epoch": 0.68, "learning_rate": 0.00018640776699029127, "loss": 1.2138, "step": 70 }, { "epoch": 0.73, "eval_accuracy": 0.741304347826087, "eval_loss": 1.0337247848510742, "eval_runtime": 14.5332, "eval_samples_per_second": 31.652, "eval_steps_per_second": 3.991, "step": 75 }, { "epoch": 0.78, "learning_rate": 0.00018446601941747576, "loss": 1.096, "step": 80 }, { "epoch": 0.87, "learning_rate": 0.00018252427184466022, "loss": 1.012, "step": 90 }, { "epoch": 0.97, "learning_rate": 0.00018058252427184467, "loss": 0.8889, "step": 100 }, { "epoch": 0.97, "eval_accuracy": 0.7652173913043478, "eval_loss": 0.845034122467041, "eval_runtime": 14.4751, "eval_samples_per_second": 31.779, "eval_steps_per_second": 4.007, "step": 100 }, { "epoch": 1.07, "learning_rate": 0.00017864077669902913, "loss": 0.7514, "step": 110 }, { "epoch": 1.17, "learning_rate": 0.00017669902912621362, "loss": 0.7239, "step": 120 }, { "epoch": 1.21, "eval_accuracy": 0.6869565217391305, "eval_loss": 1.0301238298416138, "eval_runtime": 13.8604, "eval_samples_per_second": 33.188, "eval_steps_per_second": 4.185, "step": 125 }, { "epoch": 1.26, "learning_rate": 0.00017475728155339805, "loss": 0.6558, "step": 130 }, { "epoch": 1.36, "learning_rate": 0.00017281553398058253, "loss": 0.6192, "step": 140 }, { "epoch": 1.46, "learning_rate": 0.000170873786407767, "loss": 0.6241, "step": 150 }, { "epoch": 1.46, "eval_accuracy": 0.7869565217391304, "eval_loss": 0.7646523118019104, "eval_runtime": 13.9259, "eval_samples_per_second": 33.032, "eval_steps_per_second": 4.165, "step": 150 }, { "epoch": 1.55, "learning_rate": 0.00016893203883495145, "loss": 0.6511, "step": 160 }, { "epoch": 1.65, "learning_rate": 0.00016699029126213594, "loss": 0.7607, "step": 170 }, { "epoch": 1.7, "eval_accuracy": 0.7956521739130434, "eval_loss": 0.7208316922187805, "eval_runtime": 15.3767, "eval_samples_per_second": 29.915, "eval_steps_per_second": 3.772, "step": 175 }, { "epoch": 1.75, "learning_rate": 0.0001650485436893204, "loss": 0.6508, "step": 180 }, { "epoch": 1.84, "learning_rate": 0.00016310679611650485, "loss": 0.5474, "step": 190 }, { "epoch": 1.94, "learning_rate": 0.0001611650485436893, "loss": 0.5841, "step": 200 }, { "epoch": 1.94, "eval_accuracy": 0.7913043478260869, "eval_loss": 0.6974421143531799, "eval_runtime": 13.8536, "eval_samples_per_second": 33.204, "eval_steps_per_second": 4.187, "step": 200 }, { "epoch": 2.04, "learning_rate": 0.0001592233009708738, "loss": 0.5992, "step": 210 }, { "epoch": 2.14, "learning_rate": 0.00015728155339805825, "loss": 0.3827, "step": 220 }, { "epoch": 2.18, "eval_accuracy": 0.7869565217391304, "eval_loss": 0.6890577077865601, "eval_runtime": 14.5884, "eval_samples_per_second": 31.532, "eval_steps_per_second": 3.976, "step": 225 }, { "epoch": 2.23, "learning_rate": 0.0001553398058252427, "loss": 0.4169, "step": 230 }, { "epoch": 2.33, "learning_rate": 0.0001533980582524272, "loss": 0.3825, "step": 240 }, { "epoch": 2.43, "learning_rate": 0.00015145631067961166, "loss": 0.3534, "step": 250 }, { "epoch": 2.43, "eval_accuracy": 0.8, "eval_loss": 0.6881299614906311, "eval_runtime": 14.5452, "eval_samples_per_second": 31.626, "eval_steps_per_second": 3.988, "step": 250 }, { "epoch": 2.52, "learning_rate": 0.00014951456310679611, "loss": 0.2475, "step": 260 }, { "epoch": 2.62, "learning_rate": 0.0001475728155339806, "loss": 0.438, "step": 270 }, { "epoch": 2.67, "eval_accuracy": 0.7869565217391304, "eval_loss": 0.794903039932251, "eval_runtime": 14.6276, "eval_samples_per_second": 31.447, "eval_steps_per_second": 3.965, "step": 275 }, { "epoch": 2.72, "learning_rate": 0.00014563106796116506, "loss": 0.3039, "step": 280 }, { "epoch": 2.82, "learning_rate": 0.00014368932038834952, "loss": 0.2605, "step": 290 }, { "epoch": 2.91, "learning_rate": 0.000141747572815534, "loss": 0.4453, "step": 300 }, { "epoch": 2.91, "eval_accuracy": 0.7804347826086957, "eval_loss": 0.7851635217666626, "eval_runtime": 14.6262, "eval_samples_per_second": 31.45, "eval_steps_per_second": 3.965, "step": 300 }, { "epoch": 3.01, "learning_rate": 0.00013980582524271846, "loss": 0.3196, "step": 310 }, { "epoch": 3.11, "learning_rate": 0.00013786407766990292, "loss": 0.1714, "step": 320 }, { "epoch": 3.16, "eval_accuracy": 0.7956521739130434, "eval_loss": 0.7182856798171997, "eval_runtime": 16.1817, "eval_samples_per_second": 28.427, "eval_steps_per_second": 3.584, "step": 325 }, { "epoch": 3.2, "learning_rate": 0.0001359223300970874, "loss": 0.1467, "step": 330 }, { "epoch": 3.3, "learning_rate": 0.00013398058252427186, "loss": 0.1825, "step": 340 }, { "epoch": 3.4, "learning_rate": 0.00013203883495145632, "loss": 0.1664, "step": 350 }, { "epoch": 3.4, "eval_accuracy": 0.8, "eval_loss": 0.6710854172706604, "eval_runtime": 14.7551, "eval_samples_per_second": 31.176, "eval_steps_per_second": 3.931, "step": 350 }, { "epoch": 3.5, "learning_rate": 0.00013009708737864078, "loss": 0.1266, "step": 360 }, { "epoch": 3.59, "learning_rate": 0.00012815533980582526, "loss": 0.1962, "step": 370 }, { "epoch": 3.64, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7439975738525391, "eval_runtime": 14.5665, "eval_samples_per_second": 31.579, "eval_steps_per_second": 3.982, "step": 375 }, { "epoch": 3.69, "learning_rate": 0.00012621359223300972, "loss": 0.1609, "step": 380 }, { "epoch": 3.79, "learning_rate": 0.00012427184466019418, "loss": 0.1891, "step": 390 }, { "epoch": 3.88, "learning_rate": 0.00012233009708737864, "loss": 0.1961, "step": 400 }, { "epoch": 3.88, "eval_accuracy": 0.8391304347826087, "eval_loss": 0.619316577911377, "eval_runtime": 14.1432, "eval_samples_per_second": 32.525, "eval_steps_per_second": 4.101, "step": 400 } ], "max_steps": 1030, "num_train_epochs": 10, "total_flos": 4.929663074408755e+17, "trial_name": null, "trial_params": null }