{ "best_metric": 0.027106858789920807, "best_model_checkpoint": "./deit-base-distilled-mask-finetuned/checkpoint-600", "epoch": 1.9955654101995566, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019889135254988916, "loss": 0.0451, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.00019778270509977829, "loss": 0.0465, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.0001966740576496674, "loss": 0.2191, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.00019556541019955653, "loss": 0.0804, "step": 40 }, { "epoch": 0.11, "learning_rate": 0.00019445676274944569, "loss": 0.0719, "step": 50 }, { "epoch": 0.13, "learning_rate": 0.00019334811529933484, "loss": 0.0518, "step": 60 }, { "epoch": 0.16, "learning_rate": 0.00019223946784922396, "loss": 0.0433, "step": 70 }, { "epoch": 0.18, "learning_rate": 0.00019124168514412417, "loss": 0.2892, "step": 80 }, { "epoch": 0.2, "learning_rate": 0.00019013303769401332, "loss": 0.0692, "step": 90 }, { "epoch": 0.22, "learning_rate": 0.00018902439024390244, "loss": 0.0765, "step": 100 }, { "epoch": 0.22, "eval_accuracy": 0.9888765294771968, "eval_loss": 0.05117267370223999, "eval_runtime": 195.5169, "eval_samples_per_second": 18.392, "eval_steps_per_second": 2.302, "step": 100 }, { "epoch": 0.24, "learning_rate": 0.0001879157427937916, "loss": 0.1107, "step": 110 }, { "epoch": 0.27, "learning_rate": 0.00018680709534368072, "loss": 0.0967, "step": 120 }, { "epoch": 0.29, "learning_rate": 0.00018569844789356984, "loss": 0.0852, "step": 130 }, { "epoch": 0.31, "learning_rate": 0.000184589800443459, "loss": 0.082, "step": 140 }, { "epoch": 0.33, "learning_rate": 0.00018348115299334812, "loss": 0.0172, "step": 150 }, { "epoch": 0.35, "learning_rate": 0.00018237250554323727, "loss": 0.0379, "step": 160 }, { "epoch": 0.38, "learning_rate": 0.0001812638580931264, "loss": 0.0773, "step": 170 }, { "epoch": 0.4, "learning_rate": 0.00018015521064301552, "loss": 0.0329, "step": 180 }, { "epoch": 0.42, "learning_rate": 0.00017904656319290467, "loss": 0.0688, "step": 190 }, { "epoch": 0.44, "learning_rate": 0.0001779379157427938, "loss": 0.0533, "step": 200 }, { "epoch": 0.44, "eval_accuracy": 0.9908231368186874, "eval_loss": 0.037399593740701675, "eval_runtime": 194.4149, "eval_samples_per_second": 18.497, "eval_steps_per_second": 2.315, "step": 200 }, { "epoch": 0.47, "learning_rate": 0.00017682926829268295, "loss": 0.0536, "step": 210 }, { "epoch": 0.49, "learning_rate": 0.00017572062084257207, "loss": 0.0144, "step": 220 }, { "epoch": 0.51, "learning_rate": 0.0001746119733924612, "loss": 0.0535, "step": 230 }, { "epoch": 0.53, "learning_rate": 0.00017350332594235035, "loss": 0.0569, "step": 240 }, { "epoch": 0.55, "learning_rate": 0.00017239467849223947, "loss": 0.0408, "step": 250 }, { "epoch": 0.58, "learning_rate": 0.00017128603104212862, "loss": 0.0529, "step": 260 }, { "epoch": 0.6, "learning_rate": 0.00017017738359201775, "loss": 0.088, "step": 270 }, { "epoch": 0.62, "learning_rate": 0.00016906873614190687, "loss": 0.0147, "step": 280 }, { "epoch": 0.64, "learning_rate": 0.00016796008869179602, "loss": 0.0369, "step": 290 }, { "epoch": 0.67, "learning_rate": 0.00016685144124168515, "loss": 0.0442, "step": 300 }, { "epoch": 0.67, "eval_accuracy": 0.9885984427141268, "eval_loss": 0.03961510211229324, "eval_runtime": 194.5523, "eval_samples_per_second": 18.483, "eval_steps_per_second": 2.313, "step": 300 }, { "epoch": 0.69, "learning_rate": 0.0001657427937915743, "loss": 0.0191, "step": 310 }, { "epoch": 0.71, "learning_rate": 0.00016463414634146343, "loss": 0.044, "step": 320 }, { "epoch": 0.73, "learning_rate": 0.00016352549889135255, "loss": 0.0274, "step": 330 }, { "epoch": 0.75, "learning_rate": 0.0001624168514412417, "loss": 0.064, "step": 340 }, { "epoch": 0.78, "learning_rate": 0.00016130820399113083, "loss": 0.0478, "step": 350 }, { "epoch": 0.8, "learning_rate": 0.00016019955654101998, "loss": 0.0532, "step": 360 }, { "epoch": 0.82, "learning_rate": 0.0001590909090909091, "loss": 0.05, "step": 370 }, { "epoch": 0.84, "learning_rate": 0.00015798226164079823, "loss": 0.0197, "step": 380 }, { "epoch": 0.86, "learning_rate": 0.00015687361419068738, "loss": 0.0492, "step": 390 }, { "epoch": 0.89, "learning_rate": 0.0001557649667405765, "loss": 0.0359, "step": 400 }, { "epoch": 0.89, "eval_accuracy": 0.9885984427141268, "eval_loss": 0.034940723329782486, "eval_runtime": 193.9815, "eval_samples_per_second": 18.538, "eval_steps_per_second": 2.32, "step": 400 }, { "epoch": 0.91, "learning_rate": 0.00015465631929046565, "loss": 0.0405, "step": 410 }, { "epoch": 0.93, "learning_rate": 0.00015354767184035478, "loss": 0.03, "step": 420 }, { "epoch": 0.95, "learning_rate": 0.0001524390243902439, "loss": 0.0272, "step": 430 }, { "epoch": 0.98, "learning_rate": 0.00015133037694013303, "loss": 0.0366, "step": 440 }, { "epoch": 1.0, "learning_rate": 0.00015022172949002218, "loss": 0.0676, "step": 450 }, { "epoch": 1.02, "learning_rate": 0.00014911308203991133, "loss": 0.0185, "step": 460 }, { "epoch": 1.04, "learning_rate": 0.00014800443458980045, "loss": 0.0053, "step": 470 }, { "epoch": 1.06, "learning_rate": 0.00014689578713968958, "loss": 0.0309, "step": 480 }, { "epoch": 1.09, "learning_rate": 0.0001457871396895787, "loss": 0.0183, "step": 490 }, { "epoch": 1.11, "learning_rate": 0.00014467849223946785, "loss": 0.0777, "step": 500 }, { "epoch": 1.11, "eval_accuracy": 0.9869299221357063, "eval_loss": 0.042109716683626175, "eval_runtime": 193.8529, "eval_samples_per_second": 18.55, "eval_steps_per_second": 2.321, "step": 500 }, { "epoch": 1.13, "learning_rate": 0.000143569844789357, "loss": 0.0159, "step": 510 }, { "epoch": 1.15, "learning_rate": 0.00014246119733924613, "loss": 0.0645, "step": 520 }, { "epoch": 1.18, "learning_rate": 0.00014135254988913525, "loss": 0.0632, "step": 530 }, { "epoch": 1.2, "learning_rate": 0.00014024390243902438, "loss": 0.0251, "step": 540 }, { "epoch": 1.22, "learning_rate": 0.00013913525498891353, "loss": 0.036, "step": 550 }, { "epoch": 1.24, "learning_rate": 0.00013802660753880268, "loss": 0.0389, "step": 560 }, { "epoch": 1.26, "learning_rate": 0.0001369179600886918, "loss": 0.0353, "step": 570 }, { "epoch": 1.29, "learning_rate": 0.00013580931263858093, "loss": 0.0376, "step": 580 }, { "epoch": 1.31, "learning_rate": 0.00013470066518847006, "loss": 0.0416, "step": 590 }, { "epoch": 1.33, "learning_rate": 0.0001335920177383592, "loss": 0.0051, "step": 600 }, { "epoch": 1.33, "eval_accuracy": 0.9922135706340378, "eval_loss": 0.027106858789920807, "eval_runtime": 193.7443, "eval_samples_per_second": 18.561, "eval_steps_per_second": 2.323, "step": 600 }, { "epoch": 1.35, "learning_rate": 0.00013248337028824836, "loss": 0.0277, "step": 610 }, { "epoch": 1.37, "learning_rate": 0.00013137472283813748, "loss": 0.0466, "step": 620 }, { "epoch": 1.4, "learning_rate": 0.0001302660753880266, "loss": 0.0365, "step": 630 }, { "epoch": 1.42, "learning_rate": 0.00012915742793791573, "loss": 0.0079, "step": 640 }, { "epoch": 1.44, "learning_rate": 0.00012804878048780488, "loss": 0.0291, "step": 650 }, { "epoch": 1.46, "learning_rate": 0.000126940133037694, "loss": 0.0694, "step": 660 }, { "epoch": 1.49, "learning_rate": 0.00012583148558758316, "loss": 0.0027, "step": 670 }, { "epoch": 1.51, "learning_rate": 0.0001247228381374723, "loss": 0.0179, "step": 680 }, { "epoch": 1.53, "learning_rate": 0.0001236141906873614, "loss": 0.0206, "step": 690 }, { "epoch": 1.55, "learning_rate": 0.00012250554323725056, "loss": 0.0112, "step": 700 }, { "epoch": 1.55, "eval_accuracy": 0.9849833147942157, "eval_loss": 0.051631152629852295, "eval_runtime": 194.3203, "eval_samples_per_second": 18.506, "eval_steps_per_second": 2.316, "step": 700 }, { "epoch": 1.57, "learning_rate": 0.00012139689578713968, "loss": 0.0037, "step": 710 }, { "epoch": 1.6, "learning_rate": 0.00012028824833702884, "loss": 0.0051, "step": 720 }, { "epoch": 1.62, "learning_rate": 0.00011917960088691797, "loss": 0.0014, "step": 730 }, { "epoch": 1.64, "learning_rate": 0.0001180709534368071, "loss": 0.0018, "step": 740 }, { "epoch": 1.66, "learning_rate": 0.00011696230598669624, "loss": 0.0445, "step": 750 }, { "epoch": 1.69, "learning_rate": 0.00011585365853658536, "loss": 0.0294, "step": 760 }, { "epoch": 1.71, "learning_rate": 0.00011474501108647451, "loss": 0.0247, "step": 770 }, { "epoch": 1.73, "learning_rate": 0.00011363636363636365, "loss": 0.0069, "step": 780 }, { "epoch": 1.75, "learning_rate": 0.00011252771618625277, "loss": 0.0612, "step": 790 }, { "epoch": 1.77, "learning_rate": 0.00011141906873614191, "loss": 0.0152, "step": 800 }, { "epoch": 1.77, "eval_accuracy": 0.9824805339265851, "eval_loss": 0.05840621143579483, "eval_runtime": 194.8587, "eval_samples_per_second": 18.454, "eval_steps_per_second": 2.309, "step": 800 }, { "epoch": 1.8, "learning_rate": 0.00011031042128603104, "loss": 0.0137, "step": 810 }, { "epoch": 1.82, "learning_rate": 0.00010920177383592019, "loss": 0.0118, "step": 820 }, { "epoch": 1.84, "learning_rate": 0.00010809312638580931, "loss": 0.0246, "step": 830 }, { "epoch": 1.86, "learning_rate": 0.00010698447893569845, "loss": 0.0246, "step": 840 }, { "epoch": 1.88, "learning_rate": 0.0001058758314855876, "loss": 0.036, "step": 850 }, { "epoch": 1.91, "learning_rate": 0.00010476718403547671, "loss": 0.0248, "step": 860 }, { "epoch": 1.93, "learning_rate": 0.00010365853658536586, "loss": 0.019, "step": 870 }, { "epoch": 1.95, "learning_rate": 0.00010254988913525499, "loss": 0.0387, "step": 880 }, { "epoch": 1.97, "learning_rate": 0.00010144124168514413, "loss": 0.0127, "step": 890 }, { "epoch": 2.0, "learning_rate": 0.00010033259423503328, "loss": 0.0339, "step": 900 }, { "epoch": 2.0, "eval_accuracy": 0.9905450500556173, "eval_loss": 0.027991166338324547, "eval_runtime": 194.6989, "eval_samples_per_second": 18.47, "eval_steps_per_second": 2.311, "step": 900 }, { "epoch": 2.0, "step": 900, "total_flos": 2.2314992806549094e+18, "train_loss": 0.04378239723129405, "train_runtime": 5836.8754, "train_samples_per_second": 9.887, "train_steps_per_second": 0.309 } ], "max_steps": 1804, "num_train_epochs": 4, "total_flos": 2.2314992806549094e+18, "trial_name": null, "trial_params": null }