{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 4.0, "global_step": 49088, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression_loss": 0.0, "epoch": 0.04, "learning_rate": 1.9994982433350572e-05, "loss": 1.1374, "step": 500 }, { "compression_loss": 0.0, "epoch": 0.08, "learning_rate": 1.9979731646343096e-05, "loss": 0.6873, "step": 1000 }, { "compression_loss": 0.0, "epoch": 0.12, "learning_rate": 1.9954262742072594e-05, "loss": 0.5978, "step": 1500 }, { "compression_loss": 0.0, "epoch": 0.16, "learning_rate": 1.9918601797818468e-05, "loss": 0.5178, "step": 2000 }, { "compression_loss": 0.0, "epoch": 0.2, "learning_rate": 1.9872785326357108e-05, "loss": 0.5054, "step": 2500 }, { "compression_loss": 0.0, "epoch": 0.24, "learning_rate": 1.9816860238576964e-05, "loss": 0.4631, "step": 3000 }, { "compression_loss": 0.0, "epoch": 0.29, "learning_rate": 1.9750883795447073e-05, "loss": 0.4353, "step": 3500 }, { "compression_loss": 0.0, "epoch": 0.33, "learning_rate": 1.967492354938833e-05, "loss": 0.4238, "step": 4000 }, { "compression_loss": 0.0, "epoch": 0.37, "learning_rate": 1.9589057275107412e-05, "loss": 0.3915, "step": 4500 }, { "compression_loss": 0.0, "epoch": 0.41, "learning_rate": 1.9493372889964286e-05, "loss": 0.3847, "step": 5000 }, { "compression_loss": 0.0, "epoch": 0.45, "learning_rate": 1.938796836395477e-05, "loss": 0.3748, "step": 5500 }, { "compression_loss": 0.0, "epoch": 0.49, "learning_rate": 1.9272951619400323e-05, "loss": 0.3625, "step": 6000 }, { "compression_loss": 0.0, "epoch": 0.53, "learning_rate": 1.9148440420447786e-05, "loss": 0.3628, "step": 6500 }, { "compression_loss": 0.0, "epoch": 0.57, "learning_rate": 1.901483926753625e-05, "loss": 0.3639, "step": 7000 }, { "compression_loss": 0.0, "epoch": 0.61, "learning_rate": 1.8871749523368402e-05, "loss": 0.3496, "step": 7500 }, { "compression_loss": 0.0, "epoch": 0.65, "learning_rate": 1.871957611040359e-05, "loss": 0.3384, "step": 8000 }, { "compression_loss": 0.0, "epoch": 0.69, "learning_rate": 1.8558474837017542e-05, "loss": 0.3401, "step": 8500 }, { "compression_loss": 0.0, "epoch": 0.73, "learning_rate": 1.8388610652706004e-05, "loss": 0.3233, "step": 9000 }, { "compression_loss": 0.0, "epoch": 0.77, "learning_rate": 1.821015747919508e-05, "loss": 0.313, "step": 9500 }, { "compression_loss": 0.0, "epoch": 0.81, "learning_rate": 1.802329803236507e-05, "loss": 0.311, "step": 10000 }, { "compression_loss": 0.0, "epoch": 0.86, "learning_rate": 1.782822363517005e-05, "loss": 0.3181, "step": 10500 }, { "compression_loss": 0.0, "epoch": 0.9, "learning_rate": 1.762554806329125e-05, "loss": 0.3056, "step": 11000 }, { "compression_loss": 0.0, "epoch": 0.94, "learning_rate": 1.741466657612244e-05, "loss": 0.3048, "step": 11500 }, { "compression_loss": 0.0, "epoch": 0.98, "learning_rate": 1.7196193308410236e-05, "loss": 0.3038, "step": 12000 }, { "Minimum SubNet": "OrderedDict([(, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", "epoch": 1.0, "eval_accuracy": 0.823841059602649, "eval_loss": 0.4950415790081024, "eval_runtime": 6.6676, "eval_samples_per_second": 1472.051, "eval_steps_per_second": 11.548, "step": 12272 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 1.0, "eval_accuracy": 0.8385124808965868, "eval_loss": 0.45917272567749023, "eval_runtime": 7.9134, "eval_samples_per_second": 1240.309, "eval_steps_per_second": 9.73, "step": 12272 }, { "compression_loss": 0.0, "epoch": 1.02, "learning_rate": 1.6970810835690784e-05, "loss": 0.2515, "step": 12500 }, { "compression_loss": 0.0, "epoch": 1.06, "learning_rate": 1.6737846664816916e-05, "loss": 0.1979, "step": 13000 }, { "compression_loss": 0.0, "epoch": 1.1, "learning_rate": 1.649798370049237e-05, "loss": 0.1959, "step": 13500 }, { "compression_loss": 0.0, "epoch": 1.14, "learning_rate": 1.6251467535282483e-05, "loss": 0.1967, "step": 14000 }, { "compression_loss": 0.0, "epoch": 1.18, "learning_rate": 1.5998550573878364e-05, "loss": 0.1961, "step": 14500 }, { "compression_loss": 0.0, "epoch": 1.22, "learning_rate": 1.5739491774662946e-05, "loss": 0.1956, "step": 15000 }, { "compression_loss": 0.0, "epoch": 1.26, "learning_rate": 1.547562747472503e-05, "loss": 0.188, "step": 15500 }, { "compression_loss": 0.0, "epoch": 1.3, "learning_rate": 1.5205108630894239e-05, "loss": 0.189, "step": 16000 }, { "compression_loss": 0.0, "epoch": 1.34, "learning_rate": 1.4929260344124287e-05, "loss": 0.1886, "step": 16500 }, { "compression_loss": 0.0, "epoch": 1.39, "learning_rate": 1.4648365051883542e-05, "loss": 0.1867, "step": 17000 }, { "compression_loss": 0.0, "epoch": 1.43, "learning_rate": 1.4362710359203518e-05, "loss": 0.1851, "step": 17500 }, { "compression_loss": 0.0, "epoch": 1.47, "learning_rate": 1.4072588744203866e-05, "loss": 0.1872, "step": 18000 }, { "compression_loss": 0.0, "epoch": 1.51, "learning_rate": 1.3778297258627826e-05, "loss": 0.1925, "step": 18500 }, { "compression_loss": 0.0, "epoch": 1.55, "learning_rate": 1.3480737202294069e-05, "loss": 0.1805, "step": 19000 }, { "compression_loss": 0.0, "epoch": 1.59, "learning_rate": 1.3179020719566164e-05, "loss": 0.1787, "step": 19500 }, { "compression_loss": 0.0, "epoch": 1.63, "learning_rate": 1.2874049278925042e-05, "loss": 0.1833, "step": 20000 }, { "compression_loss": 0.0, "epoch": 1.67, "learning_rate": 1.2566135136657013e-05, "loss": 0.1757, "step": 20500 }, { "compression_loss": 0.0, "epoch": 1.71, "learning_rate": 1.2255593562042272e-05, "loss": 0.1772, "step": 21000 }, { "compression_loss": 0.0, "epoch": 1.75, "learning_rate": 1.194274251455477e-05, "loss": 0.1772, "step": 21500 }, { "compression_loss": 0.0, "epoch": 1.79, "learning_rate": 1.1627902318307658e-05, "loss": 0.1723, "step": 22000 }, { "compression_loss": 0.0, "epoch": 1.83, "learning_rate": 1.131139533407759e-05, "loss": 0.1739, "step": 22500 }, { "compression_loss": 0.0, "epoch": 1.87, "learning_rate": 1.099418245257325e-05, "loss": 0.173, "step": 23000 }, { "compression_loss": 0.0, "epoch": 1.91, "learning_rate": 1.0675955707907746e-05, "loss": 0.1693, "step": 23500 }, { "compression_loss": 0.0, "epoch": 1.96, "learning_rate": 1.035640004173869e-05, "loss": 0.174, "step": 24000 }, { "compression_loss": 0.0, "epoch": 2.0, "learning_rate": 1.0036479462208424e-05, "loss": 0.1683, "step": 24500 }, { "Minimum SubNet": "OrderedDict([(, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", "epoch": 2.0, "eval_accuracy": 0.832603158430973, "eval_loss": 0.467803031206131, "eval_runtime": 6.6922, "eval_samples_per_second": 1466.635, "eval_steps_per_second": 11.506, "step": 24544 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 2.0, "eval_accuracy": 0.8478858889454916, "eval_loss": 0.4284982979297638, "eval_runtime": 7.9537, "eval_samples_per_second": 1234.02, "eval_steps_per_second": 9.681, "step": 24544 }, { "compression_loss": 0.0, "epoch": 2.04, "learning_rate": 9.717161267195924e-06, "loss": 0.1232, "step": 25000 }, { "compression_loss": 0.0, "epoch": 2.08, "learning_rate": 9.397492679427418e-06, "loss": 0.1194, "step": 25500 }, { "compression_loss": 0.0, "epoch": 2.12, "learning_rate": 9.07844099105778e-06, "loss": 0.1166, "step": 26000 }, { "compression_loss": 0.0, "epoch": 2.16, "learning_rate": 8.760332874955599e-06, "loss": 0.1195, "step": 26500 }, { "compression_loss": 0.0, "epoch": 2.2, "learning_rate": 8.443494037878114e-06, "loss": 0.118, "step": 27000 }, { "compression_loss": 0.0, "epoch": 2.24, "learning_rate": 8.128248886984306e-06, "loss": 0.1176, "step": 27500 }, { "compression_loss": 0.0, "epoch": 2.28, "learning_rate": 7.815544728834741e-06, "loss": 0.1171, "step": 28000 }, { "compression_loss": 0.0, "epoch": 2.32, "learning_rate": 7.50444852102862e-06, "loss": 0.1169, "step": 28500 }, { "compression_loss": 0.0, "epoch": 2.36, "learning_rate": 7.196521797475282e-06, "loss": 0.1176, "step": 29000 }, { "compression_loss": 0.0, "epoch": 2.4, "learning_rate": 6.890845770618091e-06, "loss": 0.1194, "step": 29500 }, { "compression_loss": 0.0, "epoch": 2.44, "learning_rate": 6.5883531662847225e-06, "loss": 0.1158, "step": 30000 }, { "compression_loss": 0.0, "epoch": 2.49, "learning_rate": 6.289353702713278e-06, "loss": 0.1169, "step": 30500 }, { "compression_loss": 0.0, "epoch": 2.53, "learning_rate": 5.994153521560526e-06, "loss": 0.1128, "step": 31000 }, { "compression_loss": 0.0, "epoch": 2.57, "learning_rate": 5.703054874447462e-06, "loss": 0.1131, "step": 31500 }, { "compression_loss": 0.0, "epoch": 2.61, "learning_rate": 5.41635581348777e-06, "loss": 0.1112, "step": 32000 }, { "compression_loss": 0.0, "epoch": 2.65, "learning_rate": 5.134349886116183e-06, "loss": 0.108, "step": 32500 }, { "compression_loss": 0.0, "epoch": 2.69, "learning_rate": 4.857325834529075e-06, "loss": 0.1134, "step": 33000 }, { "compression_loss": 0.0, "epoch": 2.73, "learning_rate": 4.586105376257812e-06, "loss": 0.111, "step": 33500 }, { "compression_loss": 0.0, "epoch": 2.77, "learning_rate": 4.319879247399316e-06, "loss": 0.1129, "step": 34000 }, { "compression_loss": 0.0, "epoch": 2.81, "learning_rate": 4.059983758449615e-06, "loss": 0.1119, "step": 34500 }, { "compression_loss": 0.0, "epoch": 2.85, "learning_rate": 3.805643436794584e-06, "loss": 0.1123, "step": 35000 }, { "compression_loss": 0.0, "epoch": 2.89, "learning_rate": 3.557645436153263e-06, "loss": 0.1089, "step": 35500 }, { "compression_loss": 0.0, "epoch": 2.93, "learning_rate": 3.316243678448742e-06, "loss": 0.1136, "step": 36000 }, { "compression_loss": 0.0, "epoch": 2.97, "learning_rate": 3.08168533179687e-06, "loss": 0.1132, "step": 36500 }, { "Minimum SubNet": "OrderedDict([(, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", "epoch": 3.0, "eval_accuracy": 0.8381049414161997, "eval_loss": 0.46384474635124207, "eval_runtime": 6.3265, "eval_samples_per_second": 1551.419, "eval_steps_per_second": 12.171, "step": 36816 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 3.0, "eval_accuracy": 0.8492103922567499, "eval_loss": 0.423094242811203, "eval_runtime": 7.786, "eval_samples_per_second": 1260.595, "eval_steps_per_second": 9.89, "step": 36816 }, { "compression_loss": 0.0, "epoch": 3.01, "learning_rate": 2.854658281539182e-06, "loss": 0.1037, "step": 37000 }, { "compression_loss": 0.0, "epoch": 3.06, "learning_rate": 2.6344851274604987e-06, "loss": 0.0908, "step": 37500 }, { "compression_loss": 0.0, "epoch": 3.1, "learning_rate": 2.421853428136032e-06, "loss": 0.0905, "step": 38000 }, { "compression_loss": 0.0, "epoch": 3.14, "learning_rate": 2.2173827550443417e-06, "loss": 0.0944, "step": 38500 }, { "compression_loss": 0.0, "epoch": 3.18, "learning_rate": 2.0204630113806902e-06, "loss": 0.0918, "step": 39000 }, { "compression_loss": 0.0, "epoch": 3.22, "learning_rate": 1.8317134117192737e-06, "loss": 0.0906, "step": 39500 }, { "compression_loss": 0.0, "epoch": 3.26, "learning_rate": 1.6513272143170379e-06, "loss": 0.0942, "step": 40000 }, { "compression_loss": 0.0, "epoch": 3.3, "learning_rate": 1.4798241410321701e-06, "loss": 0.0948, "step": 40500 }, { "compression_loss": 0.0, "epoch": 3.34, "learning_rate": 1.3166924633344513e-06, "loss": 0.0922, "step": 41000 }, { "compression_loss": 0.0, "epoch": 3.38, "learning_rate": 1.1624515111376556e-06, "loss": 0.0915, "step": 41500 }, { "compression_loss": 0.0, "epoch": 3.42, "learning_rate": 1.0175404634253083e-06, "loss": 0.0924, "step": 42000 }, { "compression_loss": 0.0, "epoch": 3.46, "learning_rate": 8.815269365902945e-07, "loss": 0.0933, "step": 42500 }, { "compression_loss": 0.0, "epoch": 3.5, "learning_rate": 7.548496955717189e-07, "loss": 0.0916, "step": 43000 }, { "compression_loss": 0.0, "epoch": 3.54, "learning_rate": 6.376384435468941e-07, "loss": 0.091, "step": 43500 }, { "compression_loss": 0.0, "epoch": 3.59, "learning_rate": 5.300131915902884e-07, "loss": 0.0904, "step": 44000 }, { "compression_loss": 0.0, "epoch": 3.63, "learning_rate": 4.3208413579573505e-07, "loss": 0.092, "step": 44500 }, { "compression_loss": 0.0, "epoch": 3.67, "learning_rate": 3.439515444480945e-07, "loss": 0.0925, "step": 45000 }, { "compression_loss": 0.0, "epoch": 3.71, "learning_rate": 2.657056553598691e-07, "loss": 0.091, "step": 45500 }, { "compression_loss": 0.0, "epoch": 3.75, "learning_rate": 1.974265834778688e-07, "loss": 0.0911, "step": 46000 }, { "compression_loss": 0.0, "epoch": 3.79, "learning_rate": 1.3929066553600822e-07, "loss": 0.0893, "step": 46500 }, { "compression_loss": 0.0, "epoch": 3.83, "learning_rate": 9.112443637549263e-08, "loss": 0.0923, "step": 47000 }, { "compression_loss": 0.0, "epoch": 3.87, "learning_rate": 5.310377585768578e-08, "loss": 0.0874, "step": 47500 }, { "compression_loss": 0.0, "epoch": 3.91, "learning_rate": 2.5267612841735068e-08, "loss": 0.0906, "step": 48000 }, { "compression_loss": 0.0, "epoch": 3.95, "learning_rate": 7.644448412432726e-09, "loss": 0.091, "step": 48500 }, { "compression_loss": 0.0, "epoch": 3.99, "learning_rate": 2.5232669832453427e-10, "loss": 0.0894, "step": 49000 }, { "Minimum SubNet": "OrderedDict([(, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", "epoch": 4.0, "eval_accuracy": 0.8383087111563933, "eval_loss": 0.4677698612213135, "eval_runtime": 6.3877, "eval_samples_per_second": 1536.535, "eval_steps_per_second": 12.054, "step": 49088 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 4.0, "eval_accuracy": 0.8497198166072338, "eval_loss": 0.42607390880584717, "eval_runtime": 7.8106, "eval_samples_per_second": 1256.619, "eval_steps_per_second": 9.858, "step": 49088 }, { "epoch": 4.0, "step": 49088, "total_flos": 1.0332516536862106e+17, "train_loss": 0.2031622279964805, "train_runtime": 75161.6129, "train_samples_per_second": 20.899, "train_steps_per_second": 0.653 } ], "max_steps": 49088, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 4, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 1.0332516536862106e+17, "trial_name": null, "trial_params": null }