English
jinjieyuan's picture
Upload model
1960aa4
raw
history blame
20 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"best_supernet_model_checkpoint": null,
"epoch": 4.0,
"global_step": 49088,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"compression_loss": 0.0,
"epoch": 0.04,
"learning_rate": 1.9994982433350572e-05,
"loss": 1.1374,
"step": 500
},
{
"compression_loss": 0.0,
"epoch": 0.08,
"learning_rate": 1.9979731646343096e-05,
"loss": 0.6873,
"step": 1000
},
{
"compression_loss": 0.0,
"epoch": 0.12,
"learning_rate": 1.9954262742072594e-05,
"loss": 0.5978,
"step": 1500
},
{
"compression_loss": 0.0,
"epoch": 0.16,
"learning_rate": 1.9918601797818468e-05,
"loss": 0.5178,
"step": 2000
},
{
"compression_loss": 0.0,
"epoch": 0.2,
"learning_rate": 1.9872785326357108e-05,
"loss": 0.5054,
"step": 2500
},
{
"compression_loss": 0.0,
"epoch": 0.24,
"learning_rate": 1.9816860238576964e-05,
"loss": 0.4631,
"step": 3000
},
{
"compression_loss": 0.0,
"epoch": 0.29,
"learning_rate": 1.9750883795447073e-05,
"loss": 0.4353,
"step": 3500
},
{
"compression_loss": 0.0,
"epoch": 0.33,
"learning_rate": 1.967492354938833e-05,
"loss": 0.4238,
"step": 4000
},
{
"compression_loss": 0.0,
"epoch": 0.37,
"learning_rate": 1.9589057275107412e-05,
"loss": 0.3915,
"step": 4500
},
{
"compression_loss": 0.0,
"epoch": 0.41,
"learning_rate": 1.9493372889964286e-05,
"loss": 0.3847,
"step": 5000
},
{
"compression_loss": 0.0,
"epoch": 0.45,
"learning_rate": 1.938796836395477e-05,
"loss": 0.3748,
"step": 5500
},
{
"compression_loss": 0.0,
"epoch": 0.49,
"learning_rate": 1.9272951619400323e-05,
"loss": 0.3625,
"step": 6000
},
{
"compression_loss": 0.0,
"epoch": 0.53,
"learning_rate": 1.9148440420447786e-05,
"loss": 0.3628,
"step": 6500
},
{
"compression_loss": 0.0,
"epoch": 0.57,
"learning_rate": 1.901483926753625e-05,
"loss": 0.3639,
"step": 7000
},
{
"compression_loss": 0.0,
"epoch": 0.61,
"learning_rate": 1.8871749523368402e-05,
"loss": 0.3496,
"step": 7500
},
{
"compression_loss": 0.0,
"epoch": 0.65,
"learning_rate": 1.871957611040359e-05,
"loss": 0.3384,
"step": 8000
},
{
"compression_loss": 0.0,
"epoch": 0.69,
"learning_rate": 1.8558474837017542e-05,
"loss": 0.3401,
"step": 8500
},
{
"compression_loss": 0.0,
"epoch": 0.73,
"learning_rate": 1.8388610652706004e-05,
"loss": 0.3233,
"step": 9000
},
{
"compression_loss": 0.0,
"epoch": 0.77,
"learning_rate": 1.821015747919508e-05,
"loss": 0.313,
"step": 9500
},
{
"compression_loss": 0.0,
"epoch": 0.81,
"learning_rate": 1.802329803236507e-05,
"loss": 0.311,
"step": 10000
},
{
"compression_loss": 0.0,
"epoch": 0.86,
"learning_rate": 1.782822363517005e-05,
"loss": 0.3181,
"step": 10500
},
{
"compression_loss": 0.0,
"epoch": 0.9,
"learning_rate": 1.762554806329125e-05,
"loss": 0.3056,
"step": 11000
},
{
"compression_loss": 0.0,
"epoch": 0.94,
"learning_rate": 1.741466657612244e-05,
"loss": 0.3048,
"step": 11500
},
{
"compression_loss": 0.0,
"epoch": 0.98,
"learning_rate": 1.7196193308410236e-05,
"loss": 0.3038,
"step": 12000
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])",
"epoch": 1.0,
"eval_accuracy": 0.823841059602649,
"eval_loss": 0.4950415790081024,
"eval_runtime": 6.6676,
"eval_samples_per_second": 1472.051,
"eval_steps_per_second": 11.548,
"step": 12272
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 1.0,
"eval_accuracy": 0.8385124808965868,
"eval_loss": 0.45917272567749023,
"eval_runtime": 7.9134,
"eval_samples_per_second": 1240.309,
"eval_steps_per_second": 9.73,
"step": 12272
},
{
"compression_loss": 0.0,
"epoch": 1.02,
"learning_rate": 1.6970810835690784e-05,
"loss": 0.2515,
"step": 12500
},
{
"compression_loss": 0.0,
"epoch": 1.06,
"learning_rate": 1.6737846664816916e-05,
"loss": 0.1979,
"step": 13000
},
{
"compression_loss": 0.0,
"epoch": 1.1,
"learning_rate": 1.649798370049237e-05,
"loss": 0.1959,
"step": 13500
},
{
"compression_loss": 0.0,
"epoch": 1.14,
"learning_rate": 1.6251467535282483e-05,
"loss": 0.1967,
"step": 14000
},
{
"compression_loss": 0.0,
"epoch": 1.18,
"learning_rate": 1.5998550573878364e-05,
"loss": 0.1961,
"step": 14500
},
{
"compression_loss": 0.0,
"epoch": 1.22,
"learning_rate": 1.5739491774662946e-05,
"loss": 0.1956,
"step": 15000
},
{
"compression_loss": 0.0,
"epoch": 1.26,
"learning_rate": 1.547562747472503e-05,
"loss": 0.188,
"step": 15500
},
{
"compression_loss": 0.0,
"epoch": 1.3,
"learning_rate": 1.5205108630894239e-05,
"loss": 0.189,
"step": 16000
},
{
"compression_loss": 0.0,
"epoch": 1.34,
"learning_rate": 1.4929260344124287e-05,
"loss": 0.1886,
"step": 16500
},
{
"compression_loss": 0.0,
"epoch": 1.39,
"learning_rate": 1.4648365051883542e-05,
"loss": 0.1867,
"step": 17000
},
{
"compression_loss": 0.0,
"epoch": 1.43,
"learning_rate": 1.4362710359203518e-05,
"loss": 0.1851,
"step": 17500
},
{
"compression_loss": 0.0,
"epoch": 1.47,
"learning_rate": 1.4072588744203866e-05,
"loss": 0.1872,
"step": 18000
},
{
"compression_loss": 0.0,
"epoch": 1.51,
"learning_rate": 1.3778297258627826e-05,
"loss": 0.1925,
"step": 18500
},
{
"compression_loss": 0.0,
"epoch": 1.55,
"learning_rate": 1.3480737202294069e-05,
"loss": 0.1805,
"step": 19000
},
{
"compression_loss": 0.0,
"epoch": 1.59,
"learning_rate": 1.3179020719566164e-05,
"loss": 0.1787,
"step": 19500
},
{
"compression_loss": 0.0,
"epoch": 1.63,
"learning_rate": 1.2874049278925042e-05,
"loss": 0.1833,
"step": 20000
},
{
"compression_loss": 0.0,
"epoch": 1.67,
"learning_rate": 1.2566135136657013e-05,
"loss": 0.1757,
"step": 20500
},
{
"compression_loss": 0.0,
"epoch": 1.71,
"learning_rate": 1.2255593562042272e-05,
"loss": 0.1772,
"step": 21000
},
{
"compression_loss": 0.0,
"epoch": 1.75,
"learning_rate": 1.194274251455477e-05,
"loss": 0.1772,
"step": 21500
},
{
"compression_loss": 0.0,
"epoch": 1.79,
"learning_rate": 1.1627902318307658e-05,
"loss": 0.1723,
"step": 22000
},
{
"compression_loss": 0.0,
"epoch": 1.83,
"learning_rate": 1.131139533407759e-05,
"loss": 0.1739,
"step": 22500
},
{
"compression_loss": 0.0,
"epoch": 1.87,
"learning_rate": 1.099418245257325e-05,
"loss": 0.173,
"step": 23000
},
{
"compression_loss": 0.0,
"epoch": 1.91,
"learning_rate": 1.0675955707907746e-05,
"loss": 0.1693,
"step": 23500
},
{
"compression_loss": 0.0,
"epoch": 1.96,
"learning_rate": 1.035640004173869e-05,
"loss": 0.174,
"step": 24000
},
{
"compression_loss": 0.0,
"epoch": 2.0,
"learning_rate": 1.0036479462208424e-05,
"loss": 0.1683,
"step": 24500
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])",
"epoch": 2.0,
"eval_accuracy": 0.832603158430973,
"eval_loss": 0.467803031206131,
"eval_runtime": 6.6922,
"eval_samples_per_second": 1466.635,
"eval_steps_per_second": 11.506,
"step": 24544
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 2.0,
"eval_accuracy": 0.8478858889454916,
"eval_loss": 0.4284982979297638,
"eval_runtime": 7.9537,
"eval_samples_per_second": 1234.02,
"eval_steps_per_second": 9.681,
"step": 24544
},
{
"compression_loss": 0.0,
"epoch": 2.04,
"learning_rate": 9.717161267195924e-06,
"loss": 0.1232,
"step": 25000
},
{
"compression_loss": 0.0,
"epoch": 2.08,
"learning_rate": 9.397492679427418e-06,
"loss": 0.1194,
"step": 25500
},
{
"compression_loss": 0.0,
"epoch": 2.12,
"learning_rate": 9.07844099105778e-06,
"loss": 0.1166,
"step": 26000
},
{
"compression_loss": 0.0,
"epoch": 2.16,
"learning_rate": 8.760332874955599e-06,
"loss": 0.1195,
"step": 26500
},
{
"compression_loss": 0.0,
"epoch": 2.2,
"learning_rate": 8.443494037878114e-06,
"loss": 0.118,
"step": 27000
},
{
"compression_loss": 0.0,
"epoch": 2.24,
"learning_rate": 8.128248886984306e-06,
"loss": 0.1176,
"step": 27500
},
{
"compression_loss": 0.0,
"epoch": 2.28,
"learning_rate": 7.815544728834741e-06,
"loss": 0.1171,
"step": 28000
},
{
"compression_loss": 0.0,
"epoch": 2.32,
"learning_rate": 7.50444852102862e-06,
"loss": 0.1169,
"step": 28500
},
{
"compression_loss": 0.0,
"epoch": 2.36,
"learning_rate": 7.196521797475282e-06,
"loss": 0.1176,
"step": 29000
},
{
"compression_loss": 0.0,
"epoch": 2.4,
"learning_rate": 6.890845770618091e-06,
"loss": 0.1194,
"step": 29500
},
{
"compression_loss": 0.0,
"epoch": 2.44,
"learning_rate": 6.5883531662847225e-06,
"loss": 0.1158,
"step": 30000
},
{
"compression_loss": 0.0,
"epoch": 2.49,
"learning_rate": 6.289353702713278e-06,
"loss": 0.1169,
"step": 30500
},
{
"compression_loss": 0.0,
"epoch": 2.53,
"learning_rate": 5.994153521560526e-06,
"loss": 0.1128,
"step": 31000
},
{
"compression_loss": 0.0,
"epoch": 2.57,
"learning_rate": 5.703054874447462e-06,
"loss": 0.1131,
"step": 31500
},
{
"compression_loss": 0.0,
"epoch": 2.61,
"learning_rate": 5.41635581348777e-06,
"loss": 0.1112,
"step": 32000
},
{
"compression_loss": 0.0,
"epoch": 2.65,
"learning_rate": 5.134349886116183e-06,
"loss": 0.108,
"step": 32500
},
{
"compression_loss": 0.0,
"epoch": 2.69,
"learning_rate": 4.857325834529075e-06,
"loss": 0.1134,
"step": 33000
},
{
"compression_loss": 0.0,
"epoch": 2.73,
"learning_rate": 4.586105376257812e-06,
"loss": 0.111,
"step": 33500
},
{
"compression_loss": 0.0,
"epoch": 2.77,
"learning_rate": 4.319879247399316e-06,
"loss": 0.1129,
"step": 34000
},
{
"compression_loss": 0.0,
"epoch": 2.81,
"learning_rate": 4.059983758449615e-06,
"loss": 0.1119,
"step": 34500
},
{
"compression_loss": 0.0,
"epoch": 2.85,
"learning_rate": 3.805643436794584e-06,
"loss": 0.1123,
"step": 35000
},
{
"compression_loss": 0.0,
"epoch": 2.89,
"learning_rate": 3.557645436153263e-06,
"loss": 0.1089,
"step": 35500
},
{
"compression_loss": 0.0,
"epoch": 2.93,
"learning_rate": 3.316243678448742e-06,
"loss": 0.1136,
"step": 36000
},
{
"compression_loss": 0.0,
"epoch": 2.97,
"learning_rate": 3.08168533179687e-06,
"loss": 0.1132,
"step": 36500
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])",
"epoch": 3.0,
"eval_accuracy": 0.8381049414161997,
"eval_loss": 0.46384474635124207,
"eval_runtime": 6.3265,
"eval_samples_per_second": 1551.419,
"eval_steps_per_second": 12.171,
"step": 36816
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 3.0,
"eval_accuracy": 0.8492103922567499,
"eval_loss": 0.423094242811203,
"eval_runtime": 7.786,
"eval_samples_per_second": 1260.595,
"eval_steps_per_second": 9.89,
"step": 36816
},
{
"compression_loss": 0.0,
"epoch": 3.01,
"learning_rate": 2.854658281539182e-06,
"loss": 0.1037,
"step": 37000
},
{
"compression_loss": 0.0,
"epoch": 3.06,
"learning_rate": 2.6344851274604987e-06,
"loss": 0.0908,
"step": 37500
},
{
"compression_loss": 0.0,
"epoch": 3.1,
"learning_rate": 2.421853428136032e-06,
"loss": 0.0905,
"step": 38000
},
{
"compression_loss": 0.0,
"epoch": 3.14,
"learning_rate": 2.2173827550443417e-06,
"loss": 0.0944,
"step": 38500
},
{
"compression_loss": 0.0,
"epoch": 3.18,
"learning_rate": 2.0204630113806902e-06,
"loss": 0.0918,
"step": 39000
},
{
"compression_loss": 0.0,
"epoch": 3.22,
"learning_rate": 1.8317134117192737e-06,
"loss": 0.0906,
"step": 39500
},
{
"compression_loss": 0.0,
"epoch": 3.26,
"learning_rate": 1.6513272143170379e-06,
"loss": 0.0942,
"step": 40000
},
{
"compression_loss": 0.0,
"epoch": 3.3,
"learning_rate": 1.4798241410321701e-06,
"loss": 0.0948,
"step": 40500
},
{
"compression_loss": 0.0,
"epoch": 3.34,
"learning_rate": 1.3166924633344513e-06,
"loss": 0.0922,
"step": 41000
},
{
"compression_loss": 0.0,
"epoch": 3.38,
"learning_rate": 1.1624515111376556e-06,
"loss": 0.0915,
"step": 41500
},
{
"compression_loss": 0.0,
"epoch": 3.42,
"learning_rate": 1.0175404634253083e-06,
"loss": 0.0924,
"step": 42000
},
{
"compression_loss": 0.0,
"epoch": 3.46,
"learning_rate": 8.815269365902945e-07,
"loss": 0.0933,
"step": 42500
},
{
"compression_loss": 0.0,
"epoch": 3.5,
"learning_rate": 7.548496955717189e-07,
"loss": 0.0916,
"step": 43000
},
{
"compression_loss": 0.0,
"epoch": 3.54,
"learning_rate": 6.376384435468941e-07,
"loss": 0.091,
"step": 43500
},
{
"compression_loss": 0.0,
"epoch": 3.59,
"learning_rate": 5.300131915902884e-07,
"loss": 0.0904,
"step": 44000
},
{
"compression_loss": 0.0,
"epoch": 3.63,
"learning_rate": 4.3208413579573505e-07,
"loss": 0.092,
"step": 44500
},
{
"compression_loss": 0.0,
"epoch": 3.67,
"learning_rate": 3.439515444480945e-07,
"loss": 0.0925,
"step": 45000
},
{
"compression_loss": 0.0,
"epoch": 3.71,
"learning_rate": 2.657056553598691e-07,
"loss": 0.091,
"step": 45500
},
{
"compression_loss": 0.0,
"epoch": 3.75,
"learning_rate": 1.974265834778688e-07,
"loss": 0.0911,
"step": 46000
},
{
"compression_loss": 0.0,
"epoch": 3.79,
"learning_rate": 1.3929066553600822e-07,
"loss": 0.0893,
"step": 46500
},
{
"compression_loss": 0.0,
"epoch": 3.83,
"learning_rate": 9.112443637549263e-08,
"loss": 0.0923,
"step": 47000
},
{
"compression_loss": 0.0,
"epoch": 3.87,
"learning_rate": 5.310377585768578e-08,
"loss": 0.0874,
"step": 47500
},
{
"compression_loss": 0.0,
"epoch": 3.91,
"learning_rate": 2.5267612841735068e-08,
"loss": 0.0906,
"step": 48000
},
{
"compression_loss": 0.0,
"epoch": 3.95,
"learning_rate": 7.644448412432726e-09,
"loss": 0.091,
"step": 48500
},
{
"compression_loss": 0.0,
"epoch": 3.99,
"learning_rate": 2.5232669832453427e-10,
"loss": 0.0894,
"step": 49000
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])",
"epoch": 4.0,
"eval_accuracy": 0.8383087111563933,
"eval_loss": 0.4677698612213135,
"eval_runtime": 6.3877,
"eval_samples_per_second": 1536.535,
"eval_steps_per_second": 12.054,
"step": 49088
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 4.0,
"eval_accuracy": 0.8497198166072338,
"eval_loss": 0.42607390880584717,
"eval_runtime": 7.8106,
"eval_samples_per_second": 1256.619,
"eval_steps_per_second": 9.858,
"step": 49088
},
{
"epoch": 4.0,
"step": 49088,
"total_flos": 1.0332516536862106e+17,
"train_loss": 0.2031622279964805,
"train_runtime": 75161.6129,
"train_samples_per_second": 20.899,
"train_steps_per_second": 0.653
}
],
"max_steps": 49088,
"min_subnet_acc": null,
"min_subnet_best_acc": null,
"num_train_epochs": 4,
"supernet_acc": null,
"supernet_best_acc": null,
"total_flos": 1.0332516536862106e+17,
"trial_name": null,
"trial_params": null
}