|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 49088, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.04, |
|
"learning_rate": 1.9994982433350572e-05, |
|
"loss": 1.1374, |
|
"step": 500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.08, |
|
"learning_rate": 1.9979731646343096e-05, |
|
"loss": 0.6873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.12, |
|
"learning_rate": 1.9954262742072594e-05, |
|
"loss": 0.5978, |
|
"step": 1500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.16, |
|
"learning_rate": 1.9918601797818468e-05, |
|
"loss": 0.5178, |
|
"step": 2000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.2, |
|
"learning_rate": 1.9872785326357108e-05, |
|
"loss": 0.5054, |
|
"step": 2500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.24, |
|
"learning_rate": 1.9816860238576964e-05, |
|
"loss": 0.4631, |
|
"step": 3000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.29, |
|
"learning_rate": 1.9750883795447073e-05, |
|
"loss": 0.4353, |
|
"step": 3500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.33, |
|
"learning_rate": 1.967492354938833e-05, |
|
"loss": 0.4238, |
|
"step": 4000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.37, |
|
"learning_rate": 1.9589057275107412e-05, |
|
"loss": 0.3915, |
|
"step": 4500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.41, |
|
"learning_rate": 1.9493372889964286e-05, |
|
"loss": 0.3847, |
|
"step": 5000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.45, |
|
"learning_rate": 1.938796836395477e-05, |
|
"loss": 0.3748, |
|
"step": 5500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.49, |
|
"learning_rate": 1.9272951619400323e-05, |
|
"loss": 0.3625, |
|
"step": 6000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.53, |
|
"learning_rate": 1.9148440420447786e-05, |
|
"loss": 0.3628, |
|
"step": 6500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.57, |
|
"learning_rate": 1.901483926753625e-05, |
|
"loss": 0.3639, |
|
"step": 7000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.61, |
|
"learning_rate": 1.8871749523368402e-05, |
|
"loss": 0.3496, |
|
"step": 7500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.65, |
|
"learning_rate": 1.871957611040359e-05, |
|
"loss": 0.3384, |
|
"step": 8000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.69, |
|
"learning_rate": 1.8558474837017542e-05, |
|
"loss": 0.3401, |
|
"step": 8500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.73, |
|
"learning_rate": 1.8388610652706004e-05, |
|
"loss": 0.3233, |
|
"step": 9000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.77, |
|
"learning_rate": 1.821015747919508e-05, |
|
"loss": 0.313, |
|
"step": 9500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.81, |
|
"learning_rate": 1.802329803236507e-05, |
|
"loss": 0.311, |
|
"step": 10000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.86, |
|
"learning_rate": 1.782822363517005e-05, |
|
"loss": 0.3181, |
|
"step": 10500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.9, |
|
"learning_rate": 1.762554806329125e-05, |
|
"loss": 0.3056, |
|
"step": 11000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.94, |
|
"learning_rate": 1.741466657612244e-05, |
|
"loss": 0.3048, |
|
"step": 11500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.98, |
|
"learning_rate": 1.7196193308410236e-05, |
|
"loss": 0.3038, |
|
"step": 12000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.823841059602649, |
|
"eval_loss": 0.4950415790081024, |
|
"eval_runtime": 6.6676, |
|
"eval_samples_per_second": 1472.051, |
|
"eval_steps_per_second": 11.548, |
|
"step": 12272 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8385124808965868, |
|
"eval_loss": 0.45917272567749023, |
|
"eval_runtime": 7.9134, |
|
"eval_samples_per_second": 1240.309, |
|
"eval_steps_per_second": 9.73, |
|
"step": 12272 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.02, |
|
"learning_rate": 1.6970810835690784e-05, |
|
"loss": 0.2515, |
|
"step": 12500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.06, |
|
"learning_rate": 1.6737846664816916e-05, |
|
"loss": 0.1979, |
|
"step": 13000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.1, |
|
"learning_rate": 1.649798370049237e-05, |
|
"loss": 0.1959, |
|
"step": 13500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.14, |
|
"learning_rate": 1.6251467535282483e-05, |
|
"loss": 0.1967, |
|
"step": 14000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.18, |
|
"learning_rate": 1.5998550573878364e-05, |
|
"loss": 0.1961, |
|
"step": 14500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.22, |
|
"learning_rate": 1.5739491774662946e-05, |
|
"loss": 0.1956, |
|
"step": 15000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.26, |
|
"learning_rate": 1.547562747472503e-05, |
|
"loss": 0.188, |
|
"step": 15500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.3, |
|
"learning_rate": 1.5205108630894239e-05, |
|
"loss": 0.189, |
|
"step": 16000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.34, |
|
"learning_rate": 1.4929260344124287e-05, |
|
"loss": 0.1886, |
|
"step": 16500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.39, |
|
"learning_rate": 1.4648365051883542e-05, |
|
"loss": 0.1867, |
|
"step": 17000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.43, |
|
"learning_rate": 1.4362710359203518e-05, |
|
"loss": 0.1851, |
|
"step": 17500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.47, |
|
"learning_rate": 1.4072588744203866e-05, |
|
"loss": 0.1872, |
|
"step": 18000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.51, |
|
"learning_rate": 1.3778297258627826e-05, |
|
"loss": 0.1925, |
|
"step": 18500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.55, |
|
"learning_rate": 1.3480737202294069e-05, |
|
"loss": 0.1805, |
|
"step": 19000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.59, |
|
"learning_rate": 1.3179020719566164e-05, |
|
"loss": 0.1787, |
|
"step": 19500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.63, |
|
"learning_rate": 1.2874049278925042e-05, |
|
"loss": 0.1833, |
|
"step": 20000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.67, |
|
"learning_rate": 1.2566135136657013e-05, |
|
"loss": 0.1757, |
|
"step": 20500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.71, |
|
"learning_rate": 1.2255593562042272e-05, |
|
"loss": 0.1772, |
|
"step": 21000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.75, |
|
"learning_rate": 1.194274251455477e-05, |
|
"loss": 0.1772, |
|
"step": 21500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.79, |
|
"learning_rate": 1.1627902318307658e-05, |
|
"loss": 0.1723, |
|
"step": 22000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.83, |
|
"learning_rate": 1.131139533407759e-05, |
|
"loss": 0.1739, |
|
"step": 22500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.87, |
|
"learning_rate": 1.099418245257325e-05, |
|
"loss": 0.173, |
|
"step": 23000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.91, |
|
"learning_rate": 1.0675955707907746e-05, |
|
"loss": 0.1693, |
|
"step": 23500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.96, |
|
"learning_rate": 1.035640004173869e-05, |
|
"loss": 0.174, |
|
"step": 24000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.0, |
|
"learning_rate": 1.0036479462208424e-05, |
|
"loss": 0.1683, |
|
"step": 24500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.832603158430973, |
|
"eval_loss": 0.467803031206131, |
|
"eval_runtime": 6.6922, |
|
"eval_samples_per_second": 1466.635, |
|
"eval_steps_per_second": 11.506, |
|
"step": 24544 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8478858889454916, |
|
"eval_loss": 0.4284982979297638, |
|
"eval_runtime": 7.9537, |
|
"eval_samples_per_second": 1234.02, |
|
"eval_steps_per_second": 9.681, |
|
"step": 24544 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.04, |
|
"learning_rate": 9.717161267195924e-06, |
|
"loss": 0.1232, |
|
"step": 25000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.08, |
|
"learning_rate": 9.397492679427418e-06, |
|
"loss": 0.1194, |
|
"step": 25500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.12, |
|
"learning_rate": 9.07844099105778e-06, |
|
"loss": 0.1166, |
|
"step": 26000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.16, |
|
"learning_rate": 8.760332874955599e-06, |
|
"loss": 0.1195, |
|
"step": 26500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.2, |
|
"learning_rate": 8.443494037878114e-06, |
|
"loss": 0.118, |
|
"step": 27000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.24, |
|
"learning_rate": 8.128248886984306e-06, |
|
"loss": 0.1176, |
|
"step": 27500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.28, |
|
"learning_rate": 7.815544728834741e-06, |
|
"loss": 0.1171, |
|
"step": 28000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.32, |
|
"learning_rate": 7.50444852102862e-06, |
|
"loss": 0.1169, |
|
"step": 28500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.36, |
|
"learning_rate": 7.196521797475282e-06, |
|
"loss": 0.1176, |
|
"step": 29000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.4, |
|
"learning_rate": 6.890845770618091e-06, |
|
"loss": 0.1194, |
|
"step": 29500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.44, |
|
"learning_rate": 6.5883531662847225e-06, |
|
"loss": 0.1158, |
|
"step": 30000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.49, |
|
"learning_rate": 6.289353702713278e-06, |
|
"loss": 0.1169, |
|
"step": 30500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.53, |
|
"learning_rate": 5.994153521560526e-06, |
|
"loss": 0.1128, |
|
"step": 31000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.57, |
|
"learning_rate": 5.703054874447462e-06, |
|
"loss": 0.1131, |
|
"step": 31500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.61, |
|
"learning_rate": 5.41635581348777e-06, |
|
"loss": 0.1112, |
|
"step": 32000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.65, |
|
"learning_rate": 5.134349886116183e-06, |
|
"loss": 0.108, |
|
"step": 32500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.69, |
|
"learning_rate": 4.857325834529075e-06, |
|
"loss": 0.1134, |
|
"step": 33000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.73, |
|
"learning_rate": 4.586105376257812e-06, |
|
"loss": 0.111, |
|
"step": 33500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.77, |
|
"learning_rate": 4.319879247399316e-06, |
|
"loss": 0.1129, |
|
"step": 34000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.81, |
|
"learning_rate": 4.059983758449615e-06, |
|
"loss": 0.1119, |
|
"step": 34500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.85, |
|
"learning_rate": 3.805643436794584e-06, |
|
"loss": 0.1123, |
|
"step": 35000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.89, |
|
"learning_rate": 3.557645436153263e-06, |
|
"loss": 0.1089, |
|
"step": 35500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.93, |
|
"learning_rate": 3.316243678448742e-06, |
|
"loss": 0.1136, |
|
"step": 36000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.97, |
|
"learning_rate": 3.08168533179687e-06, |
|
"loss": 0.1132, |
|
"step": 36500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8381049414161997, |
|
"eval_loss": 0.46384474635124207, |
|
"eval_runtime": 6.3265, |
|
"eval_samples_per_second": 1551.419, |
|
"eval_steps_per_second": 12.171, |
|
"step": 36816 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8492103922567499, |
|
"eval_loss": 0.423094242811203, |
|
"eval_runtime": 7.786, |
|
"eval_samples_per_second": 1260.595, |
|
"eval_steps_per_second": 9.89, |
|
"step": 36816 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.01, |
|
"learning_rate": 2.854658281539182e-06, |
|
"loss": 0.1037, |
|
"step": 37000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.06, |
|
"learning_rate": 2.6344851274604987e-06, |
|
"loss": 0.0908, |
|
"step": 37500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.1, |
|
"learning_rate": 2.421853428136032e-06, |
|
"loss": 0.0905, |
|
"step": 38000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.14, |
|
"learning_rate": 2.2173827550443417e-06, |
|
"loss": 0.0944, |
|
"step": 38500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.18, |
|
"learning_rate": 2.0204630113806902e-06, |
|
"loss": 0.0918, |
|
"step": 39000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.22, |
|
"learning_rate": 1.8317134117192737e-06, |
|
"loss": 0.0906, |
|
"step": 39500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.26, |
|
"learning_rate": 1.6513272143170379e-06, |
|
"loss": 0.0942, |
|
"step": 40000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.3, |
|
"learning_rate": 1.4798241410321701e-06, |
|
"loss": 0.0948, |
|
"step": 40500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.34, |
|
"learning_rate": 1.3166924633344513e-06, |
|
"loss": 0.0922, |
|
"step": 41000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.38, |
|
"learning_rate": 1.1624515111376556e-06, |
|
"loss": 0.0915, |
|
"step": 41500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.42, |
|
"learning_rate": 1.0175404634253083e-06, |
|
"loss": 0.0924, |
|
"step": 42000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.46, |
|
"learning_rate": 8.815269365902945e-07, |
|
"loss": 0.0933, |
|
"step": 42500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.5, |
|
"learning_rate": 7.548496955717189e-07, |
|
"loss": 0.0916, |
|
"step": 43000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.54, |
|
"learning_rate": 6.376384435468941e-07, |
|
"loss": 0.091, |
|
"step": 43500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.59, |
|
"learning_rate": 5.300131915902884e-07, |
|
"loss": 0.0904, |
|
"step": 44000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.63, |
|
"learning_rate": 4.3208413579573505e-07, |
|
"loss": 0.092, |
|
"step": 44500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.67, |
|
"learning_rate": 3.439515444480945e-07, |
|
"loss": 0.0925, |
|
"step": 45000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.71, |
|
"learning_rate": 2.657056553598691e-07, |
|
"loss": 0.091, |
|
"step": 45500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.75, |
|
"learning_rate": 1.974265834778688e-07, |
|
"loss": 0.0911, |
|
"step": 46000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.79, |
|
"learning_rate": 1.3929066553600822e-07, |
|
"loss": 0.0893, |
|
"step": 46500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.83, |
|
"learning_rate": 9.112443637549263e-08, |
|
"loss": 0.0923, |
|
"step": 47000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.87, |
|
"learning_rate": 5.310377585768578e-08, |
|
"loss": 0.0874, |
|
"step": 47500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.91, |
|
"learning_rate": 2.5267612841735068e-08, |
|
"loss": 0.0906, |
|
"step": 48000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.95, |
|
"learning_rate": 7.644448412432726e-09, |
|
"loss": 0.091, |
|
"step": 48500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.99, |
|
"learning_rate": 2.5232669832453427e-10, |
|
"loss": 0.0894, |
|
"step": 49000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 256, 1: 256, 2: 192, 3: 320, 4: 192, 5: 384, 6: 128, 7: 256, 8: 256, 9: 256, 10: 192, 11: 256, 12: 1542, 13: 1611, 14: 1891, 15: 1877, 16: 1825, 17: 1790, 18: 1678, 19: 1544, 20: 1223, 21: 628, 22: 345, 23: 213})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8383087111563933, |
|
"eval_loss": 0.4677698612213135, |
|
"eval_runtime": 6.3877, |
|
"eval_samples_per_second": 1536.535, |
|
"eval_steps_per_second": 12.054, |
|
"step": 49088 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8497198166072338, |
|
"eval_loss": 0.42607390880584717, |
|
"eval_runtime": 7.8106, |
|
"eval_samples_per_second": 1256.619, |
|
"eval_steps_per_second": 9.858, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 49088, |
|
"total_flos": 1.0332516536862106e+17, |
|
"train_loss": 0.2031622279964805, |
|
"train_runtime": 75161.6129, |
|
"train_samples_per_second": 20.899, |
|
"train_steps_per_second": 0.653 |
|
} |
|
], |
|
"max_steps": 49088, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 4, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 1.0332516536862106e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|