|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 45484, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.04, |
|
"learning_rate": 1.9994108544833432e-05, |
|
"loss": 0.9256, |
|
"step": 500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.09, |
|
"learning_rate": 1.9976346258894502e-05, |
|
"loss": 0.6433, |
|
"step": 1000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.13, |
|
"learning_rate": 1.9946662925610062e-05, |
|
"loss": 0.5994, |
|
"step": 1500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.18, |
|
"learning_rate": 1.9905117625959957e-05, |
|
"loss": 0.5411, |
|
"step": 2000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.22, |
|
"learning_rate": 1.9851759905098248e-05, |
|
"loss": 0.5124, |
|
"step": 2500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.26, |
|
"learning_rate": 1.97866533951692e-05, |
|
"loss": 0.4807, |
|
"step": 3000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.31, |
|
"learning_rate": 1.9709875739422284e-05, |
|
"loss": 0.4652, |
|
"step": 3500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.35, |
|
"learning_rate": 1.9621518499618157e-05, |
|
"loss": 0.4457, |
|
"step": 4000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.4, |
|
"learning_rate": 1.9521687046836e-05, |
|
"loss": 0.4466, |
|
"step": 4500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.44, |
|
"learning_rate": 1.9410500435812454e-05, |
|
"loss": 0.4176, |
|
"step": 5000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.48, |
|
"learning_rate": 1.928834718660774e-05, |
|
"loss": 0.4164, |
|
"step": 5500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.53, |
|
"learning_rate": 1.9154883430559243e-05, |
|
"loss": 0.4014, |
|
"step": 6000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.57, |
|
"learning_rate": 1.901080149718364e-05, |
|
"loss": 0.3843, |
|
"step": 6500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.62, |
|
"learning_rate": 1.8855695787268013e-05, |
|
"loss": 0.3758, |
|
"step": 7000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.66, |
|
"learning_rate": 1.869002915190846e-05, |
|
"loss": 0.369, |
|
"step": 7500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.7, |
|
"learning_rate": 1.8513999158075064e-05, |
|
"loss": 0.3938, |
|
"step": 8000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.75, |
|
"learning_rate": 1.8327815731637612e-05, |
|
"loss": 0.3676, |
|
"step": 8500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.79, |
|
"learning_rate": 1.8131700907016972e-05, |
|
"loss": 0.363, |
|
"step": 9000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.84, |
|
"learning_rate": 1.79258885623963e-05, |
|
"loss": 0.3566, |
|
"step": 9500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.88, |
|
"learning_rate": 1.771062414080787e-05, |
|
"loss": 0.3426, |
|
"step": 10000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.92, |
|
"learning_rate": 1.7486164357428192e-05, |
|
"loss": 0.3246, |
|
"step": 10500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 0.97, |
|
"learning_rate": 1.725277689343042e-05, |
|
"loss": 0.3347, |
|
"step": 11000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 256, 2: 320, 3: 192, 4: 256, 5: 256, 6: 192, 7: 256, 8: 64, 9: 192, 10: 192, 11: 512, 12: 1675, 13: 1666, 14: 1787, 15: 1791, 16: 1772, 17: 1751, 18: 1709, 19: 1590, 20: 1320, 21: 762, 22: 348, 23: 115})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8986148899332179, |
|
"eval_combined_score": 0.8815576185145908, |
|
"eval_f1": 0.8645003470959637, |
|
"eval_loss": 0.27813825011253357, |
|
"eval_runtime": 27.532, |
|
"eval_samples_per_second": 1468.473, |
|
"eval_steps_per_second": 11.478, |
|
"step": 11371 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9021766015335148, |
|
"eval_combined_score": 0.8865253049131546, |
|
"eval_f1": 0.8708740082927945, |
|
"eval_loss": 0.2632658779621124, |
|
"eval_runtime": 33.2751, |
|
"eval_samples_per_second": 1215.023, |
|
"eval_steps_per_second": 9.497, |
|
"step": 11371 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.01, |
|
"learning_rate": 1.7011232592278327e-05, |
|
"loss": 0.2935, |
|
"step": 11500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.06, |
|
"learning_rate": 1.6760851492396305e-05, |
|
"loss": 0.2134, |
|
"step": 12000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.1, |
|
"learning_rate": 1.650293242000503e-05, |
|
"loss": 0.215, |
|
"step": 12500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.14, |
|
"learning_rate": 1.623728921389968e-05, |
|
"loss": 0.214, |
|
"step": 13000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.19, |
|
"learning_rate": 1.5963682982587413e-05, |
|
"loss": 0.2014, |
|
"step": 13500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.23, |
|
"learning_rate": 1.5682964717069908e-05, |
|
"loss": 0.2065, |
|
"step": 14000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.28, |
|
"learning_rate": 1.5395469189988864e-05, |
|
"loss": 0.2073, |
|
"step": 14500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.32, |
|
"learning_rate": 1.5101539256259345e-05, |
|
"loss": 0.2234, |
|
"step": 15000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.36, |
|
"learning_rate": 1.4801525444195615e-05, |
|
"loss": 0.203, |
|
"step": 15500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.41, |
|
"learning_rate": 1.4495785537485976e-05, |
|
"loss": 0.1976, |
|
"step": 16000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.45, |
|
"learning_rate": 1.418468414851523e-05, |
|
"loss": 0.2019, |
|
"step": 16500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.5, |
|
"learning_rate": 1.3868592283543443e-05, |
|
"loss": 0.2025, |
|
"step": 17000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.54, |
|
"learning_rate": 1.3548532661979667e-05, |
|
"loss": 0.1949, |
|
"step": 17500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.58, |
|
"learning_rate": 1.3223604296950975e-05, |
|
"loss": 0.1989, |
|
"step": 18000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.63, |
|
"learning_rate": 1.2894831598799443e-05, |
|
"loss": 0.2037, |
|
"step": 18500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.67, |
|
"learning_rate": 1.2563274280384495e-05, |
|
"loss": 0.1902, |
|
"step": 19000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.71, |
|
"learning_rate": 1.222799898813265e-05, |
|
"loss": 0.1871, |
|
"step": 19500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.76, |
|
"learning_rate": 1.1890066679225309e-05, |
|
"loss": 0.1857, |
|
"step": 20000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.8, |
|
"learning_rate": 1.1549880357334364e-05, |
|
"loss": 0.1832, |
|
"step": 20500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.85, |
|
"learning_rate": 1.1207845714171553e-05, |
|
"loss": 0.1872, |
|
"step": 21000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.89, |
|
"learning_rate": 1.0864370645677793e-05, |
|
"loss": 0.1788, |
|
"step": 21500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.93, |
|
"learning_rate": 1.0519864765583883e-05, |
|
"loss": 0.1808, |
|
"step": 22000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.98, |
|
"learning_rate": 1.0174738916922611e-05, |
|
"loss": 0.1664, |
|
"step": 22500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 256, 2: 320, 3: 192, 4: 256, 5: 256, 6: 192, 7: 256, 8: 64, 9: 192, 10: 192, 11: 512, 12: 1675, 13: 1666, 14: 1787, 15: 1791, 16: 1772, 17: 1751, 18: 1709, 19: 1590, 20: 1320, 21: 762, 22: 348, 23: 115})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9048478852337373, |
|
"eval_combined_score": 0.8892236797616033, |
|
"eval_f1": 0.8735994742894694, |
|
"eval_loss": 0.27244308590888977, |
|
"eval_runtime": 32.6391, |
|
"eval_samples_per_second": 1238.699, |
|
"eval_steps_per_second": 9.682, |
|
"step": 22742 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9106109324758842, |
|
"eval_combined_score": 0.8957706744614529, |
|
"eval_f1": 0.8809304164470216, |
|
"eval_loss": 0.26653361320495605, |
|
"eval_runtime": 45.2051, |
|
"eval_samples_per_second": 894.367, |
|
"eval_steps_per_second": 6.99, |
|
"step": 22742 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.02, |
|
"learning_rate": 9.830785888286694e-06, |
|
"loss": 0.1477, |
|
"step": 23000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.07, |
|
"learning_rate": 9.485653464158049e-06, |
|
"loss": 0.112, |
|
"step": 23500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.11, |
|
"learning_rate": 9.141134427791111e-06, |
|
"loss": 0.107, |
|
"step": 24000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.15, |
|
"learning_rate": 8.797639637908284e-06, |
|
"loss": 0.1161, |
|
"step": 24500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.2, |
|
"learning_rate": 8.45626115108603e-06, |
|
"loss": 0.1099, |
|
"step": 25000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.24, |
|
"learning_rate": 8.116037966577595e-06, |
|
"loss": 0.11, |
|
"step": 25500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.29, |
|
"learning_rate": 7.778061514912307e-06, |
|
"loss": 0.111, |
|
"step": 26000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.33, |
|
"learning_rate": 7.4427348524048315e-06, |
|
"loss": 0.1096, |
|
"step": 26500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.37, |
|
"learning_rate": 7.111119121678447e-06, |
|
"loss": 0.1057, |
|
"step": 27000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.42, |
|
"learning_rate": 6.782280804725536e-06, |
|
"loss": 0.1046, |
|
"step": 27500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.46, |
|
"learning_rate": 6.457279802573981e-06, |
|
"loss": 0.097, |
|
"step": 28000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.51, |
|
"learning_rate": 6.136503697570255e-06, |
|
"loss": 0.1122, |
|
"step": 28500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.55, |
|
"learning_rate": 5.82033503362853e-06, |
|
"loss": 0.1063, |
|
"step": 29000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.59, |
|
"learning_rate": 5.509150860025087e-06, |
|
"loss": 0.1052, |
|
"step": 29500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.64, |
|
"learning_rate": 5.2039283504942894e-06, |
|
"loss": 0.1023, |
|
"step": 30000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.68, |
|
"learning_rate": 4.9038082855161555e-06, |
|
"loss": 0.0949, |
|
"step": 30500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.73, |
|
"learning_rate": 4.609765721621778e-06, |
|
"loss": 0.0949, |
|
"step": 31000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.77, |
|
"learning_rate": 4.322151321443352e-06, |
|
"loss": 0.1048, |
|
"step": 31500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.81, |
|
"learning_rate": 4.041862786524023e-06, |
|
"loss": 0.0938, |
|
"step": 32000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.86, |
|
"learning_rate": 3.768111087965475e-06, |
|
"loss": 0.0971, |
|
"step": 32500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.9, |
|
"learning_rate": 3.501791274575046e-06, |
|
"loss": 0.0905, |
|
"step": 33000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.95, |
|
"learning_rate": 3.243220948013981e-06, |
|
"loss": 0.0934, |
|
"step": 33500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 2.99, |
|
"learning_rate": 2.9927084682357343e-06, |
|
"loss": 0.092, |
|
"step": 34000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 256, 2: 320, 3: 192, 4: 256, 5: 256, 6: 192, 7: 256, 8: 64, 9: 192, 10: 192, 11: 512, 12: 1675, 13: 1666, 14: 1787, 15: 1791, 16: 1772, 17: 1751, 18: 1709, 19: 1590, 20: 1320, 21: 762, 22: 348, 23: 115})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9094484293841207, |
|
"eval_combined_score": 0.8940211803693942, |
|
"eval_f1": 0.8785939313546676, |
|
"eval_loss": 0.28715240955352783, |
|
"eval_runtime": 25.8149, |
|
"eval_samples_per_second": 1566.149, |
|
"eval_steps_per_second": 12.241, |
|
"step": 34113 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9141231758595103, |
|
"eval_combined_score": 0.8993641670310579, |
|
"eval_f1": 0.8846051582026057, |
|
"eval_loss": 0.2707538604736328, |
|
"eval_runtime": 32.4731, |
|
"eval_samples_per_second": 1245.032, |
|
"eval_steps_per_second": 9.731, |
|
"step": 34113 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.03, |
|
"learning_rate": 2.751504176813342e-06, |
|
"loss": 0.0737, |
|
"step": 34500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.08, |
|
"learning_rate": 2.5179585331698086e-06, |
|
"loss": 0.0666, |
|
"step": 35000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.12, |
|
"learning_rate": 2.2933356533607244e-06, |
|
"loss": 0.0736, |
|
"step": 35500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.17, |
|
"learning_rate": 2.0779034130610965e-06, |
|
"loss": 0.0682, |
|
"step": 36000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.21, |
|
"learning_rate": 1.8723211031117794e-06, |
|
"loss": 0.07, |
|
"step": 36500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.25, |
|
"learning_rate": 1.6760099970815446e-06, |
|
"loss": 0.0701, |
|
"step": 37000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.3, |
|
"learning_rate": 1.4896257268334447e-06, |
|
"loss": 0.0665, |
|
"step": 37500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.34, |
|
"learning_rate": 1.3133905663061163e-06, |
|
"loss": 0.068, |
|
"step": 38000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.39, |
|
"learning_rate": 1.1475146860424102e-06, |
|
"loss": 0.0696, |
|
"step": 38500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.43, |
|
"learning_rate": 9.924958789004234e-07, |
|
"loss": 0.0687, |
|
"step": 39000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.47, |
|
"learning_rate": 8.478977577170089e-07, |
|
"loss": 0.0682, |
|
"step": 39500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.52, |
|
"learning_rate": 7.144704103835564e-07, |
|
"loss": 0.0723, |
|
"step": 40000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.56, |
|
"learning_rate": 5.918382309806136e-07, |
|
"loss": 0.0707, |
|
"step": 40500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.61, |
|
"learning_rate": 4.80425824410179e-07, |
|
"loss": 0.074, |
|
"step": 41000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.65, |
|
"learning_rate": 3.8036605636091126e-07, |
|
"loss": 0.0727, |
|
"step": 41500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.69, |
|
"learning_rate": 2.91778253848517e-07, |
|
"loss": 0.0687, |
|
"step": 42000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.74, |
|
"learning_rate": 2.147680629114379e-07, |
|
"loss": 0.0679, |
|
"step": 42500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.78, |
|
"learning_rate": 1.4942732262188898e-07, |
|
"loss": 0.0659, |
|
"step": 43000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.83, |
|
"learning_rate": 9.583395556248875e-08, |
|
"loss": 0.07, |
|
"step": 43500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.87, |
|
"learning_rate": 5.405187489911701e-08, |
|
"loss": 0.0685, |
|
"step": 44000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.91, |
|
"learning_rate": 2.4178886625614294e-08, |
|
"loss": 0.0739, |
|
"step": 44500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.96, |
|
"learning_rate": 6.130896503045991e-09, |
|
"loss": 0.0693, |
|
"step": 45000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 320, 1: 256, 2: 320, 3: 192, 4: 256, 5: 256, 6: 192, 7: 256, 8: 64, 9: 192, 10: 192, 11: 512, 12: 1675, 13: 1666, 14: 1787, 15: 1791, 16: 1772, 17: 1751, 18: 1709, 19: 1590, 20: 1320, 21: 762, 22: 348, 23: 115})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9088300766757358, |
|
"eval_combined_score": 0.8929735128011296, |
|
"eval_f1": 0.8771169489265235, |
|
"eval_loss": 0.29656311869621277, |
|
"eval_runtime": 26.0405, |
|
"eval_samples_per_second": 1552.579, |
|
"eval_steps_per_second": 12.135, |
|
"step": 45484 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9143952510511996, |
|
"eval_combined_score": 0.8995123693503939, |
|
"eval_f1": 0.8846294876495883, |
|
"eval_loss": 0.2779307961463928, |
|
"eval_runtime": 32.5644, |
|
"eval_samples_per_second": 1241.54, |
|
"eval_steps_per_second": 9.704, |
|
"step": 45484 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 45484, |
|
"total_flos": 9.573190504851456e+16, |
|
"train_loss": 0.20403898293818326, |
|
"train_runtime": 74603.6038, |
|
"train_samples_per_second": 19.508, |
|
"train_steps_per_second": 0.61 |
|
} |
|
], |
|
"max_steps": 45484, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 4, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 9.573190504851456e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|