|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 16.0, |
|
"global_step": 1248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5126353790613718, |
|
"eval_loss": 0.6907398700714111, |
|
"eval_runtime": 0.3187, |
|
"eval_samples_per_second": 869.169, |
|
"eval_steps_per_second": 9.413, |
|
"step": 78 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.6698081493377686, |
|
"eval_runtime": 0.3205, |
|
"eval_samples_per_second": 864.196, |
|
"eval_steps_per_second": 9.36, |
|
"step": 78 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5884476534296029, |
|
"eval_loss": 0.7006412744522095, |
|
"eval_runtime": 0.313, |
|
"eval_samples_per_second": 884.931, |
|
"eval_steps_per_second": 9.584, |
|
"step": 156 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.6682543158531189, |
|
"eval_runtime": 0.3443, |
|
"eval_samples_per_second": 804.534, |
|
"eval_steps_per_second": 8.713, |
|
"step": 156 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5956678700361011, |
|
"eval_loss": 0.7282483577728271, |
|
"eval_runtime": 0.271, |
|
"eval_samples_per_second": 1022.293, |
|
"eval_steps_per_second": 11.072, |
|
"step": 234 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.7188943028450012, |
|
"eval_runtime": 0.3007, |
|
"eval_samples_per_second": 921.259, |
|
"eval_steps_per_second": 9.978, |
|
"step": 234 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.7194212675094604, |
|
"eval_runtime": 0.3156, |
|
"eval_samples_per_second": 877.769, |
|
"eval_steps_per_second": 9.507, |
|
"step": 312 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.7804971933364868, |
|
"eval_runtime": 0.3328, |
|
"eval_samples_per_second": 832.291, |
|
"eval_steps_per_second": 9.014, |
|
"step": 312 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.8791369199752808, |
|
"eval_runtime": 0.3055, |
|
"eval_samples_per_second": 906.57, |
|
"eval_steps_per_second": 9.818, |
|
"step": 390 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.8327999711036682, |
|
"eval_runtime": 0.3457, |
|
"eval_samples_per_second": 801.306, |
|
"eval_steps_per_second": 8.678, |
|
"step": 390 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 1.0036966800689697, |
|
"eval_runtime": 0.366, |
|
"eval_samples_per_second": 756.88, |
|
"eval_steps_per_second": 8.197, |
|
"step": 468 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.8701478242874146, |
|
"eval_runtime": 0.4386, |
|
"eval_samples_per_second": 631.518, |
|
"eval_steps_per_second": 6.84, |
|
"step": 468 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 6.41, |
|
"learning_rate": 1.3285813580973207e-05, |
|
"loss": 0.5371, |
|
"step": 500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.9121482372283936, |
|
"eval_runtime": 0.3365, |
|
"eval_samples_per_second": 823.293, |
|
"eval_steps_per_second": 8.917, |
|
"step": 546 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.8219799995422363, |
|
"eval_runtime": 0.458, |
|
"eval_samples_per_second": 604.827, |
|
"eval_steps_per_second": 6.55, |
|
"step": 546 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 1.009196162223816, |
|
"eval_runtime": 0.323, |
|
"eval_samples_per_second": 857.674, |
|
"eval_steps_per_second": 9.289, |
|
"step": 624 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.8340579867362976, |
|
"eval_runtime": 0.4417, |
|
"eval_samples_per_second": 627.172, |
|
"eval_steps_per_second": 6.792, |
|
"step": 624 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.9687350392341614, |
|
"eval_runtime": 0.2576, |
|
"eval_samples_per_second": 1075.361, |
|
"eval_steps_per_second": 11.647, |
|
"step": 702 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.8537887334823608, |
|
"eval_runtime": 0.4396, |
|
"eval_samples_per_second": 630.096, |
|
"eval_steps_per_second": 6.824, |
|
"step": 702 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 1.0111019611358643, |
|
"eval_runtime": 0.3165, |
|
"eval_samples_per_second": 875.142, |
|
"eval_steps_per_second": 9.478, |
|
"step": 780 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.8117498159408569, |
|
"eval_runtime": 0.4348, |
|
"eval_samples_per_second": 637.033, |
|
"eval_steps_per_second": 6.899, |
|
"step": 780 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.9616022706031799, |
|
"eval_runtime": 0.3223, |
|
"eval_samples_per_second": 859.427, |
|
"eval_steps_per_second": 9.308, |
|
"step": 858 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.8113434910774231, |
|
"eval_runtime": 0.4354, |
|
"eval_samples_per_second": 636.179, |
|
"eval_steps_per_second": 6.89, |
|
"step": 858 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.9934073090553284, |
|
"eval_runtime": 0.3202, |
|
"eval_samples_per_second": 865.131, |
|
"eval_steps_per_second": 9.37, |
|
"step": 936 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.8179090023040771, |
|
"eval_runtime": 0.4603, |
|
"eval_samples_per_second": 601.726, |
|
"eval_steps_per_second": 6.517, |
|
"step": 936 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 12.82, |
|
"learning_rate": 2.1281651939094996e-06, |
|
"loss": 0.1174, |
|
"step": 1000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 1.009687900543213, |
|
"eval_runtime": 0.3338, |
|
"eval_samples_per_second": 829.86, |
|
"eval_steps_per_second": 8.988, |
|
"step": 1014 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.8190819025039673, |
|
"eval_runtime": 0.4638, |
|
"eval_samples_per_second": 597.268, |
|
"eval_steps_per_second": 6.469, |
|
"step": 1014 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 1.0018943548202515, |
|
"eval_runtime": 0.2723, |
|
"eval_samples_per_second": 1017.137, |
|
"eval_steps_per_second": 11.016, |
|
"step": 1092 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.8157313466072083, |
|
"eval_runtime": 0.4417, |
|
"eval_samples_per_second": 627.137, |
|
"eval_steps_per_second": 6.792, |
|
"step": 1092 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 1.012693166732788, |
|
"eval_runtime": 0.3213, |
|
"eval_samples_per_second": 862.201, |
|
"eval_steps_per_second": 9.338, |
|
"step": 1170 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.8178156614303589, |
|
"eval_runtime": 0.4461, |
|
"eval_samples_per_second": 620.871, |
|
"eval_steps_per_second": 6.724, |
|
"step": 1170 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 1.009476661682129, |
|
"eval_runtime": 0.3271, |
|
"eval_samples_per_second": 846.951, |
|
"eval_steps_per_second": 9.173, |
|
"step": 1248 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.8178343772888184, |
|
"eval_runtime": 0.4589, |
|
"eval_samples_per_second": 603.661, |
|
"eval_steps_per_second": 6.538, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 1248, |
|
"total_flos": 2620586111385600.0, |
|
"train_loss": 0.27760597070058185, |
|
"train_runtime": 2463.0354, |
|
"train_samples_per_second": 16.175, |
|
"train_steps_per_second": 0.507 |
|
} |
|
], |
|
"max_steps": 1248, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 16, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 2620586111385600.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|