|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8490469358475097, |
|
"eval_f1": 0.8745644599303136, |
|
"eval_loss": 0.46352434158325195, |
|
"eval_runtime": 0.5288, |
|
"eval_samples_per_second": 771.53, |
|
"eval_steps_per_second": 7.564, |
|
"step": 115 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8357843137254902, |
|
"eval_combined_score": 0.8596312872975278, |
|
"eval_f1": 0.8834782608695653, |
|
"eval_loss": 0.40375280380249023, |
|
"eval_runtime": 0.6306, |
|
"eval_samples_per_second": 647.039, |
|
"eval_steps_per_second": 6.344, |
|
"step": 115 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8492647058823529, |
|
"eval_f1": 0.875, |
|
"eval_loss": 0.4734054207801819, |
|
"eval_runtime": 0.4906, |
|
"eval_samples_per_second": 831.692, |
|
"eval_steps_per_second": 8.154, |
|
"step": 230 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8480392156862745, |
|
"eval_combined_score": 0.8712985194077632, |
|
"eval_f1": 0.8945578231292517, |
|
"eval_loss": 0.3588949739933014, |
|
"eval_runtime": 0.628, |
|
"eval_samples_per_second": 649.705, |
|
"eval_steps_per_second": 6.37, |
|
"step": 230 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.803921568627451, |
|
"eval_combined_score": 0.8359541836536595, |
|
"eval_f1": 0.867986798679868, |
|
"eval_loss": 0.9021703004837036, |
|
"eval_runtime": 0.5062, |
|
"eval_samples_per_second": 806.001, |
|
"eval_steps_per_second": 7.902, |
|
"step": 345 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8504901960784313, |
|
"eval_combined_score": 0.8743269010442241, |
|
"eval_f1": 0.8981636060100167, |
|
"eval_loss": 0.6408894658088684, |
|
"eval_runtime": 0.6431, |
|
"eval_samples_per_second": 634.428, |
|
"eval_steps_per_second": 6.22, |
|
"step": 345 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8406862745098039, |
|
"eval_combined_score": 0.8645969966031009, |
|
"eval_f1": 0.8885077186963979, |
|
"eval_loss": 0.733981728553772, |
|
"eval_runtime": 0.5322, |
|
"eval_samples_per_second": 766.697, |
|
"eval_steps_per_second": 7.517, |
|
"step": 460 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8797690262545697, |
|
"eval_f1": 0.9016949152542373, |
|
"eval_loss": 0.6668654680252075, |
|
"eval_runtime": 0.664, |
|
"eval_samples_per_second": 614.462, |
|
"eval_steps_per_second": 6.024, |
|
"step": 460 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 4.35, |
|
"learning_rate": 2.6738413736593183e-05, |
|
"loss": 0.6907, |
|
"step": 500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8511586452762923, |
|
"eval_f1": 0.8787878787878788, |
|
"eval_loss": 0.839835524559021, |
|
"eval_runtime": 0.4417, |
|
"eval_samples_per_second": 923.667, |
|
"eval_steps_per_second": 9.056, |
|
"step": 575 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8799019607843137, |
|
"eval_combined_score": 0.8976366453317077, |
|
"eval_f1": 0.9153713298791019, |
|
"eval_loss": 0.6096086502075195, |
|
"eval_runtime": 0.5485, |
|
"eval_samples_per_second": 743.792, |
|
"eval_steps_per_second": 7.292, |
|
"step": 575 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8186274509803921, |
|
"eval_combined_score": 0.8459575611066344, |
|
"eval_f1": 0.8732876712328768, |
|
"eval_loss": 1.0232644081115723, |
|
"eval_runtime": 0.4729, |
|
"eval_samples_per_second": 862.676, |
|
"eval_steps_per_second": 8.458, |
|
"step": 690 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8406862745098039, |
|
"eval_combined_score": 0.8645969966031009, |
|
"eval_f1": 0.8885077186963979, |
|
"eval_loss": 0.8277555108070374, |
|
"eval_runtime": 0.6413, |
|
"eval_samples_per_second": 636.161, |
|
"eval_steps_per_second": 6.237, |
|
"step": 690 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8137254901960784, |
|
"eval_combined_score": 0.8382706873363064, |
|
"eval_f1": 0.8628158844765343, |
|
"eval_loss": 0.9062389135360718, |
|
"eval_runtime": 0.5264, |
|
"eval_samples_per_second": 775.021, |
|
"eval_steps_per_second": 7.598, |
|
"step": 805 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8455882352941176, |
|
"eval_combined_score": 0.8670419052576783, |
|
"eval_f1": 0.8884955752212389, |
|
"eval_loss": 0.696999728679657, |
|
"eval_runtime": 0.6178, |
|
"eval_samples_per_second": 660.427, |
|
"eval_steps_per_second": 6.475, |
|
"step": 805 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8499090357792601, |
|
"eval_f1": 0.8762886597938144, |
|
"eval_loss": 0.9299286603927612, |
|
"eval_runtime": 0.5047, |
|
"eval_samples_per_second": 808.406, |
|
"eval_steps_per_second": 7.926, |
|
"step": 920 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8455882352941176, |
|
"eval_combined_score": 0.8700302985515815, |
|
"eval_f1": 0.8944723618090453, |
|
"eval_loss": 0.7925681471824646, |
|
"eval_runtime": 0.6718, |
|
"eval_samples_per_second": 607.307, |
|
"eval_steps_per_second": 5.954, |
|
"step": 920 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 8.7, |
|
"learning_rate": 1.829214077705104e-05, |
|
"loss": 0.0889, |
|
"step": 1000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.821078431372549, |
|
"eval_combined_score": 0.8487794864138549, |
|
"eval_f1": 0.8764805414551609, |
|
"eval_loss": 0.9827358722686768, |
|
"eval_runtime": 0.4726, |
|
"eval_samples_per_second": 863.398, |
|
"eval_steps_per_second": 8.465, |
|
"step": 1035 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8529411764705882, |
|
"eval_combined_score": 0.8754501800720287, |
|
"eval_f1": 0.8979591836734693, |
|
"eval_loss": 0.7438695430755615, |
|
"eval_runtime": 0.6707, |
|
"eval_samples_per_second": 608.321, |
|
"eval_steps_per_second": 5.964, |
|
"step": 1035 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8284313725490197, |
|
"eval_combined_score": 0.8546918767507002, |
|
"eval_f1": 0.8809523809523808, |
|
"eval_loss": 0.8921936750411987, |
|
"eval_runtime": 0.4539, |
|
"eval_samples_per_second": 898.928, |
|
"eval_steps_per_second": 8.813, |
|
"step": 1150 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8602941176470589, |
|
"eval_combined_score": 0.8807536446120909, |
|
"eval_f1": 0.9012131715771231, |
|
"eval_loss": 0.6455641984939575, |
|
"eval_runtime": 0.6908, |
|
"eval_samples_per_second": 590.661, |
|
"eval_steps_per_second": 5.791, |
|
"step": 1150 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.821078431372549, |
|
"eval_combined_score": 0.8491946778711483, |
|
"eval_f1": 0.8773109243697478, |
|
"eval_loss": 0.9563024044036865, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 709.094, |
|
"eval_steps_per_second": 6.952, |
|
"step": 1265 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8553921568627451, |
|
"eval_combined_score": 0.877611188787909, |
|
"eval_f1": 0.8998302207130731, |
|
"eval_loss": 0.6870017051696777, |
|
"eval_runtime": 0.6557, |
|
"eval_samples_per_second": 622.197, |
|
"eval_steps_per_second": 6.1, |
|
"step": 1265 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8259803921568627, |
|
"eval_combined_score": 0.8525131943748538, |
|
"eval_f1": 0.8790459965928449, |
|
"eval_loss": 0.9068766832351685, |
|
"eval_runtime": 0.5352, |
|
"eval_samples_per_second": 762.351, |
|
"eval_steps_per_second": 7.474, |
|
"step": 1380 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8676470588235294, |
|
"eval_combined_score": 0.8866207322089674, |
|
"eval_f1": 0.9055944055944055, |
|
"eval_loss": 0.5972063541412354, |
|
"eval_runtime": 0.6204, |
|
"eval_samples_per_second": 657.677, |
|
"eval_steps_per_second": 6.448, |
|
"step": 1380 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8515640369860318, |
|
"eval_f1": 0.8795986622073578, |
|
"eval_loss": 0.9531628489494324, |
|
"eval_runtime": 0.4389, |
|
"eval_samples_per_second": 929.512, |
|
"eval_steps_per_second": 9.113, |
|
"step": 1495 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8849377272572807, |
|
"eval_f1": 0.9046793760831888, |
|
"eval_loss": 0.641593337059021, |
|
"eval_runtime": 0.5799, |
|
"eval_samples_per_second": 703.627, |
|
"eval_steps_per_second": 6.898, |
|
"step": 1495 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 13.04, |
|
"learning_rate": 8.410113675774417e-06, |
|
"loss": 0.0339, |
|
"step": 1500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8259803921568627, |
|
"eval_combined_score": 0.8523064353946708, |
|
"eval_f1": 0.8786324786324787, |
|
"eval_loss": 0.9255245327949524, |
|
"eval_runtime": 0.5277, |
|
"eval_samples_per_second": 773.205, |
|
"eval_steps_per_second": 7.58, |
|
"step": 1610 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.8796018407362944, |
|
"eval_f1": 0.901360544217687, |
|
"eval_loss": 0.7534880042076111, |
|
"eval_runtime": 0.6535, |
|
"eval_samples_per_second": 624.357, |
|
"eval_steps_per_second": 6.121, |
|
"step": 1610 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.821078431372549, |
|
"eval_combined_score": 0.8479320115696364, |
|
"eval_f1": 0.8747855917667238, |
|
"eval_loss": 0.9056081771850586, |
|
"eval_runtime": 0.3636, |
|
"eval_samples_per_second": 1122.037, |
|
"eval_steps_per_second": 11.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8602941176470589, |
|
"eval_combined_score": 0.8802346244977852, |
|
"eval_f1": 0.9001751313485115, |
|
"eval_loss": 0.6561837196350098, |
|
"eval_runtime": 0.492, |
|
"eval_samples_per_second": 829.301, |
|
"eval_steps_per_second": 8.13, |
|
"step": 1725 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8088235294117647, |
|
"eval_combined_score": 0.8396277115497361, |
|
"eval_f1": 0.8704318936877077, |
|
"eval_loss": 0.9581977725028992, |
|
"eval_runtime": 0.3672, |
|
"eval_samples_per_second": 1111.013, |
|
"eval_steps_per_second": 10.892, |
|
"step": 1840 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8553921568627451, |
|
"eval_combined_score": 0.877440541804456, |
|
"eval_f1": 0.899488926746167, |
|
"eval_loss": 0.7179605960845947, |
|
"eval_runtime": 0.4744, |
|
"eval_samples_per_second": 860.007, |
|
"eval_steps_per_second": 8.431, |
|
"step": 1840 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8308823529411765, |
|
"eval_combined_score": 0.856867322480775, |
|
"eval_f1": 0.8828522920203735, |
|
"eval_loss": 0.9101623892784119, |
|
"eval_runtime": 0.3973, |
|
"eval_samples_per_second": 1026.915, |
|
"eval_steps_per_second": 10.068, |
|
"step": 1955 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8627450980392157, |
|
"eval_combined_score": 0.883262583383869, |
|
"eval_f1": 0.9037800687285222, |
|
"eval_loss": 0.6792478561401367, |
|
"eval_runtime": 0.4894, |
|
"eval_samples_per_second": 833.646, |
|
"eval_steps_per_second": 8.173, |
|
"step": 1955 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 17.39, |
|
"learning_rate": 1.41888323488484e-06, |
|
"loss": 0.0195, |
|
"step": 2000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8284313725490197, |
|
"eval_combined_score": 0.8546918767507002, |
|
"eval_f1": 0.8809523809523808, |
|
"eval_loss": 0.8795027136802673, |
|
"eval_runtime": 0.4269, |
|
"eval_samples_per_second": 955.795, |
|
"eval_steps_per_second": 9.371, |
|
"step": 2070 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8651960784313726, |
|
"eval_combined_score": 0.8851023570049782, |
|
"eval_f1": 0.9050086355785838, |
|
"eval_loss": 0.6670258641242981, |
|
"eval_runtime": 0.488, |
|
"eval_samples_per_second": 836.011, |
|
"eval_steps_per_second": 8.196, |
|
"step": 2070 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8509538950715421, |
|
"eval_f1": 0.8783783783783784, |
|
"eval_loss": 0.9087016582489014, |
|
"eval_runtime": 0.4014, |
|
"eval_samples_per_second": 1016.504, |
|
"eval_steps_per_second": 9.966, |
|
"step": 2185 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8627450980392157, |
|
"eval_combined_score": 0.883262583383869, |
|
"eval_f1": 0.9037800687285222, |
|
"eval_loss": 0.6763139367103577, |
|
"eval_runtime": 0.5343, |
|
"eval_samples_per_second": 763.583, |
|
"eval_steps_per_second": 7.486, |
|
"step": 2185 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_combined_score": 0.8509538950715421, |
|
"eval_f1": 0.8783783783783784, |
|
"eval_loss": 0.9081816673278809, |
|
"eval_runtime": 0.4205, |
|
"eval_samples_per_second": 970.314, |
|
"eval_steps_per_second": 9.513, |
|
"step": 2300 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8627450980392157, |
|
"eval_combined_score": 0.883262583383869, |
|
"eval_f1": 0.9037800687285222, |
|
"eval_loss": 0.6761664748191833, |
|
"eval_runtime": 0.4936, |
|
"eval_samples_per_second": 826.651, |
|
"eval_steps_per_second": 8.104, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2300, |
|
"total_flos": 4825456755302400.0, |
|
"train_loss": 0.18312535887179168, |
|
"train_runtime": 4630.0259, |
|
"train_samples_per_second": 15.844, |
|
"train_steps_per_second": 0.497 |
|
} |
|
], |
|
"max_steps": 2300, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 20, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 4825456755302400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|