{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 20.0, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 1.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8490469358475097, "eval_f1": 0.8745644599303136, "eval_loss": 0.46352434158325195, "eval_runtime": 0.5288, "eval_samples_per_second": 771.53, "eval_steps_per_second": 7.564, "step": 115 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 1.0, "eval_accuracy": 0.8357843137254902, "eval_combined_score": 0.8596312872975278, "eval_f1": 0.8834782608695653, "eval_loss": 0.40375280380249023, "eval_runtime": 0.6306, "eval_samples_per_second": 647.039, "eval_steps_per_second": 6.344, "step": 115 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 2.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8492647058823529, "eval_f1": 0.875, "eval_loss": 0.4734054207801819, "eval_runtime": 0.4906, "eval_samples_per_second": 831.692, "eval_steps_per_second": 8.154, "step": 230 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 2.0, "eval_accuracy": 0.8480392156862745, "eval_combined_score": 0.8712985194077632, "eval_f1": 0.8945578231292517, "eval_loss": 0.3588949739933014, "eval_runtime": 0.628, "eval_samples_per_second": 649.705, "eval_steps_per_second": 6.37, "step": 230 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 3.0, "eval_accuracy": 0.803921568627451, "eval_combined_score": 0.8359541836536595, "eval_f1": 0.867986798679868, "eval_loss": 0.9021703004837036, "eval_runtime": 0.5062, "eval_samples_per_second": 806.001, "eval_steps_per_second": 7.902, "step": 345 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 3.0, "eval_accuracy": 0.8504901960784313, "eval_combined_score": 0.8743269010442241, "eval_f1": 0.8981636060100167, "eval_loss": 0.6408894658088684, "eval_runtime": 0.6431, "eval_samples_per_second": 634.428, "eval_steps_per_second": 6.22, "step": 345 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 4.0, "eval_accuracy": 0.8406862745098039, "eval_combined_score": 0.8645969966031009, "eval_f1": 0.8885077186963979, "eval_loss": 0.733981728553772, "eval_runtime": 0.5322, "eval_samples_per_second": 766.697, "eval_steps_per_second": 7.517, "step": 460 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 4.0, "eval_accuracy": 0.8578431372549019, "eval_combined_score": 0.8797690262545697, "eval_f1": 0.9016949152542373, "eval_loss": 0.6668654680252075, "eval_runtime": 0.664, "eval_samples_per_second": 614.462, "eval_steps_per_second": 6.024, "step": 460 }, { "compression_loss": 0.0, "epoch": 4.35, "learning_rate": 2.6738413736593183e-05, "loss": 0.6907, "step": 500 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 5.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8511586452762923, "eval_f1": 0.8787878787878788, "eval_loss": 0.839835524559021, "eval_runtime": 0.4417, "eval_samples_per_second": 923.667, "eval_steps_per_second": 9.056, "step": 575 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 5.0, "eval_accuracy": 0.8799019607843137, "eval_combined_score": 0.8976366453317077, "eval_f1": 0.9153713298791019, "eval_loss": 0.6096086502075195, "eval_runtime": 0.5485, "eval_samples_per_second": 743.792, "eval_steps_per_second": 7.292, "step": 575 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 6.0, "eval_accuracy": 0.8186274509803921, "eval_combined_score": 0.8459575611066344, "eval_f1": 0.8732876712328768, "eval_loss": 1.0232644081115723, "eval_runtime": 0.4729, "eval_samples_per_second": 862.676, "eval_steps_per_second": 8.458, "step": 690 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 6.0, "eval_accuracy": 0.8406862745098039, "eval_combined_score": 0.8645969966031009, "eval_f1": 0.8885077186963979, "eval_loss": 0.8277555108070374, "eval_runtime": 0.6413, "eval_samples_per_second": 636.161, "eval_steps_per_second": 6.237, "step": 690 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 7.0, "eval_accuracy": 0.8137254901960784, "eval_combined_score": 0.8382706873363064, "eval_f1": 0.8628158844765343, "eval_loss": 0.9062389135360718, "eval_runtime": 0.5264, "eval_samples_per_second": 775.021, "eval_steps_per_second": 7.598, "step": 805 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 7.0, "eval_accuracy": 0.8455882352941176, "eval_combined_score": 0.8670419052576783, "eval_f1": 0.8884955752212389, "eval_loss": 0.696999728679657, "eval_runtime": 0.6178, "eval_samples_per_second": 660.427, "eval_steps_per_second": 6.475, "step": 805 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 8.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8499090357792601, "eval_f1": 0.8762886597938144, "eval_loss": 0.9299286603927612, "eval_runtime": 0.5047, "eval_samples_per_second": 808.406, "eval_steps_per_second": 7.926, "step": 920 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 8.0, "eval_accuracy": 0.8455882352941176, "eval_combined_score": 0.8700302985515815, "eval_f1": 0.8944723618090453, "eval_loss": 0.7925681471824646, "eval_runtime": 0.6718, "eval_samples_per_second": 607.307, "eval_steps_per_second": 5.954, "step": 920 }, { "compression_loss": 0.0, "epoch": 8.7, "learning_rate": 1.829214077705104e-05, "loss": 0.0889, "step": 1000 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 9.0, "eval_accuracy": 0.821078431372549, "eval_combined_score": 0.8487794864138549, "eval_f1": 0.8764805414551609, "eval_loss": 0.9827358722686768, "eval_runtime": 0.4726, "eval_samples_per_second": 863.398, "eval_steps_per_second": 8.465, "step": 1035 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 9.0, "eval_accuracy": 0.8529411764705882, "eval_combined_score": 0.8754501800720287, "eval_f1": 0.8979591836734693, "eval_loss": 0.7438695430755615, "eval_runtime": 0.6707, "eval_samples_per_second": 608.321, "eval_steps_per_second": 5.964, "step": 1035 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 10.0, "eval_accuracy": 0.8284313725490197, "eval_combined_score": 0.8546918767507002, "eval_f1": 0.8809523809523808, "eval_loss": 0.8921936750411987, "eval_runtime": 0.4539, "eval_samples_per_second": 898.928, "eval_steps_per_second": 8.813, "step": 1150 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 10.0, "eval_accuracy": 0.8602941176470589, "eval_combined_score": 0.8807536446120909, "eval_f1": 0.9012131715771231, "eval_loss": 0.6455641984939575, "eval_runtime": 0.6908, "eval_samples_per_second": 590.661, "eval_steps_per_second": 5.791, "step": 1150 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 11.0, "eval_accuracy": 0.821078431372549, "eval_combined_score": 0.8491946778711483, "eval_f1": 0.8773109243697478, "eval_loss": 0.9563024044036865, "eval_runtime": 0.5754, "eval_samples_per_second": 709.094, "eval_steps_per_second": 6.952, "step": 1265 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 11.0, "eval_accuracy": 0.8553921568627451, "eval_combined_score": 0.877611188787909, "eval_f1": 0.8998302207130731, "eval_loss": 0.6870017051696777, "eval_runtime": 0.6557, "eval_samples_per_second": 622.197, "eval_steps_per_second": 6.1, "step": 1265 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 12.0, "eval_accuracy": 0.8259803921568627, "eval_combined_score": 0.8525131943748538, "eval_f1": 0.8790459965928449, "eval_loss": 0.9068766832351685, "eval_runtime": 0.5352, "eval_samples_per_second": 762.351, "eval_steps_per_second": 7.474, "step": 1380 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 12.0, "eval_accuracy": 0.8676470588235294, "eval_combined_score": 0.8866207322089674, "eval_f1": 0.9055944055944055, "eval_loss": 0.5972063541412354, "eval_runtime": 0.6204, "eval_samples_per_second": 657.677, "eval_steps_per_second": 6.448, "step": 1380 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 13.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8515640369860318, "eval_f1": 0.8795986622073578, "eval_loss": 0.9531628489494324, "eval_runtime": 0.4389, "eval_samples_per_second": 929.512, "eval_steps_per_second": 9.113, "step": 1495 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 13.0, "eval_accuracy": 0.8651960784313726, "eval_combined_score": 0.8849377272572807, "eval_f1": 0.9046793760831888, "eval_loss": 0.641593337059021, "eval_runtime": 0.5799, "eval_samples_per_second": 703.627, "eval_steps_per_second": 6.898, "step": 1495 }, { "compression_loss": 0.0, "epoch": 13.04, "learning_rate": 8.410113675774417e-06, "loss": 0.0339, "step": 1500 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 14.0, "eval_accuracy": 0.8259803921568627, "eval_combined_score": 0.8523064353946708, "eval_f1": 0.8786324786324787, "eval_loss": 0.9255245327949524, "eval_runtime": 0.5277, "eval_samples_per_second": 773.205, "eval_steps_per_second": 7.58, "step": 1610 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 14.0, "eval_accuracy": 0.8578431372549019, "eval_combined_score": 0.8796018407362944, "eval_f1": 0.901360544217687, "eval_loss": 0.7534880042076111, "eval_runtime": 0.6535, "eval_samples_per_second": 624.357, "eval_steps_per_second": 6.121, "step": 1610 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 15.0, "eval_accuracy": 0.821078431372549, "eval_combined_score": 0.8479320115696364, "eval_f1": 0.8747855917667238, "eval_loss": 0.9056081771850586, "eval_runtime": 0.3636, "eval_samples_per_second": 1122.037, "eval_steps_per_second": 11.0, "step": 1725 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 15.0, "eval_accuracy": 0.8602941176470589, "eval_combined_score": 0.8802346244977852, "eval_f1": 0.9001751313485115, "eval_loss": 0.6561837196350098, "eval_runtime": 0.492, "eval_samples_per_second": 829.301, "eval_steps_per_second": 8.13, "step": 1725 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 16.0, "eval_accuracy": 0.8088235294117647, "eval_combined_score": 0.8396277115497361, "eval_f1": 0.8704318936877077, "eval_loss": 0.9581977725028992, "eval_runtime": 0.3672, "eval_samples_per_second": 1111.013, "eval_steps_per_second": 10.892, "step": 1840 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 16.0, "eval_accuracy": 0.8553921568627451, "eval_combined_score": 0.877440541804456, "eval_f1": 0.899488926746167, "eval_loss": 0.7179605960845947, "eval_runtime": 0.4744, "eval_samples_per_second": 860.007, "eval_steps_per_second": 8.431, "step": 1840 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 17.0, "eval_accuracy": 0.8308823529411765, "eval_combined_score": 0.856867322480775, "eval_f1": 0.8828522920203735, "eval_loss": 0.9101623892784119, "eval_runtime": 0.3973, "eval_samples_per_second": 1026.915, "eval_steps_per_second": 10.068, "step": 1955 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 17.0, "eval_accuracy": 0.8627450980392157, "eval_combined_score": 0.883262583383869, "eval_f1": 0.9037800687285222, "eval_loss": 0.6792478561401367, "eval_runtime": 0.4894, "eval_samples_per_second": 833.646, "eval_steps_per_second": 8.173, "step": 1955 }, { "compression_loss": 0.0, "epoch": 17.39, "learning_rate": 1.41888323488484e-06, "loss": 0.0195, "step": 2000 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 18.0, "eval_accuracy": 0.8284313725490197, "eval_combined_score": 0.8546918767507002, "eval_f1": 0.8809523809523808, "eval_loss": 0.8795027136802673, "eval_runtime": 0.4269, "eval_samples_per_second": 955.795, "eval_steps_per_second": 9.371, "step": 2070 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 18.0, "eval_accuracy": 0.8651960784313726, "eval_combined_score": 0.8851023570049782, "eval_f1": 0.9050086355785838, "eval_loss": 0.6670258641242981, "eval_runtime": 0.488, "eval_samples_per_second": 836.011, "eval_steps_per_second": 8.196, "step": 2070 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 19.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8509538950715421, "eval_f1": 0.8783783783783784, "eval_loss": 0.9087016582489014, "eval_runtime": 0.4014, "eval_samples_per_second": 1016.504, "eval_steps_per_second": 9.966, "step": 2185 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 19.0, "eval_accuracy": 0.8627450980392157, "eval_combined_score": 0.883262583383869, "eval_f1": 0.9037800687285222, "eval_loss": 0.6763139367103577, "eval_runtime": 0.5343, "eval_samples_per_second": 763.583, "eval_steps_per_second": 7.486, "step": 2185 }, { "Minimum SubNet": "OrderedDict([(, {0: 768, 1: 704, 2: 768, 3: 768, 4: 768, 5: 640, 6: 640, 7: 512, 8: 512, 9: 320, 10: 192, 11: 192, 12: 995, 13: 930, 14: 928, 15: 931, 16: 872, 17: 787, 18: 672, 19: 579, 20: 409, 21: 291, 22: 300, 23: 308})])", "epoch": 20.0, "eval_accuracy": 0.8235294117647058, "eval_combined_score": 0.8509538950715421, "eval_f1": 0.8783783783783784, "eval_loss": 0.9081816673278809, "eval_runtime": 0.4205, "eval_samples_per_second": 970.314, "eval_steps_per_second": 9.513, "step": 2300 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 20.0, "eval_accuracy": 0.8627450980392157, "eval_combined_score": 0.883262583383869, "eval_f1": 0.9037800687285222, "eval_loss": 0.6761664748191833, "eval_runtime": 0.4936, "eval_samples_per_second": 826.651, "eval_steps_per_second": 8.104, "step": 2300 }, { "epoch": 20.0, "step": 2300, "total_flos": 4825456755302400.0, "train_loss": 0.18312535887179168, "train_runtime": 4630.0259, "train_samples_per_second": 15.844, "train_steps_per_second": 0.497 } ], "max_steps": 2300, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 20, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 4825456755302400.0, "trial_name": null, "trial_params": null }