{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 16.0, "global_step": 1248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 1.0, "eval_accuracy": 0.5126353790613718, "eval_loss": 0.6907398700714111, "eval_runtime": 0.3187, "eval_samples_per_second": 869.169, "eval_steps_per_second": 9.413, "step": 78 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 1.0, "eval_accuracy": 0.631768953068592, "eval_loss": 0.6698081493377686, "eval_runtime": 0.3205, "eval_samples_per_second": 864.196, "eval_steps_per_second": 9.36, "step": 78 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 2.0, "eval_accuracy": 0.5884476534296029, "eval_loss": 0.7006412744522095, "eval_runtime": 0.313, "eval_samples_per_second": 884.931, "eval_steps_per_second": 9.584, "step": 156 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 2.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.6682543158531189, "eval_runtime": 0.3443, "eval_samples_per_second": 804.534, "eval_steps_per_second": 8.713, "step": 156 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 3.0, "eval_accuracy": 0.5956678700361011, "eval_loss": 0.7282483577728271, "eval_runtime": 0.271, "eval_samples_per_second": 1022.293, "eval_steps_per_second": 11.072, "step": 234 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 3.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.7188943028450012, "eval_runtime": 0.3007, "eval_samples_per_second": 921.259, "eval_steps_per_second": 9.978, "step": 234 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 4.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.7194212675094604, "eval_runtime": 0.3156, "eval_samples_per_second": 877.769, "eval_steps_per_second": 9.507, "step": 312 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 4.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.7804971933364868, "eval_runtime": 0.3328, "eval_samples_per_second": 832.291, "eval_steps_per_second": 9.014, "step": 312 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 5.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 0.8791369199752808, "eval_runtime": 0.3055, "eval_samples_per_second": 906.57, "eval_steps_per_second": 9.818, "step": 390 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 5.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.8327999711036682, "eval_runtime": 0.3457, "eval_samples_per_second": 801.306, "eval_steps_per_second": 8.678, "step": 390 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 6.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 1.0036966800689697, "eval_runtime": 0.366, "eval_samples_per_second": 756.88, "eval_steps_per_second": 8.197, "step": 468 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 6.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.8701478242874146, "eval_runtime": 0.4386, "eval_samples_per_second": 631.518, "eval_steps_per_second": 6.84, "step": 468 }, { "compression_loss": 0.0, "epoch": 6.41, "learning_rate": 1.3285813580973207e-05, "loss": 0.5371, "step": 500 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 7.0, "eval_accuracy": 0.6245487364620939, "eval_loss": 0.9121482372283936, "eval_runtime": 0.3365, "eval_samples_per_second": 823.293, "eval_steps_per_second": 8.917, "step": 546 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 7.0, "eval_accuracy": 0.6859205776173285, "eval_loss": 0.8219799995422363, "eval_runtime": 0.458, "eval_samples_per_second": 604.827, "eval_steps_per_second": 6.55, "step": 546 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 8.0, "eval_accuracy": 0.6245487364620939, "eval_loss": 1.009196162223816, "eval_runtime": 0.323, "eval_samples_per_second": 857.674, "eval_steps_per_second": 9.289, "step": 624 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 8.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.8340579867362976, "eval_runtime": 0.4417, "eval_samples_per_second": 627.172, "eval_steps_per_second": 6.792, "step": 624 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 9.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.9687350392341614, "eval_runtime": 0.2576, "eval_samples_per_second": 1075.361, "eval_steps_per_second": 11.647, "step": 702 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 9.0, "eval_accuracy": 0.6642599277978339, "eval_loss": 0.8537887334823608, "eval_runtime": 0.4396, "eval_samples_per_second": 630.096, "eval_steps_per_second": 6.824, "step": 702 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 10.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 1.0111019611358643, "eval_runtime": 0.3165, "eval_samples_per_second": 875.142, "eval_steps_per_second": 9.478, "step": 780 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 10.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.8117498159408569, "eval_runtime": 0.4348, "eval_samples_per_second": 637.033, "eval_steps_per_second": 6.899, "step": 780 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 11.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.9616022706031799, "eval_runtime": 0.3223, "eval_samples_per_second": 859.427, "eval_steps_per_second": 9.308, "step": 858 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 11.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.8113434910774231, "eval_runtime": 0.4354, "eval_samples_per_second": 636.179, "eval_steps_per_second": 6.89, "step": 858 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 12.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.9934073090553284, "eval_runtime": 0.3202, "eval_samples_per_second": 865.131, "eval_steps_per_second": 9.37, "step": 936 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 12.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.8179090023040771, "eval_runtime": 0.4603, "eval_samples_per_second": 601.726, "eval_steps_per_second": 6.517, "step": 936 }, { "compression_loss": 0.0, "epoch": 12.82, "learning_rate": 2.1281651939094996e-06, "loss": 0.1174, "step": 1000 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 13.0, "eval_accuracy": 0.631768953068592, "eval_loss": 1.009687900543213, "eval_runtime": 0.3338, "eval_samples_per_second": 829.86, "eval_steps_per_second": 8.988, "step": 1014 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 13.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.8190819025039673, "eval_runtime": 0.4638, "eval_samples_per_second": 597.268, "eval_steps_per_second": 6.469, "step": 1014 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 14.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 1.0018943548202515, "eval_runtime": 0.2723, "eval_samples_per_second": 1017.137, "eval_steps_per_second": 11.016, "step": 1092 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 14.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.8157313466072083, "eval_runtime": 0.4417, "eval_samples_per_second": 627.137, "eval_steps_per_second": 6.792, "step": 1092 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 15.0, "eval_accuracy": 0.631768953068592, "eval_loss": 1.012693166732788, "eval_runtime": 0.3213, "eval_samples_per_second": 862.201, "eval_steps_per_second": 9.338, "step": 1170 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 15.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.8178156614303589, "eval_runtime": 0.4461, "eval_samples_per_second": 620.871, "eval_steps_per_second": 6.724, "step": 1170 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 768, 4: 704, 5: 704, 6: 768, 7: 576, 8: 704, 9: 704, 10: 512, 11: 640, 12: 608, 13: 571, 14: 589, 15: 542, 16: 576, 17: 589, 18: 568, 19: 537, 20: 562, 21: 453, 22: 376, 23: 147})])", "epoch": 16.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 1.009476661682129, "eval_runtime": 0.3271, "eval_samples_per_second": 846.951, "eval_steps_per_second": 9.173, "step": 1248 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 16.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.8178343772888184, "eval_runtime": 0.4589, "eval_samples_per_second": 603.661, "eval_steps_per_second": 6.538, "step": 1248 }, { "epoch": 16.0, "step": 1248, "total_flos": 2620586111385600.0, "train_loss": 0.27760597070058185, "train_runtime": 2463.0354, "train_samples_per_second": 16.175, "train_steps_per_second": 0.507 } ], "max_steps": 1248, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 16, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 2620586111385600.0, "trial_name": null, "trial_params": null }