{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 12.0, "global_step": 3216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 1.0, "eval_loss": 0.7594008445739746, "eval_matthews_correlation": 0.22693312638521376, "eval_runtime": 0.6384, "eval_samples_per_second": 1633.689, "eval_steps_per_second": 14.097, "step": 268 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 1.0, "eval_loss": 0.7153037786483765, "eval_matthews_correlation": 0.33478791021206633, "eval_runtime": 0.7177, "eval_samples_per_second": 1453.27, "eval_steps_per_second": 12.54, "step": 268 }, { "compression_loss": 0.0, "epoch": 1.87, "learning_rate": 1.884896652217917e-05, "loss": 1.1021, "step": 500 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 2.0, "eval_loss": 0.6954232454299927, "eval_matthews_correlation": 0.4354344418941058, "eval_runtime": 0.7494, "eval_samples_per_second": 1391.709, "eval_steps_per_second": 12.009, "step": 536 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 2.0, "eval_loss": 0.6847538352012634, "eval_matthews_correlation": 0.4723897975346641, "eval_runtime": 0.8051, "eval_samples_per_second": 1295.486, "eval_steps_per_second": 11.179, "step": 536 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 3.0, "eval_loss": 0.7816486954689026, "eval_matthews_correlation": 0.4156507632951837, "eval_runtime": 0.6601, "eval_samples_per_second": 1580.016, "eval_steps_per_second": 13.634, "step": 804 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 3.0, "eval_loss": 0.7744055986404419, "eval_matthews_correlation": 0.43824800688521975, "eval_runtime": 0.74, "eval_samples_per_second": 1409.462, "eval_steps_per_second": 12.162, "step": 804 }, { "compression_loss": 0.0, "epoch": 3.73, "learning_rate": 1.5644725411751336e-05, "loss": 0.6376, "step": 1000 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 4.0, "eval_loss": 0.8254319429397583, "eval_matthews_correlation": 0.42409562062729317, "eval_runtime": 0.6033, "eval_samples_per_second": 1728.723, "eval_steps_per_second": 14.917, "step": 1072 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 4.0, "eval_loss": 0.8103329539299011, "eval_matthews_correlation": 0.45596899298288374, "eval_runtime": 0.6386, "eval_samples_per_second": 1633.35, "eval_steps_per_second": 14.094, "step": 1072 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 5.0, "eval_loss": 0.8831449747085571, "eval_matthews_correlation": 0.4133635813145441, "eval_runtime": 0.6198, "eval_samples_per_second": 1682.88, "eval_steps_per_second": 14.521, "step": 1340 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 5.0, "eval_loss": 0.8358224034309387, "eval_matthews_correlation": 0.4637527852072172, "eval_runtime": 0.63, "eval_samples_per_second": 1655.659, "eval_steps_per_second": 14.287, "step": 1340 }, { "compression_loss": 0.0, "epoch": 5.6, "learning_rate": 1.1130738016122495e-05, "loss": 0.3925, "step": 1500 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 6.0, "eval_loss": 0.8798311948776245, "eval_matthews_correlation": 0.43765045412808823, "eval_runtime": 0.6072, "eval_samples_per_second": 1717.727, "eval_steps_per_second": 14.822, "step": 1608 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 6.0, "eval_loss": 0.87115079164505, "eval_matthews_correlation": 0.4572856727029901, "eval_runtime": 0.6525, "eval_samples_per_second": 1598.36, "eval_steps_per_second": 13.792, "step": 1608 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 7.0, "eval_loss": 0.9016041159629822, "eval_matthews_correlation": 0.43794163748101905, "eval_runtime": 0.5925, "eval_samples_per_second": 1760.294, "eval_steps_per_second": 15.19, "step": 1876 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 7.0, "eval_loss": 0.9116769433021545, "eval_matthews_correlation": 0.4523199346278705, "eval_runtime": 0.622, "eval_samples_per_second": 1676.901, "eval_steps_per_second": 14.47, "step": 1876 }, { "compression_loss": 0.0, "epoch": 7.46, "learning_rate": 6.354385348824488e-06, "loss": 0.2697, "step": 2000 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 8.0, "eval_loss": 0.9721390008926392, "eval_matthews_correlation": 0.43577816963420296, "eval_runtime": 0.6725, "eval_samples_per_second": 1550.837, "eval_steps_per_second": 13.382, "step": 2144 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 8.0, "eval_loss": 1.0106229782104492, "eval_matthews_correlation": 0.43907744261562187, "eval_runtime": 0.7223, "eval_samples_per_second": 1443.926, "eval_steps_per_second": 12.46, "step": 2144 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 9.0, "eval_loss": 0.9457906484603882, "eval_matthews_correlation": 0.430812413056651, "eval_runtime": 0.7253, "eval_samples_per_second": 1438.048, "eval_steps_per_second": 12.409, "step": 2412 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 9.0, "eval_loss": 0.969419538974762, "eval_matthews_correlation": 0.43758782623801895, "eval_runtime": 0.706, "eval_samples_per_second": 1477.441, "eval_steps_per_second": 12.749, "step": 2412 }, { "compression_loss": 0.0, "epoch": 9.33, "learning_rate": 2.4239250674893345e-06, "loss": 0.2133, "step": 2500 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 10.0, "eval_loss": 0.9448842406272888, "eval_matthews_correlation": 0.44105796467098995, "eval_runtime": 0.611, "eval_samples_per_second": 1706.975, "eval_steps_per_second": 14.729, "step": 2680 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 10.0, "eval_loss": 0.9690240025520325, "eval_matthews_correlation": 0.4315719153344676, "eval_runtime": 0.617, "eval_samples_per_second": 1690.365, "eval_steps_per_second": 14.586, "step": 2680 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 11.0, "eval_loss": 0.9587270021438599, "eval_matthews_correlation": 0.4401936795964168, "eval_runtime": 0.6843, "eval_samples_per_second": 1524.176, "eval_steps_per_second": 13.152, "step": 2948 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 11.0, "eval_loss": 0.9697301983833313, "eval_matthews_correlation": 0.4243567637161194, "eval_runtime": 0.6973, "eval_samples_per_second": 1495.775, "eval_steps_per_second": 12.907, "step": 2948 }, { "compression_loss": 0.0, "epoch": 11.19, "learning_rate": 2.5352335693478615e-07, "loss": 0.1818, "step": 3000 }, { "Minimum SubNet": "OrderedDict([(, {0: 320, 1: 192, 2: 192, 3: 320, 4: 448, 5: 320, 6: 256, 7: 320, 8: 904, 9: 878, 10: 885, 11: 910, 12: 944, 13: 943, 14: 896, 15: 524})])", "epoch": 12.0, "eval_loss": 0.9606204628944397, "eval_matthews_correlation": 0.4401936795964168, "eval_runtime": 0.7028, "eval_samples_per_second": 1484.092, "eval_steps_per_second": 12.806, "step": 3216 }, { "SuperNet": "OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})])", "epoch": 12.0, "eval_loss": 0.969758152961731, "eval_matthews_correlation": 0.4243567637161194, "eval_runtime": 0.7371, "eval_samples_per_second": 1415.06, "eval_steps_per_second": 12.21, "step": 3216 }, { "epoch": 12.0, "step": 3216, "total_flos": 2008275016808448.0, "train_loss": 0.4476036005352267, "train_runtime": 3049.2312, "train_samples_per_second": 33.652, "train_steps_per_second": 1.055 } ], "max_steps": 3216, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 12, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 2008275016808448.0, "trial_name": null, "trial_params": null }