{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 12.0, "global_step": 3216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 1.0, "eval_loss": 0.7149681448936462, "eval_matthews_correlation": 0.3947290430655349, "eval_runtime": 1.1039, "eval_samples_per_second": 944.822, "eval_steps_per_second": 8.153, "step": 268 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 1.0, "eval_loss": 0.6399039030075073, "eval_matthews_correlation": 0.5222017375430389, "eval_runtime": 1.6156, "eval_samples_per_second": 645.592, "eval_steps_per_second": 5.571, "step": 268 }, { "compression_loss": 0.0, "epoch": 1.87, "learning_rate": 1.884896652217917e-05, "loss": 0.8522, "step": 500 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 2.0, "eval_loss": 0.7286580801010132, "eval_matthews_correlation": 0.4630242094821006, "eval_runtime": 1.1443, "eval_samples_per_second": 911.446, "eval_steps_per_second": 7.865, "step": 536 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 2.0, "eval_loss": 0.6622146964073181, "eval_matthews_correlation": 0.5624034645452709, "eval_runtime": 1.4549, "eval_samples_per_second": 716.899, "eval_steps_per_second": 6.186, "step": 536 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 3.0, "eval_loss": 0.7320300340652466, "eval_matthews_correlation": 0.4774992729117021, "eval_runtime": 1.179, "eval_samples_per_second": 884.654, "eval_steps_per_second": 7.634, "step": 804 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 3.0, "eval_loss": 0.6782493591308594, "eval_matthews_correlation": 0.557289393094284, "eval_runtime": 1.4778, "eval_samples_per_second": 705.795, "eval_steps_per_second": 6.09, "step": 804 }, { "compression_loss": 0.0, "epoch": 3.73, "learning_rate": 1.5644725411751336e-05, "loss": 0.3135, "step": 1000 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 4.0, "eval_loss": 0.8995152711868286, "eval_matthews_correlation": 
0.48304108757820724, "eval_runtime": 1.126, "eval_samples_per_second": 926.26, "eval_steps_per_second": 7.993, "step": 1072 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 4.0, "eval_loss": 0.7692049741744995, "eval_matthews_correlation": 0.554912808282685, "eval_runtime": 1.5919, "eval_samples_per_second": 655.204, "eval_steps_per_second": 5.654, "step": 1072 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 5.0, "eval_loss": 0.826237142086029, "eval_matthews_correlation": 0.5107428879748117, "eval_runtime": 1.1006, "eval_samples_per_second": 947.693, "eval_steps_per_second": 8.178, "step": 1340 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 5.0, "eval_loss": 0.6900777816772461, "eval_matthews_correlation": 0.5834463254140851, "eval_runtime": 1.7324, "eval_samples_per_second": 602.048, "eval_steps_per_second": 5.195, "step": 1340 }, { "compression_loss": 0.0, "epoch": 5.6, "learning_rate": 1.1130738016122495e-05, "loss": 0.155, "step": 1500 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 6.0, "eval_loss": 0.8721849918365479, "eval_matthews_correlation": 0.5075813582663956, "eval_runtime": 0.8756, "eval_samples_per_second": 1191.142, "eval_steps_per_second": 10.278, "step": 1608 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 6.0, "eval_loss": 0.7215332388877869, "eval_matthews_correlation": 0.5924834238001306, "eval_runtime": 1.2201, "eval_samples_per_second": 854.87, "eval_steps_per_second": 7.377, "step": 1608 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 7.0, "eval_loss": 0.9455906748771667, "eval_matthews_correlation": 0.5053968650343078, "eval_runtime": 0.88, "eval_samples_per_second": 1185.181, "eval_steps_per_second": 10.227, "step": 1876 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 7.0, "eval_loss": 0.8112756013870239, "eval_matthews_correlation": 0.5764508680057442, "eval_runtime": 1.2263, "eval_samples_per_second": 850.555, "eval_steps_per_second": 7.339, "step": 1876 }, { "compression_loss": 0.0, "epoch": 7.46, "learning_rate": 6.354385348824488e-06, "loss": 0.0957, 
"step": 2000 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 8.0, "eval_loss": 0.9190700650215149, "eval_matthews_correlation": 0.5049093009936784, "eval_runtime": 0.8638, "eval_samples_per_second": 1207.492, "eval_steps_per_second": 10.419, "step": 2144 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 8.0, "eval_loss": 0.7810819149017334, "eval_matthews_correlation": 0.5885471185335819, "eval_runtime": 1.2436, "eval_samples_per_second": 838.718, "eval_steps_per_second": 7.237, "step": 2144 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 9.0, "eval_loss": 0.9647462964057922, "eval_matthews_correlation": 0.49938409054607086, "eval_runtime": 0.8845, "eval_samples_per_second": 1179.158, "eval_steps_per_second": 10.175, "step": 2412 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 9.0, "eval_loss": 0.8086517453193665, "eval_matthews_correlation": 0.5598395777855655, "eval_runtime": 1.2424, "eval_samples_per_second": 839.53, "eval_steps_per_second": 7.244, "step": 2412 }, { "compression_loss": 0.0, "epoch": 9.33, "learning_rate": 2.4303047703271643e-06, "loss": 0.0729, "step": 2500 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 10.0, "eval_loss": 0.9290460348129272, "eval_matthews_correlation": 0.49895377962487897, "eval_runtime": 0.8983, "eval_samples_per_second": 1161.144, "eval_steps_per_second": 10.019, "step": 2680 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 10.0, "eval_loss": 0.8078813552856445, "eval_matthews_correlation": 0.575435670477595, "eval_runtime": 1.2176, "eval_samples_per_second": 856.621, "eval_steps_per_second": 7.392, "step": 2680 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 11.0, "eval_loss": 0.9496363997459412, "eval_matthews_correlation": 0.49816788996099576, "eval_runtime": 0.9041, "eval_samples_per_second": 1153.571, "eval_steps_per_second": 9.954, "step": 2948 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 
19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 11.0, "eval_loss": 0.8123825788497925, "eval_matthews_correlation": 0.5727969336224868, "eval_runtime": 1.2393, "eval_samples_per_second": 841.575, "eval_steps_per_second": 7.262, "step": 2948 }, { "compression_loss": 0.0, "epoch": 11.19, "learning_rate": 2.5352335693478615e-07, "loss": 0.0626, "step": 3000 }, { "Minimum SubNet": "OrderedDict([(, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", "epoch": 12.0, "eval_loss": 0.9496448040008545, "eval_matthews_correlation": 0.49816788996099576, "eval_runtime": 0.8999, "eval_samples_per_second": 1159.047, "eval_steps_per_second": 10.001, "step": 3216 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 12.0, "eval_loss": 0.8131102323532104, "eval_matthews_correlation": 0.5727969336224868, "eval_runtime": 1.2698, "eval_samples_per_second": 821.371, "eval_steps_per_second": 7.088, "step": 3216 }, { "epoch": 12.0, "step": 3216, "total_flos": 6749587903150080.0, "train_loss": 0.244898120265695, "train_runtime": 5953.5704, "train_samples_per_second": 17.235, "train_steps_per_second": 0.54 } ], "max_steps": 3216, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 12, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 6749587903150080.0, "trial_name": null, "trial_params": null }
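The `log_history` array above interleaves, for each epoch, an evaluation of the full SuperNet and of the Minimum SubNet, plus periodic training-loss entries. A minimal sketch of how it could be summarised, assuming the JSON above is saved as `trainer_state.json` (the file name is an assumption, not part of the original run):

```python
# Minimal sketch (not part of the original training run): load the trainer-state
# JSON and print the per-epoch eval loss and Matthews correlation for the
# "SuperNet" and "Minimum SubNet" evaluation entries in log_history.
# The file name "trainer_state.json" is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    for tag in ("SuperNet", "Minimum SubNet"):
        if tag in entry:  # only evaluation entries carry these keys
            print(
                f"epoch {entry['epoch']:>4}  {tag:<14}  "
                f"eval_loss={entry['eval_loss']:.4f}  "
                f"matthews={entry['eval_matthews_correlation']:.4f}"
            )
```

On the numbers logged here, such a summary would show the SuperNet's Matthews correlation peaking around epoch 6 (≈0.592) and the Minimum SubNet's around epoch 5 (≈0.511), with both flattening out by epoch 12.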