|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"best_supernet_model_checkpoint": null, |
|
"epoch": 12.0, |
|
"global_step": 3216, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 1.0, |
|
"eval_loss": 0.7149681448936462, |
|
"eval_matthews_correlation": 0.3947290430655349, |
|
"eval_runtime": 1.1039, |
|
"eval_samples_per_second": 944.822, |
|
"eval_steps_per_second": 8.153, |
|
"step": 268 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 1.0, |
|
"eval_loss": 0.6399039030075073, |
|
"eval_matthews_correlation": 0.5222017375430389, |
|
"eval_runtime": 1.6156, |
|
"eval_samples_per_second": 645.592, |
|
"eval_steps_per_second": 5.571, |
|
"step": 268 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 1.87, |
|
"learning_rate": 1.884896652217917e-05, |
|
"loss": 0.8522, |
|
"step": 500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 2.0, |
|
"eval_loss": 0.7286580801010132, |
|
"eval_matthews_correlation": 0.4630242094821006, |
|
"eval_runtime": 1.1443, |
|
"eval_samples_per_second": 911.446, |
|
"eval_steps_per_second": 7.865, |
|
"step": 536 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 2.0, |
|
"eval_loss": 0.6622146964073181, |
|
"eval_matthews_correlation": 0.5624034645452709, |
|
"eval_runtime": 1.4549, |
|
"eval_samples_per_second": 716.899, |
|
"eval_steps_per_second": 6.186, |
|
"step": 536 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 3.0, |
|
"eval_loss": 0.7320300340652466, |
|
"eval_matthews_correlation": 0.4774992729117021, |
|
"eval_runtime": 1.179, |
|
"eval_samples_per_second": 884.654, |
|
"eval_steps_per_second": 7.634, |
|
"step": 804 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 3.0, |
|
"eval_loss": 0.6782493591308594, |
|
"eval_matthews_correlation": 0.557289393094284, |
|
"eval_runtime": 1.4778, |
|
"eval_samples_per_second": 705.795, |
|
"eval_steps_per_second": 6.09, |
|
"step": 804 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 3.73, |
|
"learning_rate": 1.5644725411751336e-05, |
|
"loss": 0.3135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 4.0, |
|
"eval_loss": 0.8995152711868286, |
|
"eval_matthews_correlation": 0.48304108757820724, |
|
"eval_runtime": 1.126, |
|
"eval_samples_per_second": 926.26, |
|
"eval_steps_per_second": 7.993, |
|
"step": 1072 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 4.0, |
|
"eval_loss": 0.7692049741744995, |
|
"eval_matthews_correlation": 0.554912808282685, |
|
"eval_runtime": 1.5919, |
|
"eval_samples_per_second": 655.204, |
|
"eval_steps_per_second": 5.654, |
|
"step": 1072 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 5.0, |
|
"eval_loss": 0.826237142086029, |
|
"eval_matthews_correlation": 0.5107428879748117, |
|
"eval_runtime": 1.1006, |
|
"eval_samples_per_second": 947.693, |
|
"eval_steps_per_second": 8.178, |
|
"step": 1340 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 5.0, |
|
"eval_loss": 0.6900777816772461, |
|
"eval_matthews_correlation": 0.5834463254140851, |
|
"eval_runtime": 1.7324, |
|
"eval_samples_per_second": 602.048, |
|
"eval_steps_per_second": 5.195, |
|
"step": 1340 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 5.6, |
|
"learning_rate": 1.1130738016122495e-05, |
|
"loss": 0.155, |
|
"step": 1500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 6.0, |
|
"eval_loss": 0.8721849918365479, |
|
"eval_matthews_correlation": 0.5075813582663956, |
|
"eval_runtime": 0.8756, |
|
"eval_samples_per_second": 1191.142, |
|
"eval_steps_per_second": 10.278, |
|
"step": 1608 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 6.0, |
|
"eval_loss": 0.7215332388877869, |
|
"eval_matthews_correlation": 0.5924834238001306, |
|
"eval_runtime": 1.2201, |
|
"eval_samples_per_second": 854.87, |
|
"eval_steps_per_second": 7.377, |
|
"step": 1608 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 7.0, |
|
"eval_loss": 0.9455906748771667, |
|
"eval_matthews_correlation": 0.5053968650343078, |
|
"eval_runtime": 0.88, |
|
"eval_samples_per_second": 1185.181, |
|
"eval_steps_per_second": 10.227, |
|
"step": 1876 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 7.0, |
|
"eval_loss": 0.8112756013870239, |
|
"eval_matthews_correlation": 0.5764508680057442, |
|
"eval_runtime": 1.2263, |
|
"eval_samples_per_second": 850.555, |
|
"eval_steps_per_second": 7.339, |
|
"step": 1876 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 7.46, |
|
"learning_rate": 6.354385348824488e-06, |
|
"loss": 0.0957, |
|
"step": 2000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 8.0, |
|
"eval_loss": 0.9190700650215149, |
|
"eval_matthews_correlation": 0.5049093009936784, |
|
"eval_runtime": 0.8638, |
|
"eval_samples_per_second": 1207.492, |
|
"eval_steps_per_second": 10.419, |
|
"step": 2144 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 8.0, |
|
"eval_loss": 0.7810819149017334, |
|
"eval_matthews_correlation": 0.5885471185335819, |
|
"eval_runtime": 1.2436, |
|
"eval_samples_per_second": 838.718, |
|
"eval_steps_per_second": 7.237, |
|
"step": 2144 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 9.0, |
|
"eval_loss": 0.9647462964057922, |
|
"eval_matthews_correlation": 0.49938409054607086, |
|
"eval_runtime": 0.8845, |
|
"eval_samples_per_second": 1179.158, |
|
"eval_steps_per_second": 10.175, |
|
"step": 2412 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 9.0, |
|
"eval_loss": 0.8086517453193665, |
|
"eval_matthews_correlation": 0.5598395777855655, |
|
"eval_runtime": 1.2424, |
|
"eval_samples_per_second": 839.53, |
|
"eval_steps_per_second": 7.244, |
|
"step": 2412 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 9.33, |
|
"learning_rate": 2.4303047703271643e-06, |
|
"loss": 0.0729, |
|
"step": 2500 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 10.0, |
|
"eval_loss": 0.9290460348129272, |
|
"eval_matthews_correlation": 0.49895377962487897, |
|
"eval_runtime": 0.8983, |
|
"eval_samples_per_second": 1161.144, |
|
"eval_steps_per_second": 10.019, |
|
"step": 2680 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 10.0, |
|
"eval_loss": 0.8078813552856445, |
|
"eval_matthews_correlation": 0.575435670477595, |
|
"eval_runtime": 1.2176, |
|
"eval_samples_per_second": 856.621, |
|
"eval_steps_per_second": 7.392, |
|
"step": 2680 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 11.0, |
|
"eval_loss": 0.9496363997459412, |
|
"eval_matthews_correlation": 0.49816788996099576, |
|
"eval_runtime": 0.9041, |
|
"eval_samples_per_second": 1153.571, |
|
"eval_steps_per_second": 9.954, |
|
"step": 2948 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 11.0, |
|
"eval_loss": 0.8123825788497925, |
|
"eval_matthews_correlation": 0.5727969336224868, |
|
"eval_runtime": 1.2393, |
|
"eval_samples_per_second": 841.575, |
|
"eval_steps_per_second": 7.262, |
|
"step": 2948 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"epoch": 11.19, |
|
"learning_rate": 2.5352335693478615e-07, |
|
"loss": 0.0626, |
|
"step": 3000 |
|
}, |
|
{ |
|
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])", |
|
"epoch": 12.0, |
|
"eval_loss": 0.9496448040008545, |
|
"eval_matthews_correlation": 0.49816788996099576, |
|
"eval_runtime": 0.8999, |
|
"eval_samples_per_second": 1159.047, |
|
"eval_steps_per_second": 10.001, |
|
"step": 3216 |
|
}, |
|
{ |
|
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", |
|
"epoch": 12.0, |
|
"eval_loss": 0.8131102323532104, |
|
"eval_matthews_correlation": 0.5727969336224868, |
|
"eval_runtime": 1.2698, |
|
"eval_samples_per_second": 821.371, |
|
"eval_steps_per_second": 7.088, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 3216, |
|
"total_flos": 6749587903150080.0, |
|
"train_loss": 0.244898120265695, |
|
"train_runtime": 5953.5704, |
|
"train_samples_per_second": 17.235, |
|
"train_steps_per_second": 0.54 |
|
} |
|
], |
|
"max_steps": 3216, |
|
"min_subnet_acc": null, |
|
"min_subnet_best_acc": null, |
|
"num_train_epochs": 12, |
|
"supernet_acc": null, |
|
"supernet_best_acc": null, |
|
"total_flos": 6749587903150080.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|