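"""Pre-process the CoNSeP dataset into per-nucleus image crops.

Reads the extracted CoNSeP layout (Train/Test folders containing Images/*.png
and Labels/*.mat), builds cropped records via consep_nuclei_dataset, and writes
a dataset.json manifest with "training" and "validation" sections.
"""
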
import argparse
import glob
import json
import logging
import os

from dataset import consep_nuclei_dataset

logger = logging.getLogger(__name__)


def main():
    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] [%(process)s] [%(threadName)s] [%(levelname)s] (%(name)s:%(lineno)d) - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        force=True,
    )
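
    # CLI options: raw CoNSeP location, output dir for the crops, per-nucleus
    # crop size, and an optional cap on the number of records (0 = no limit)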
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        "-i",
        type=str,
        default=r"/workspace/data/CoNSeP",
        help="Input dir of the downloaded/extracted CoNSeP dataset",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=r"/workspace/data/CoNSePNuclei",
        help="Output dir to store the pre-processed data",
    )
    parser.add_argument("--crop_size", "-s", type=int, default=128, help="Crop size for each nucleus")
    parser.add_argument("--limit", "-n", type=int, default=0, help="Non-zero value to limit the number of records processed")

    args = parser.parse_args()
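
    # Records for each section of dataset.json ("training" / "validation")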
    dataset_json = {}

    # CoNSeP ships Train/Test folders; map them to training/validation sections
    for f, v in {"Train": "training", "Test": "validation"}.items():
        logger.info("---------------------------------------------------------------------------------")
        if not os.path.exists(os.path.join(args.input, f)):
            logger.warning(f"Ignoring {f} (does not exist in the input folder)")
            continue

        logger.info(f"Processing Images/labels for: {f}")
        images_path = os.path.join(args.input, f, "Images", "*.png")
        labels_path = os.path.join(args.input, f, "Labels", "*.mat")
        # Sorting both globs pairs each image with its label by filename order
        images = sorted(glob.glob(images_path))
        labels = sorted(glob.glob(labels_path))
        ds = [{"image": i, "label": l} for i, l in zip(images, labels)]

        output_dir = os.path.join(args.output, f) if args.output else f
        crop_size = args.crop_size
        limit = args.limit

        ds_new = consep_nuclei_dataset(ds, output_dir, crop_size, limit=limit)
        logger.info(f"Total Generated/Extended Records: {len(ds)} => {len(ds_new)}")

        dataset_json[v] = ds_new

    # Ensure the output dir exists before writing the manifest (the loop above
    # may have been skipped entirely if neither Train nor Test was found)
    os.makedirs(args.output, exist_ok=True)
    ds_file = os.path.join(args.output, "dataset.json")
    with open(ds_file, "w") as fp:
        json.dump(dataset_json, fp, indent=2)
    logger.info(f"Dataset JSON Generated at: {ds_file}")
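
# Typical invocation (script filename illustrative; defaults match the CLI above):
#   python prepare_consep_nuclei.py -i /workspace/data/CoNSeP -o /workspace/data/CoNSePNuclei -s 128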


if __name__ == "__main__":
    main()