"""Create a JSONL meta-data file from a directory tree of image datasets.

Each dataset is a sub-directory of ``root_dir``. One JSON object is written per
image file, with the keys ``path``, ``label`` and ``class``.
"""
import json
from pathlib import Path
from typing import Optional

from pydantic import Field

from realfake.config import IMAGE_FORMATS
from realfake.utils import inject_args, Args


class CreateMetadataArgs(Args):
    """Arguments consumed by :func:`main`."""

    # Directory that holds one sub-directory per dataset.
    root_dir: Path
    datasets: str = Field(..., help="Comma-separated list of datasets to include in the meta-data file")
    jsonl_file: Path = Field(..., help="Path to the output JSONL file")


# NOTE(review): `inject_args` comes from realfake.utils; presumably it builds
# the CreateMetadataArgs instance (main() is invoked without arguments below).
@inject_args
def main(args: CreateMetadataArgs) -> None:
    """Scan the requested datasets and write one JSON record per image.

    A dataset whose name starts with ``"real"`` is labelled ``"real"``; every
    other dataset is labelled ``"fake"``. A dataset whose name contains
    ``"imagenet"`` is parsed as class sub-directories (see
    :func:`parse_imagenet`); any other dataset is parsed as a flat directory
    (see :func:`parse_flat`).

    Args:
        args: Root directory, comma-separated dataset names and output path.

    Raises:
        ValueError: If ``args.datasets`` contains no dataset name.
        FileNotFoundError: If a dataset directory does not exist.
        NotADirectoryError: If an imagenet-style dataset contains a
            non-directory entry at class level.
    """
    # Tolerate "a, b" and trailing commas. An empty name would otherwise
    # resolve to root_dir itself and be scanned as a flat "fake" dataset.
    datasets = [name.strip() for name in args.datasets.split(",") if name.strip()]
    if not datasets:
        raise ValueError(f"no dataset names given: {args.datasets!r}")

    records = []
    for dataset in datasets:
        label = "real" if dataset.startswith("real") else "fake"
        dirpath = args.root_dir / dataset
        # Explicit raise instead of `assert`: assertions are removed under -O.
        if not dirpath.exists():
            raise FileNotFoundError(f"dataset dir does not exist: {dirpath}")
        parse = parse_imagenet if "imagenet" in dataset else parse_flat
        records.extend(parse(dirpath, label))

    # All datasets are collected before the output is opened, so a missing
    # dataset never leaves a partially written file behind.
    with open(args.jsonl_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


def _collect_images(dirpath: Path, label: str, class_name: Optional[str]) -> list:
    """Return one record per image file found directly inside ``dirpath``.

    Entries whose lower-cased suffix is not in ``IMAGE_FORMATS`` are reported
    on stdout and skipped. Entries are visited in sorted order so that the
    result does not depend on the file system's listing order.
    """
    records = []
    for fn in sorted(dirpath.iterdir()):
        if fn.suffix.lower() in IMAGE_FORMATS:
            records.append({"path": str(fn), "label": label, "class": class_name})
        else:
            print("Not an image file:", fn)
    return records


def parse_imagenet(dirpath: Path, label: str) -> list:
    """Parse a dataset laid out as ``<dirpath>/<class name>/<image>``.

    Args:
        dirpath: Dataset directory containing one sub-directory per class.
        label: Label stored in every record.

    Returns:
        A list of ``{"path", "label", "class"}`` records, where ``class`` is
        the name of the class sub-directory.

    Raises:
        NotADirectoryError: If an entry of ``dirpath`` is not a directory.
    """
    records = []
    for classdir in sorted(dirpath.iterdir()):
        # Explicit raise instead of `assert`: assertions are removed under -O.
        if not classdir.is_dir():
            raise NotADirectoryError(f"class directory is not a directory: {classdir}")
        records.extend(_collect_images(classdir, label, classdir.name))
    return records


def parse_flat(dirpath: Path, label: str) -> list:
    """Parse a dataset whose images sit directly inside ``dirpath``.

    Args:
        dirpath: Dataset directory containing the image files.
        label: Label stored in every record.

    Returns:
        A list of ``{"path", "label", "class"}`` records with ``class`` set to
        ``None``.
    """
    return _collect_images(dirpath, label, None)


if __name__ == "__main__":
    main()