# realfake/realfake/bin/create_metadata.py
# devforfu · Init · ea847ad
"""
Creates a meta-data file by combining information from the directory structure.
"""
import json
from pathlib import Path
from pydantic import Field
from realfake.config import IMAGE_FORMATS
from realfake.utils import inject_args, Args
class CreateMetadataArgs(Args):
    # Arguments consumed by main(); they are supplied through the inject_args
    # decorator (presumably parsed from the command line — confirm in realfake.utils).

    # Directory under which each dataset's sub-directory is looked up.
    root_dir: Path
    # Dataset sub-directory names, passed as a single comma-separated string.
    datasets: str = Field(..., help="Comma-separated list of datasets to include in the meta-data file")
    # Output file; main() writes one JSON record per line into it.
    jsonl_file: Path = Field(..., help="Path to the output JSONL file")
@inject_args
def main(args: CreateMetadataArgs) -> None:
    """Collect image records from the requested datasets and write them as JSONL.

    Each comma-separated name in ``args.datasets`` is resolved to a directory
    under ``args.root_dir``. A dataset whose name starts with "real" is
    labelled "real"; every other dataset is labelled "fake". Names containing
    "imagenet" are parsed as class-per-sub-directory trees, all others as a
    flat directory of images.

    Args:
        args: Parsed script arguments (root directory, dataset names and
            output path).

    Raises:
        FileNotFoundError: If a requested dataset directory does not exist.
    """
    # Tolerate spaces around commas and stray/trailing commas: an empty name
    # would otherwise resolve to root_dir itself and be scanned as a dataset.
    datasets = [name.strip() for name in args.datasets.split(",") if name.strip()]

    records = []
    for dataset in datasets:
        label = "real" if dataset.startswith("real") else "fake"
        dirpath = args.root_dir / dataset
        # Explicit raise instead of assert: asserts are stripped under -O,
        # which would silently disable this input validation.
        if not dirpath.exists():
            raise FileNotFoundError(f"dataset dir does not exist: {dirpath}")
        parse = parse_imagenet if "imagenet" in dataset else parse_flat
        records.extend(parse(dirpath, label))

    # The output file is opened only after every dataset has been parsed, so
    # a parsing failure above does not truncate an existing file.
    with open(args.jsonl_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)
def parse_imagenet(dirpath: Path, label: str) -> list:
    """Collect image records from a tree laid out as one sub-directory per class.

    Args:
        dirpath: Directory whose immediate children are class directories.
        label: Value stored under the "label" key of every record.

    Returns:
        A list of dicts with the keys "path" (the file path as a string),
        "label" and "class" (the name of the class directory). Files whose
        lower-cased suffix is not in IMAGE_FORMATS are reported on stdout and
        left out.

    Raises:
        NotADirectoryError: If an entry directly under ``dirpath`` is not a
            directory.
    """
    records = []
    for classdir in dirpath.iterdir():
        # Explicit raise instead of assert: asserts are stripped under -O,
        # so the layout check must not depend on them.
        if not classdir.is_dir():
            raise NotADirectoryError(f"class directory is not a directory: {classdir}")
        for fn in classdir.iterdir():
            if fn.suffix.lower() in IMAGE_FORMATS:
                records.append({"path": str(fn), "label": label, "class": classdir.name})
            else:
                print("Not an image file:", fn)
    return records
def parse_flat(dirpath: Path, label: str) -> list:
    """Build one record per image file found directly inside ``dirpath``.

    Entries whose lower-cased suffix is not in IMAGE_FORMATS are reported on
    stdout and skipped. Every record gets the given ``label`` and a "class"
    of None, since a flat directory carries no class information.
    """
    found = []
    for entry in dirpath.iterdir():
        is_image = entry.suffix.lower() in IMAGE_FORMATS
        if not is_image:
            print("Not an image file:", entry)
            continue
        item = {"path": str(entry), "label": label, "class": None}
        found.append(item)
    return found
# Script entry point. main() is called without arguments here even though it
# declares `args`; the inject_args decorator is presumably what supplies it.
if __name__ == "__main__":
    main()