|
""" |
|
Creates a meta-data JSONL file by combining the information from the dataset directory structure.
|
""" |
|
import json |
|
from pathlib import Path |
|
|
|
from pydantic import Field |
|
|
|
from realfake.config import IMAGE_FORMATS |
|
from realfake.utils import inject_args, Args |
|
|
|
|
|
class CreateMetadataArgs(Args):
    """Arguments consumed by ``main`` when building the meta-data file."""

    # Directory under which every dataset named in `datasets` is looked up.
    root_dir: Path
    datasets: str = Field(
        ...,
        help="Comma-separated list of datasets to include in the meta-data file",
    )
    jsonl_file: Path = Field(
        ...,
        help="Path to the output JSONL file",
    )
|
|
|
|
|
@inject_args
def main(args: CreateMetadataArgs) -> None:
    """Scan the requested datasets and write one JSON record per image.

    Each comma-separated entry of ``args.datasets`` names a sub-directory of
    ``args.root_dir``. Entries whose name starts with ``"real"`` are labelled
    ``"real"``; all others are labelled ``"fake"``. Entries whose name
    contains ``"imagenet"`` are parsed with ``parse_imagenet``; everything
    else is parsed with ``parse_flat``. The collected records are written to
    ``args.jsonl_file``, one JSON object per line.

    Raises:
        ValueError: If ``args.datasets`` contains no dataset names.
        FileNotFoundError: If a dataset directory does not exist.
    """
    # Tolerate whitespace around names and stray commas ("a, b," -> a, b).
    # An empty name would otherwise resolve to root_dir itself and be scanned
    # as a flat "fake" dataset.
    datasets = [name.strip() for name in args.datasets.split(",") if name.strip()]
    if not datasets:
        raise ValueError(f"no dataset names given: {args.datasets!r}")

    records = []
    for dataset in datasets:
        label = "real" if dataset.startswith("real") else "fake"
        dirpath = args.root_dir / dataset
        # Explicit raise instead of assert: asserts are stripped by `python -O`.
        if not dirpath.exists():
            raise FileNotFoundError(f"dataset dir does not exist: {dirpath}")
        parse = parse_imagenet if "imagenet" in dataset else parse_flat
        records.extend(parse(dirpath, label))

    # Every dataset is scanned before the output is opened, so a failure above
    # never truncates an existing file or leaves a partial one behind.
    with open(args.jsonl_file, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)
|
|
|
|
|
def parse_imagenet(dirpath: Path, label: str) -> list:
    """Collect image records from a directory with one sub-directory per class.

    Every entry of ``dirpath`` must be a directory; its name becomes the
    ``"class"`` of the files directly inside it. Files whose lower-cased
    suffix is not in ``IMAGE_FORMATS`` are reported on stdout and skipped.

    Args:
        dirpath: Dataset root holding the class sub-directories.
        label: Value stored under the ``"label"`` key of every record.

    Returns:
        A list of dicts with the keys ``"path"``, ``"label"`` and ``"class"``,
        ordered by class directory and then by file path.

    Raises:
        NotADirectoryError: If an entry of ``dirpath`` is not a directory.
    """
    records = []
    # iterdir() yields entries in arbitrary order; sorting keeps the generated
    # meta-data reproducible across runs and file systems.
    for classdir in sorted(dirpath.iterdir()):
        # Explicit raise instead of assert: asserts are stripped by `python -O`.
        if not classdir.is_dir():
            raise NotADirectoryError(
                f"class directory is not a directory: {classdir}"
            )
        for fn in sorted(classdir.iterdir()):
            if fn.suffix.lower() in IMAGE_FORMATS:
                records.append(
                    {"path": str(fn), "label": label, "class": classdir.name}
                )
            else:
                print("Not an image file:", fn)
    return records
|
|
|
|
|
def parse_flat(dirpath: Path, label: str) -> list:
    """Collect image records from a directory that holds the files directly.

    Entries whose lower-cased suffix is not in ``IMAGE_FORMATS`` are reported
    on stdout and skipped.

    Args:
        dirpath: Directory whose immediate entries are examined.
        label: Value stored under the ``"label"`` key of every record.

    Returns:
        A list of dicts with the keys ``"path"``, ``"label"`` and ``"class"``
        (always ``None`` here), ordered by file path.
    """
    records = []
    # iterdir() yields entries in arbitrary order; sorting keeps the generated
    # meta-data reproducible across runs and file systems.
    for fn in sorted(dirpath.iterdir()):
        if fn.suffix.lower() in IMAGE_FORMATS:
            records.append({"path": str(fn), "label": label, "class": None})
        else:
            print("Not an image file:", fn)
    return records
|
|
|
|
|
|
|
# Script entry point. `main` is called without arguments, so the `inject_args`
# decorator is expected to supply `args` — presumably parsed from the command
# line; verify against realfake.utils.
if __name__ == "__main__":
    main()
|
|