haakohu committed
Commit
5d756f1
1 Parent(s): 24ca44a
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +12 -0
  2. .gitignore +51 -0
  3. README.md +4 -4
  4. app.py +31 -0
  5. configs/anonymizers/FB_cse.py +28 -0
  6. configs/anonymizers/FB_cse_mask.py +29 -0
  7. configs/anonymizers/FB_cse_mask_face.py +29 -0
  8. configs/anonymizers/deep_privacy1.py +15 -0
  9. configs/anonymizers/face.py +17 -0
  10. configs/anonymizers/face_fdf128.py +18 -0
  11. configs/anonymizers/market1501/blackout.py +8 -0
  12. configs/anonymizers/market1501/person.py +6 -0
  13. configs/anonymizers/market1501/pixelation16.py +8 -0
  14. configs/anonymizers/market1501/pixelation8.py +8 -0
  15. configs/datasets/coco_cse.py +69 -0
  16. configs/datasets/fdf128.py +24 -0
  17. configs/datasets/fdf256.py +55 -0
  18. configs/datasets/fdh.py +90 -0
  19. configs/datasets/utils.py +21 -0
  20. configs/defaults.py +53 -0
  21. configs/discriminators/sg2_discriminator.py +43 -0
  22. configs/fdf/deep_privacy1.py +9 -0
  23. configs/fdf/stylegan.py +14 -0
  24. configs/fdf/stylegan_fdf128.py +17 -0
  25. configs/fdh/styleganL.py +16 -0
  26. configs/fdh/styleganL_nocse.py +14 -0
  27. configs/generators/stylegan_unet.py +22 -0
  28. dp2/__init__.py +0 -0
  29. dp2/anonymizer/__init__.py +1 -0
  30. dp2/anonymizer/anonymizer.py +163 -0
  31. dp2/anonymizer/histogram_match_anonymizers.py +93 -0
  32. dp2/data/__init__.py +0 -0
  33. dp2/data/build.py +40 -0
  34. dp2/data/datasets/__init__.py +0 -0
  35. dp2/data/datasets/coco_cse.py +68 -0
  36. dp2/data/datasets/fdf.py +128 -0
  37. dp2/data/datasets/fdf128_wds.py +96 -0
  38. dp2/data/datasets/fdh.py +142 -0
  39. dp2/data/transforms/__init__.py +2 -0
  40. dp2/data/transforms/functional.py +57 -0
  41. dp2/data/transforms/stylegan2_transform.py +394 -0
  42. dp2/data/transforms/transforms.py +277 -0
  43. dp2/data/utils.py +122 -0
  44. dp2/detection/__init__.py +3 -0
  45. dp2/detection/base.py +42 -0
  46. dp2/detection/box_utils.py +104 -0
  47. dp2/detection/box_utils_fdf.py +202 -0
  48. dp2/detection/cse_mask_face_detector.py +116 -0
  49. dp2/detection/deep_privacy1_detector.py +106 -0
  50. dp2/detection/face_detector.py +62 -0
.gitattributes CHANGED
@@ -32,3 +32,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/21841da7-2546-4ce3-8460-909b3a63c58b13aac1a1-c778-4c8d-9b69-3e5ed2cde9de1524e76e-7aa6-4dd8-b643-52abc9f0792c filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/Base-DensePose-RCNN-FPN-Human.yaml filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/Base-DensePose-RCNN-FPN.yaml filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/model_final_1d3314.pkl filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/89660f04-5c11-4dbf-adac-cbe2f11b0aeea25cbf78-7558-475a-b3c7-03f5c10b7934646b0720-ca0a-4d53-aded-daddbfa45c9e filter=lfs diff=lfs merge=lfs -text
+ torch_home/hub/checkpoints/WIDERFace_DSFD_RES152.pth filter=lfs diff=lfs merge=lfs -text
+ media2/stylemc_example.jpg filter=lfs diff=lfs merge=lfs -text
+ media2/erling.jpg filter=lfs diff=lfs merge=lfs -text
+ media2/g7_leaders.jpg filter=lfs diff=lfs merge=lfs -text
+ media2/regjeringen.jpg filter=lfs diff=lfs merge=lfs -text
+ media/ filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,51 @@
+ # FILES
+ *.flist
+ *.zip
+ *.out
+ *.npy
+ *.gz
+ *.ckpt
+ *.log
+ *.pyc
+ *.csv
+ *.yml
+ *.ods
+ *.ods#
+ *.json
+ build_docker.sh
+
+ # Images / Videos
+ #*.png
+ #*.jpg
+ *.jpeg
+ *.m4a
+ *.mkv
+ *.mp4
+
+ # Directories created by inpaintron
+ .cache/
+ test_examples/
+ .vscode
+ __pycache__
+ .debug/
+ **/.ipynb_checkpoints/**
+ outputs/
+
+
+ # From pip setup
+ build/
+ *.egg-info
+ *.egg
+ .npm/
+
+ # From dockerfile
+ .bash_history
+ .viminfo
+ .local/
+ *.pickle
+ *.onnx
+
+
+ sbatch_files/
+ figures/
+ image_dump/
README.md CHANGED
@@ -1,10 +1,10 @@
  ---
- title: Deep Privacy2 Face
- emoji: 👀
- colorFrom: purple
+ title: Deep Privacy2
+ emoji: 📈
+ colorFrom: gray
  colorTo: indigo
  sdk: gradio
- sdk_version: 3.23.0
+ sdk_version: 3.9.1
  app_file: app.py
  pinned: false
  ---
app.py ADDED
@@ -0,0 +1,31 @@
+ import gradio
+ import os
+ from tops.config import instantiate
+ import gradio.inputs
+ os.system("pip install --upgrade pip")
+ os.system("pip install ftfy regex tqdm")
+ os.system("pip install --no-deps git+https://github.com/openai/CLIP.git")
+ os.system("pip install git+https://github.com/facebookresearch/detectron2@96c752ce821a3340e27edd51c28a00665dd32a30#subdirectory=projects/DensePose")
+ os.system("pip install --no-deps git+https://github.com/hukkelas/DSFD-Pytorch-Inference")
+ os.environ["TORCH_HOME"] = "torch_home"
+ from dp2 import utils
+ from gradio_demos.modules import ExampleDemo, WebcamDemo
+
+ cfg_face = utils.load_config("configs/anonymizers/face.py")
+
+ anonymizer_face = instantiate(cfg_face.anonymizer, load_cache=False)
+
+ anonymizer_face.initialize_tracker(fps=1)
+
+
+ with gradio.Blocks() as demo:
+     gradio.Markdown("# <center> DeepPrivacy2 - Realistic Image Anonymization </center>")
+     gradio.Markdown("### <center> Håkon Hukkelås, Rudolf Mester, Frank Lindseth </center>")
+     gradio.Markdown("<center> See more information at: <a href='https://github.com/hukkelas/deep_privacy2'> https://github.com/hukkelas/deep_privacy2 </a> </center>")
+     gradio.Markdown("<center> For a demo of face anonymization, see: <a href='https://huggingface.co/spaces/haakohu/deep_privacy2_face'> https://huggingface.co/spaces/haakohu/deep_privacy2_face </a> </center>")
+     with gradio.Tab("Face Anonymization"):
+         ExampleDemo(anonymizer_face)
+     with gradio.Tab("Live Webcam"):
+         WebcamDemo(anonymizer_face)
+
+ demo.launch()
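
The demo above wires the face anonymizer into Gradio, but the same object can be used directly on an image. The following is a minimal, untested sketch based on the Anonymizer class added later in this diff (dp2/anonymizer/anonymizer.py); the synthesis keyword names mirror its forward_G signature, and media2/erling.jpg is one of the images tracked via LFS in .gitattributes above:

    import torchvision
    from tops.config import instantiate
    from dp2 import utils

    cfg = utils.load_config("configs/anonymizers/face.py")
    anonymizer = instantiate(cfg.anonymizer, load_cache=False)

    # Anonymizer.forward expects a uint8 CHW tensor and returns one on the CPU.
    im = torchvision.io.read_image("media2/erling.jpg")
    anonymized = anonymizer(
        im, track=False, amp=True,
        multi_modal_truncation=False, truncation_value=0.0,
    )
    torchvision.io.write_png(anonymized, "erling_anonymized.png")
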
configs/anonymizers/FB_cse.py ADDED
@@ -0,0 +1,28 @@
1
+ from dp2.anonymizer import Anonymizer
2
+ from dp2.detection.person_detector import CSEPersonDetector
3
+ from ..defaults import common
4
+ from tops.config import LazyCall as L
5
+ from dp2.generator.dummy_generators import MaskOutGenerator
6
+
7
+
8
+ maskout_G = L(MaskOutGenerator)(noise="constant")
9
+
10
+ detector = L(CSEPersonDetector)(
11
+ mask_rcnn_cfg=dict(),
12
+ cse_cfg=dict(),
13
+ cse_post_process_cfg=dict(
14
+ target_imsize=(288, 160),
15
+ exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
16
+ exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
17
+ iou_combine_threshold=0.4,
18
+ dilation_percentage=0.02,
19
+ normalize_embedding=False
20
+ ),
21
+ score_threshold=0.3,
22
+ cache_directory=common.output_dir.joinpath("cse_person_detection_cache")
23
+ )
24
+
25
+ anonymizer = L(Anonymizer)(
26
+ detector="${detector}",
27
+ cse_person_G_cfg="configs/fdh/styleganL.py",
28
+ )
configs/anonymizers/FB_cse_mask.py ADDED
@@ -0,0 +1,29 @@
1
+ from dp2.anonymizer import Anonymizer
2
+ from dp2.detection.person_detector import CSEPersonDetector
3
+ from ..defaults import common
4
+ from tops.config import LazyCall as L
5
+ from dp2.generator.dummy_generators import MaskOutGenerator
6
+
7
+
8
+ maskout_G = L(MaskOutGenerator)(noise="constant")
9
+
10
+ detector = L(CSEPersonDetector)(
11
+ mask_rcnn_cfg=dict(),
12
+ cse_cfg=dict(),
13
+ cse_post_process_cfg=dict(
14
+ target_imsize=(288, 160),
15
+ exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
16
+ exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
17
+ iou_combine_threshold=0.4,
18
+ dilation_percentage=0.02,
19
+ normalize_embedding=False
20
+ ),
21
+ score_threshold=0.3,
22
+ cache_directory=common.output_dir.joinpath("cse_person_detection_cache")
23
+ )
24
+
25
+ anonymizer = L(Anonymizer)(
26
+ detector="${detector}",
27
+ person_G_cfg="configs/fdh/styleganL_nocse.py",
28
+ cse_person_G_cfg="configs/fdh/styleganL.py",
29
+ )
configs/anonymizers/FB_cse_mask_face.py ADDED
@@ -0,0 +1,29 @@
1
+ from dp2.anonymizer import Anonymizer
2
+ from dp2.detection.cse_mask_face_detector import CSeMaskFaceDetector
3
+ from ..defaults import common
4
+ from tops.config import LazyCall as L
5
+
6
+ detector = L(CSeMaskFaceDetector)(
7
+ mask_rcnn_cfg=dict(),
8
+ face_detector_cfg=dict(),
9
+ face_post_process_cfg=dict(target_imsize=(256, 256), fdf128_expand=False),
10
+ cse_cfg=dict(),
11
+ cse_post_process_cfg=dict(
12
+ target_imsize=(288, 160),
13
+ exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
14
+ exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
15
+ iou_combine_threshold=0.4,
16
+ dilation_percentage=0.02,
17
+ normalize_embedding=False
18
+ ),
19
+ score_threshold=0.3,
20
+ cache_directory=common.output_dir.joinpath("cse_mask_face_detection_cache")
21
+ )
22
+
23
+ anonymizer = L(Anonymizer)(
24
+ detector="${detector}",
25
+ face_G_cfg="configs/fdf/stylegan.py",
26
+ person_G_cfg="configs/fdh/styleganL_nocse.py",
27
+ cse_person_G_cfg="configs/fdh/styleganL.py",
28
+ car_G_cfg="configs/generators/dummy/pixelation8.py"
29
+ )
configs/anonymizers/deep_privacy1.py ADDED
@@ -0,0 +1,15 @@
1
+ from .face_fdf128 import anonymizer, common, detector
2
+ from dp2.detection.deep_privacy1_detector import DeepPrivacy1Detector
3
+ from tops.config import LazyCall as L
4
+
5
+ anonymizer.update(
6
+ face_G_cfg="configs/fdf/deep_privacy1.py",
7
+ )
8
+
9
+ anonymizer.detector = L(DeepPrivacy1Detector)(
10
+ face_detector_cfg=dict(name="DSFDDetector", clip_boxes=True),
11
+ face_post_process_cfg=dict(target_imsize=(128, 128), fdf128_expand=True),
12
+ score_threshold=0.3,
13
+ keypoint_threshold=0.3,
14
+ cache_directory=common.output_dir.joinpath("deep_privacy1_cache")
15
+ )
configs/anonymizers/face.py ADDED
@@ -0,0 +1,17 @@
1
+ from dp2.anonymizer import Anonymizer
2
+ from dp2.detection.face_detector import FaceDetector
3
+ from ..defaults import common
4
+ from tops.config import LazyCall as L
5
+
6
+
7
+ detector = L(FaceDetector)(
8
+ face_detector_cfg=dict(name="DSFDDetector", clip_boxes=True),
9
+ face_post_process_cfg=dict(target_imsize=(256, 256), fdf128_expand=False),
10
+ score_threshold=0.3,
11
+ cache_directory=common.output_dir.joinpath("face_detection_cache"),
12
+ )
13
+
14
+ anonymizer = L(Anonymizer)(
15
+ detector="${detector}",
16
+ face_G_cfg="configs/fdf/stylegan.py",
17
+ )
configs/anonymizers/face_fdf128.py ADDED
@@ -0,0 +1,18 @@
1
+ from dp2.anonymizer import Anonymizer
2
+ from dp2.detection.face_detector import FaceDetector
3
+ from ..defaults import common
4
+ from tops.config import LazyCall as L
5
+
6
+
7
+ detector = L(FaceDetector)(
8
+ face_detector_cfg=dict(name="DSFDDetector", clip_boxes=True),
9
+ face_post_process_cfg=dict(target_imsize=(128, 128), fdf128_expand=True),
10
+ score_threshold=0.3,
11
+ cache_directory=common.output_dir.joinpath("face_detection_cache")
12
+ )
13
+
14
+
15
+ anonymizer = L(Anonymizer)(
16
+ detector="${detector}",
17
+ face_G_cfg="configs/fdf/stylegan_fdf128.py",
18
+ )
configs/anonymizers/market1501/blackout.py ADDED
@@ -0,0 +1,8 @@
1
+ from ..FB_cse_mask_face import anonymizer, detector, common
2
+
3
+ detector.score_threshold = .1
4
+ detector.face_detector_cfg.confidence_threshold = .5
5
+ detector.cse_cfg.score_thres = 0.3
6
+ anonymizer.generators.face_G_cfg = None
7
+ anonymizer.generators.person_G_cfg = "configs/generators/dummy/maskout.py"
8
+ anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/maskout.py"
configs/anonymizers/market1501/person.py ADDED
@@ -0,0 +1,6 @@
1
+ from ..FB_cse_mask_face import anonymizer, detector, common
2
+
3
+ detector.score_threshold = .1
4
+ detector.face_detector_cfg.confidence_threshold = .5
5
+ detector.cse_cfg.score_thres = 0.3
6
+ anonymizer.generators.face_G_cfg = None
configs/anonymizers/market1501/pixelation16.py ADDED
@@ -0,0 +1,8 @@
1
+ from ..FB_cse_mask_face import anonymizer, detector, common
2
+
3
+ detector.score_threshold = .1
4
+ detector.face_detector_cfg.confidence_threshold = .5
5
+ detector.cse_cfg.score_thres = 0.3
6
+ anonymizer.generators.face_G_cfg = None
7
+ anonymizer.generators.person_G_cfg = "configs/generators/dummy/pixelation16.py"
8
+ anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/pixelation16.py"
configs/anonymizers/market1501/pixelation8.py ADDED
@@ -0,0 +1,8 @@
1
+ from ..FB_cse_mask_face import anonymizer, detector, common
2
+
3
+ detector.score_threshold = .1
4
+ detector.face_detector_cfg.confidence_threshold = .5
5
+ detector.cse_cfg.score_thres = 0.3
6
+ anonymizer.generators.face_G_cfg = None
7
+ anonymizer.generators.person_G_cfg = "configs/generators/dummy/pixelation8.py"
8
+ anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/pixelation8.py"
configs/datasets/coco_cse.py ADDED
@@ -0,0 +1,69 @@
1
+ import os
2
+ from pathlib import Path
3
+ from tops.config import LazyCall as L
4
+ import torch
5
+ import functools
6
+ from dp2.data.datasets.coco_cse import CocoCSE
7
+ from dp2.data.build import get_dataloader
8
+ from dp2.data.transforms.transforms import CreateEmbedding, Normalize, Resize, ToFloat, CreateCondition, RandomHorizontalFlip
9
+ from dp2.data.transforms.stylegan2_transform import StyleGANAugmentPipe
10
+ from dp2.metrics.torch_metrics import compute_metrics_iteratively
11
+ from .utils import final_eval_fn
12
+
13
+
14
+ dataset_base_dir = os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
15
+ metrics_cache = os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
16
+ data_dir = Path(dataset_base_dir, "coco_cse")
17
+ data = dict(
18
+ imsize=(288, 160),
19
+ im_channels=3,
20
+ semantic_nc=26,
21
+ cse_nc=16,
22
+ train=dict(
23
+ dataset=L(CocoCSE)(data_dir.joinpath("train"), transform=None, normalize_E=False),
24
+ loader=L(get_dataloader)(
25
+ shuffle=True, num_workers=6, drop_last=True, prefetch_factor=2,
26
+ batch_size="${train.batch_size}",
27
+ dataset="${..dataset}",
28
+ infinite=True,
29
+ gpu_transform=L(torch.nn.Sequential)(*[
30
+ L(ToFloat)(),
31
+ L(StyleGANAugmentPipe)(
32
+ rotate=0.5, rotate_max=.05,
33
+ xint=.5, xint_max=0.05,
34
+ scale=.5, scale_std=.05,
35
+ aniso=0.5, aniso_std=.05,
36
+ xfrac=.5, xfrac_std=.05,
37
+ brightness=.5, brightness_std=.05,
38
+ contrast=.5, contrast_std=.1,
39
+ hue=.5, hue_max=.05,
40
+ saturation=.5, saturation_std=.5,
41
+ imgfilter=.5, imgfilter_std=.1),
42
+ L(RandomHorizontalFlip)(p=0.5),
43
+ L(CreateEmbedding)(),
44
+ L(Resize)(size="${data.imsize}"),
45
+ L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
46
+ L(CreateCondition)(),
47
+ ])
48
+ )
49
+ ),
50
+ val=dict(
51
+ dataset=L(CocoCSE)(data_dir.joinpath("val"), transform=None, normalize_E=False),
52
+ loader=L(get_dataloader)(
53
+ shuffle=False, num_workers=6, drop_last=True, prefetch_factor=2,
54
+ batch_size="${train.batch_size}",
55
+ dataset="${..dataset}",
56
+ infinite=False,
57
+ gpu_transform=L(torch.nn.Sequential)(*[
58
+ L(ToFloat)(),
59
+ L(CreateEmbedding)(),
60
+ L(Resize)(size="${data.imsize}"),
61
+ L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
62
+ L(CreateCondition)(),
63
+ ])
64
+ )
65
+ ),
66
+ # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
67
+ train_evaluation_fn=functools.partial(compute_metrics_iteratively, cache_directory=Path(metrics_cache, "coco_cse_val"), include_two_fake=False),
68
+ evaluation_fn=functools.partial(final_eval_fn, cache_directory=Path(metrics_cache, "coco_cse_val_final"), include_two_fake=True)
69
+ )
configs/datasets/fdf128.py ADDED
@@ -0,0 +1,24 @@
1
+ from pathlib import Path
2
+ from functools import partial
3
+ from dp2.data.datasets.fdf import FDFDataset
4
+ from .fdf256 import data, dataset_base_dir, metrics_cache, final_eval_fn, train_eval_fn
5
+
6
+ data_dir = Path(dataset_base_dir, "fdf")
7
+ data.train.dataset.dirpath = data_dir.joinpath("train")
8
+ data.val.dataset.dirpath = data_dir.joinpath("val")
9
+ data.imsize = (128, 128)
10
+
11
+
12
+ data.train_evaluation_fn = partial(
13
+ train_eval_fn, cache_directory=Path(metrics_cache, "fdf128_val_train"))
14
+ data.evaluation_fn = partial(
15
+ final_eval_fn, cache_directory=Path(metrics_cache, "fdf128_val_final"))
16
+
17
+ data.train.dataset.update(
18
+ _target_ = FDFDataset,
19
+ imsize="${data.imsize}"
20
+ )
21
+ data.val.dataset.update(
22
+ _target_ = FDFDataset,
23
+ imsize="${data.imsize}"
24
+ )
configs/datasets/fdf256.py ADDED
@@ -0,0 +1,55 @@
1
+ import os
2
+ from pathlib import Path
3
+ from tops.config import LazyCall as L
4
+ import torch
5
+ import functools
6
+ from dp2.data.datasets.fdf import FDF256Dataset
7
+ from dp2.data.build import get_dataloader
8
+ from dp2.data.transforms.transforms import Normalize, Resize, ToFloat, CreateCondition, RandomHorizontalFlip
9
+ from .utils import final_eval_fn, train_eval_fn
10
+
11
+
12
+ dataset_base_dir = os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
13
+ metrics_cache = os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
14
+ data_dir = Path(dataset_base_dir, "fdf256")
15
+ data = dict(
16
+ imsize=(256, 256),
17
+ im_channels=3,
18
+ semantic_nc=None,
19
+ cse_nc=None,
20
+ n_keypoints=None,
21
+ train=dict(
22
+ dataset=L(FDF256Dataset)(dirpath=data_dir.joinpath("train"), transform=None, load_keypoints=False),
23
+ loader=L(get_dataloader)(
24
+ shuffle=True, num_workers=3, drop_last=True, prefetch_factor=2,
25
+ batch_size="${train.batch_size}",
26
+ dataset="${..dataset}",
27
+ infinite=True,
28
+ gpu_transform=L(torch.nn.Sequential)(*[
29
+ L(ToFloat)(),
30
+ L(RandomHorizontalFlip)(p=0.5),
31
+ L(Resize)(size="${data.imsize}"),
32
+ L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
33
+ L(CreateCondition)(),
34
+ ])
35
+ )
36
+ ),
37
+ val=dict(
38
+ dataset=L(FDF256Dataset)(dirpath=data_dir.joinpath("val"), transform=None, load_keypoints=False),
39
+ loader=L(get_dataloader)(
40
+ shuffle=False, num_workers=3, drop_last=False, prefetch_factor=2,
41
+ batch_size="${train.batch_size}",
42
+ dataset="${..dataset}",
43
+ infinite=False,
44
+ gpu_transform=L(torch.nn.Sequential)(*[
45
+ L(ToFloat)(),
46
+ L(Resize)(size="${data.imsize}"),
47
+ L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
48
+ L(CreateCondition)(),
49
+ ])
50
+ )
51
+ ),
52
+ # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
53
+ train_evaluation_fn=functools.partial(train_eval_fn, cache_directory=Path(metrics_cache, "fdf_val_train")),
54
+ evaluation_fn=functools.partial(final_eval_fn, cache_directory=Path(metrics_cache, "fdf_val"))
55
+ )
configs/datasets/fdh.py ADDED
@@ -0,0 +1,90 @@
1
+ import os
2
+ from pathlib import Path
3
+ from tops.config import LazyCall as L
4
+ import torch
5
+ import functools
6
+ from dp2.data.datasets.fdh import get_dataloader_fdh_wds
7
+ from dp2.data.utils import get_coco_flipmap
8
+ from dp2.data.transforms.transforms import (
9
+ Normalize,
10
+ ToFloat,
11
+ CreateCondition,
12
+ RandomHorizontalFlip,
13
+ CreateEmbedding,
14
+ )
15
+ from dp2.metrics.torch_metrics import compute_metrics_iteratively
16
+ from dp2.metrics.fid_clip import compute_fid_clip
17
+ from dp2.metrics.ppl import calculate_ppl
18
+ from .utils import train_eval_fn
19
+
20
+
21
+ def final_eval_fn(*args, **kwargs):
22
+ result = compute_metrics_iteratively(*args, **kwargs)
23
+ result2 = calculate_ppl(*args, **kwargs, upsample_size=(288, 160))
24
+ result3 = compute_fid_clip(*args, **kwargs)
25
+ assert all(key not in result for key in result2)
26
+ result.update(result2)
27
+ result.update(result3)
28
+ return result
29
+
30
+
31
+ def get_cache_directory(imsize, subset):
32
+ return Path(metrics_cache, f"{subset}{imsize[0]}")
33
+
34
+ dataset_base_dir = (
35
+ os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
36
+ )
37
+ metrics_cache = (
38
+ os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
39
+ )
40
+ data_dir = Path(dataset_base_dir, "fdh")
41
+ data = dict(
42
+ imsize=(288, 160),
43
+ im_channels=3,
44
+ cse_nc=16,
45
+ n_keypoints=17,
46
+ train=dict(
47
+ loader=L(get_dataloader_fdh_wds)(
48
+ path=data_dir.joinpath("train", "out-{000000..001423}.tar"),
49
+ batch_size="${train.batch_size}",
50
+ num_workers=6,
51
+ transform=L(torch.nn.Sequential)(
52
+ L(RandomHorizontalFlip)(p=0.5, flip_map=get_coco_flipmap()),
53
+ ),
54
+ gpu_transform=L(torch.nn.Sequential)(
55
+ L(ToFloat)(norm=False, keys=["img", "mask", "E_mask", "maskrcnn_mask"]),
56
+ L(CreateEmbedding)(embed_path=data_dir.joinpath("embed_map.torch")),
57
+ L(Normalize)(mean=[0.5*255, 0.5*255, 0.5*255], std=[0.5*255, 0.5*255, 0.5*255], inplace=True),
58
+ L(CreateCondition)(),
59
+ ),
60
+ infinite=True,
61
+ shuffle=True,
62
+ partial_batches=False,
63
+ load_embedding=True,
64
+ keypoints_split="train",
65
+ load_new_keypoints=False
66
+ )
67
+ ),
68
+ val=dict(
69
+ loader=L(get_dataloader_fdh_wds)(
70
+ path=data_dir.joinpath("val", "out-{000000..000023}.tar"),
71
+ batch_size="${train.batch_size}",
72
+ num_workers=6,
73
+ transform=None,
74
+ gpu_transform="${data.train.loader.gpu_transform}",
75
+ infinite=False,
76
+ shuffle=False,
77
+ partial_batches=True,
78
+ load_embedding=True,
79
+ keypoints_split="val",
80
+ load_new_keypoints="${data.train.loader.load_new_keypoints}"
81
+ )
82
+ ),
83
+ # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
84
+ train_evaluation_fn=L(functools.partial)(
85
+ train_eval_fn, cache_directory=L(get_cache_directory)(imsize="${data.imsize}", subset="fdh"),
86
+ data_len=30_000),
87
+ evaluation_fn=L(functools.partial)(
88
+ final_eval_fn, cache_directory=L(get_cache_directory)(imsize="${data.imsize}", subset="fdh_eval"),
89
+ data_len=30_000)
90
+ )
configs/datasets/utils.py ADDED
@@ -0,0 +1,21 @@
1
+ from dp2.metrics.ppl import calculate_ppl
2
+ from dp2.metrics.torch_metrics import compute_metrics_iteratively
3
+ from dp2.metrics.fid_clip import compute_fid_clip
4
+
5
+
6
+ def final_eval_fn(*args, **kwargs):
7
+ result = compute_metrics_iteratively(*args, **kwargs)
8
+ result2 = calculate_ppl(*args, **kwargs,)
9
+ result3 = compute_fid_clip(*args, **kwargs)
10
+ assert all(key not in result for key in result2)
11
+ result.update(result2)
12
+ result.update(result3)
13
+ return result
14
+
15
+
16
+ def train_eval_fn(*args, **kwargs):
17
+ result = compute_metrics_iteratively(*args, **kwargs)
18
+ result2 = compute_fid_clip(*args, **kwargs)
19
+ assert all(key not in result for key in result2)
20
+ result.update(result2)
21
+ return result
configs/defaults.py ADDED
@@ -0,0 +1,53 @@
1
+ import pathlib
2
+ import os
3
+ import torch
4
+ from tops.config import LazyCall as L
5
+
6
+ if "PRETRAINED_CHECKPOINTS_PATH" in os.environ:
7
+ PRETRAINED_CHECKPOINTS_PATH = pathlib.Path(os.environ["PRETRAINED_CHECKPOINTS_PATH"])
8
+ else:
9
+ PRETRAINED_CHECKPOINTS_PATH = pathlib.Path("pretrained_checkpoints")
10
+ if "BASE_OUTPUT_DIR" in os.environ:
11
+ BASE_OUTPUT_DIR = pathlib.Path(os.environ["BASE_OUTPUT_DIR"])
12
+ else:
13
+ BASE_OUTPUT_DIR = pathlib.Path("outputs")
14
+
15
+
16
+
17
+ common = dict(
18
+ logger_backend=["wandb", "stdout", "json", "image_dumper"],
19
+ wandb_project="deep_privacy2",
20
+ output_dir=BASE_OUTPUT_DIR,
21
+ experiment_name=None, # Optional experiment name to show on wandb
22
+ )
23
+
24
+ train = dict(
25
+ batch_size=32,
26
+ seed=0,
27
+ ims_per_log=1024,
28
+ ims_per_val=int(200e3),
29
+ max_images_to_train=int(12e6),
30
+ amp=dict(
31
+ enabled=True,
32
+ scaler_D=L(torch.cuda.amp.GradScaler)(init_scale=2**16, growth_factor=4, growth_interval=100, enabled="${..enabled}"),
33
+ scaler_G=L(torch.cuda.amp.GradScaler)(init_scale=2**16, growth_factor=4, growth_interval=100, enabled="${..enabled}"),
34
+ ),
35
+ fp16_ddp_accumulate=False, # All gather gradients in fp16?
36
+ broadcast_buffers=False,
37
+ bias_act_plugin_enabled=True,
38
+ grid_sample_gradfix_enabled=True,
39
+ conv2d_gradfix_enabled=False,
40
+ channels_last=False,
41
+ compile_G=dict(
42
+ enabled=False,
43
+ mode="default" # default, reduce-overhead or max-autotune
44
+ ),
45
+ compile_D=dict(
46
+ enabled=False,
47
+ mode="default" # default, reduce-overhead or max-autotune
48
+ )
49
+ )
50
+
51
+ # exponential moving average
52
+ EMA = dict(rampup=0.05)
53
+
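
The pathlib lookups at the top of this file mean that output and checkpoint locations can be redirected purely through the environment, provided the variables are set before any config module is imported; the dataset configs read BASE_DATASET_DIR and FBA_METRICS_CACHE the same way. A small sketch with hypothetical paths:

    import os

    # Must be set before a config that imports configs/defaults.py is loaded.
    os.environ["BASE_OUTPUT_DIR"] = "/scratch/dp2_outputs"            # hypothetical path
    os.environ["PRETRAINED_CHECKPOINTS_PATH"] = "/scratch/dp2_ckpts"  # hypothetical path

    from dp2 import utils
    cfg = utils.load_config("configs/fdf/stylegan.py")
    # Paths derived from common.output_dir (e.g. the detection caches in the
    # anonymizer configs) should now land under /scratch/dp2_outputs.
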
configs/discriminators/sg2_discriminator.py ADDED
@@ -0,0 +1,43 @@
1
+ from tops.config import LazyCall as L
2
+ from dp2.discriminator import SG2Discriminator
3
+ import torch
4
+ from dp2.loss import StyleGAN2Loss
5
+
6
+
7
+ discriminator = L(SG2Discriminator)(
8
+ imsize="${data.imsize}",
9
+ im_channels="${data.im_channels}",
10
+ min_fmap_resolution=4,
11
+ max_cnum_mul=8,
12
+ cnum=80,
13
+ input_condition=True,
14
+ conv_clamp=256,
15
+ input_cse=False,
16
+ cse_nc="${data.cse_nc}",
17
+ fix_residual=False,
18
+ )
19
+
20
+
21
+ loss_fnc = L(StyleGAN2Loss)(
22
+ lazy_regularization=True,
23
+ lazy_reg_interval=16,
24
+ r1_opts=dict(lambd=5, mask_out=False, mask_out_scale=False),
25
+ EP_lambd=0.001,
26
+ pl_reg_opts=dict(weight=0, batch_shrink=2,start_nimg=int(1e6), pl_decay=0.01)
27
+ )
28
+
29
+ def build_D_optim(type, lr, betas, lazy_regularization, lazy_reg_interval, **kwargs):
30
+ if lazy_regularization:
31
+ # From Analyzing and improving the image quality of stylegan, CVPR 2020
32
+ c = lazy_reg_interval / (lazy_reg_interval + 1)
33
+ betas = [beta ** c for beta in betas]
34
+ lr *= c
35
+ print(f"Lazy regularization on. Setting lr to: {lr}, betas to: {betas}")
36
+ return type(lr=lr, betas=betas, **kwargs)
37
+
38
+
39
+ D_optim = L(build_D_optim)(
40
+ type=torch.optim.Adam, lr=0.001, betas=(0.0, 0.99),
41
+ lazy_regularization="${loss_fnc.lazy_regularization}",
42
+ lazy_reg_interval="${loss_fnc.lazy_reg_interval}")
43
+ G_optim = L(torch.optim.Adam)(lr=0.001, betas=(0.0, 0.99))
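
With the defaults above (lr=0.001, betas=(0.0, 0.99), lazy_reg_interval=16), the lazy-regularization correction in build_D_optim works out as follows; this standalone snippet just repeats the function's arithmetic for clarity:

    lazy_reg_interval = 16
    lr, betas = 0.001, (0.0, 0.99)

    c = lazy_reg_interval / (lazy_reg_interval + 1)  # 16/17 ≈ 0.941
    lr = lr * c                                      # ≈ 0.000941
    betas = [beta ** c for beta in betas]            # [0.0, ≈ 0.9906]
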
configs/fdf/deep_privacy1.py ADDED
@@ -0,0 +1,9 @@
1
+ from tops.config import LazyCall as L
2
+ from dp2.generator.deep_privacy1 import MSGGenerator
3
+ from ..datasets.fdf128 import data
4
+ from ..defaults import common, train
5
+
6
+ generator = L(MSGGenerator)()
7
+
8
+ common.model_url = "https://folk.ntnu.no/haakohu/checkpoints/fdf128_model512.ckpt"
9
+ common.model_md5sum = "6cc8b285bdc1fcdfc64f5db7c521d0a6"
configs/fdf/stylegan.py ADDED
@@ -0,0 +1,14 @@
1
+ from ..generators.stylegan_unet import generator
2
+ from ..datasets.fdf256 import data
3
+ from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
4
+ from ..defaults import train, common, EMA
5
+
6
+ train.max_images_to_train = int(35e6)
7
+ G_optim.lr = 0.002
8
+ D_optim.lr = 0.002
9
+ generator.input_cse = False
10
+ loss_fnc.r1_opts.lambd = 1
11
+ train.ims_per_val = int(2e6)
12
+
13
+ common.model_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/89660f04-5c11-4dbf-adac-cbe2f11b0aeea25cbf78-7558-475a-b3c7-03f5c10b7934646b0720-ca0a-4d53-aded-daddbfa45c9e"
14
+ common.model_md5sum = "e8e32190528af2ed75f0cb792b7f2b07"
configs/fdf/stylegan_fdf128.py ADDED
@@ -0,0 +1,17 @@
1
+ from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
2
+ from ..datasets.fdf128 import data
3
+ from ..generators.stylegan_unet import generator
4
+ from ..defaults import train, common, EMA
5
+ from tops.config import LazyCall as L
6
+
7
+ G_optim.lr = 0.002
8
+ D_optim.lr = 0.002
9
+ generator.update(cnum=128, max_cnum_mul=4, input_cse=False)
10
+ loss_fnc.r1_opts.lambd = 0.1
11
+
12
+ train.update(ims_per_val=int(2e6), batch_size=64, max_images_to_train=int(35e6))
13
+
14
+ common.update(
15
+ model_url="https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/66d803c0-55ce-44c0-9d53-815c2c0e6ba4eb458409-9e91-45d1-bce0-95c8a47a57218b102fdf-bea3-44dc-aac4-0fb1d370ef1c",
16
+ model_md5sum="bccd4403e7c9bca682566ff3319e8176"
17
+ )
configs/fdh/styleganL.py ADDED
@@ -0,0 +1,16 @@
1
+ from tops.config import LazyCall as L
2
+ from ..generators.stylegan_unet import generator
3
+ from ..datasets.fdh import data
4
+ from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
5
+ from ..defaults import train, common, EMA
6
+
7
+ train.max_images_to_train = int(50e6)
8
+ train.batch_size = 64
9
+ G_optim.lr = 0.002
10
+ D_optim.lr = 0.002
11
+ data.train.loader.num_workers = 4
12
+ train.ims_per_val = int(1e6)
13
+ loss_fnc.r1_opts.lambd = .1
14
+
15
+ common.model_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/21841da7-2546-4ce3-8460-909b3a63c58b13aac1a1-c778-4c8d-9b69-3e5ed2cde9de1524e76e-7aa6-4dd8-b643-52abc9f0792c"
16
+ common.model_md5sum = "3411478b5ec600a4219cccf4499732bd"
configs/fdh/styleganL_nocse.py ADDED
@@ -0,0 +1,14 @@
1
+ from tops.config import LazyCall as L
2
+ from ..generators.stylegan_unet import generator
3
+ from ..datasets.fdh import data
4
+ from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
5
+ from ..defaults import train, common, EMA
6
+
7
+ train.max_images_to_train = int(50e6)
8
+ G_optim.lr = 0.002
9
+ D_optim.lr = 0.002
10
+ generator.input_cse = False
11
+ data.load_embeddings = False
12
+ common.model_url = "https://folk.ntnu.no/haakohu/checkpoints/deep_privacy2/fdh_styleganL_nocse.ckpt"
13
+ common.model_md5sum = "fda0d809741bc67487abada793975c37"
14
+ generator.fix_errors = False
configs/generators/stylegan_unet.py ADDED
@@ -0,0 +1,22 @@
1
+ from dp2.generator.stylegan_unet import StyleGANUnet
2
+ from tops.config import LazyCall as L
3
+
4
+ generator = L(StyleGANUnet)(
5
+ imsize="${data.imsize}",
6
+ im_channels="${data.im_channels}",
7
+ min_fmap_resolution=8,
8
+ cnum=64,
9
+ max_cnum_mul=8,
10
+ n_middle_blocks=0,
11
+ z_channels=512,
12
+ mask_output=True,
13
+ conv_clamp=256,
14
+ input_cse=True,
15
+ scale_grad=True,
16
+ cse_nc="${data.cse_nc}",
17
+ w_dim=512,
18
+ n_keypoints="${data.n_keypoints}",
19
+ input_keypoints=False,
20
+ input_keypoint_indices=[],
21
+ fix_errors=True
22
+ )
dp2/__init__.py ADDED
File without changes
dp2/anonymizer/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .anonymizer import Anonymizer
dp2/anonymizer/anonymizer.py ADDED
@@ -0,0 +1,163 @@
1
+ from pathlib import Path
2
+ from typing import Union, Optional
3
+ import numpy as np
4
+ import torch
5
+ import tops
6
+ import torchvision.transforms.functional as F
7
+ from motpy import Detection, MultiObjectTracker
8
+ from dp2.utils import load_config
9
+ from dp2.infer import build_trained_generator
10
+ from dp2.detection.structures import CSEPersonDetection, FaceDetection, PersonDetection, VehicleDetection
11
+
12
+
13
+ def load_generator_from_cfg_path(cfg_path: Union[str, Path]):
14
+ cfg = load_config(cfg_path)
15
+ G = build_trained_generator(cfg)
16
+ tops.logger.log(f"Loaded generator from: {cfg_path}")
17
+ return G
18
+
19
+
20
+ class Anonymizer:
21
+
22
+ def __init__(
23
+ self,
24
+ detector,
25
+ load_cache: bool = False,
26
+ person_G_cfg: Optional[Union[str, Path]] = None,
27
+ cse_person_G_cfg: Optional[Union[str, Path]] = None,
28
+ face_G_cfg: Optional[Union[str, Path]] = None,
29
+ car_G_cfg: Optional[Union[str, Path]] = None,
30
+ ) -> None:
31
+ self.detector = detector
32
+ self.generators = {k: None for k in [CSEPersonDetection, PersonDetection, FaceDetection, VehicleDetection]}
33
+ self.load_cache = load_cache
34
+ if cse_person_G_cfg is not None:
35
+ self.generators[CSEPersonDetection] = load_generator_from_cfg_path(cse_person_G_cfg)
36
+ if person_G_cfg is not None:
37
+ self.generators[PersonDetection] = load_generator_from_cfg_path(person_G_cfg)
38
+ if face_G_cfg is not None:
39
+ self.generators[FaceDetection] = load_generator_from_cfg_path(face_G_cfg)
40
+ if car_G_cfg is not None:
41
+ self.generators[VehicleDetection] = load_generator_from_cfg_path(car_G_cfg)
42
+
43
+ def initialize_tracker(self, fps: float):
44
+ self.tracker = MultiObjectTracker(dt=1/fps)
45
+ self.track_to_z_idx = dict()
46
+
47
+ def reset_tracker(self):
48
+ self.track_to_z_idx = dict()
49
+
50
+ def forward_G(self,
51
+ G,
52
+ batch,
53
+ multi_modal_truncation: bool,
54
+ amp: bool,
55
+ z_idx: int,
56
+ truncation_value: float,
57
+ idx: int,
58
+ all_styles=None):
59
+ batch["img"] = F.normalize(batch["img"].float(), [0.5*255, 0.5*255, 0.5*255], [0.5*255, 0.5*255, 0.5*255])
60
+ batch["img"] = batch["img"].float()
61
+ batch["condition"] = batch["mask"].float() * batch["img"]
62
+
63
+ with torch.cuda.amp.autocast(amp):
64
+ z = None
65
+ if z_idx is not None:
66
+ state = np.random.RandomState(seed=z_idx[idx])
67
+ z = state.normal(size=(1, G.z_channels)).astype(np.float32)
68
+ z = tops.to_cuda(torch.from_numpy(z))
69
+
70
+ if all_styles is not None:
71
+ anonymized_im = G(**batch, s=iter(all_styles[idx]))["img"]
72
+ elif multi_modal_truncation:
73
+ w_indices = None
74
+ if z_idx is not None:
75
+ w_indices = [z_idx[idx] % len(G.style_net.w_centers)]
76
+ anonymized_im = G.multi_modal_truncate(
77
+ **batch, truncation_value=truncation_value,
78
+ w_indices=w_indices,
79
+ z=z
80
+ )["img"]
81
+ else:
82
+ anonymized_im = G.sample(**batch, truncation_value=truncation_value, z=z)["img"]
83
+ anonymized_im = (anonymized_im+1).div(2).clamp(0, 1).mul(255)
84
+ return anonymized_im
85
+
86
+ @torch.no_grad()
87
+ def anonymize_detections(self,
88
+ im, detection,
89
+ update_identity=None,
90
+ **synthesis_kwargs
91
+ ):
92
+ G = self.generators[type(detection)]
93
+ if G is None:
94
+ return im
95
+ C, H, W = im.shape
96
+ if update_identity is None:
97
+ update_identity = [True for i in range(len(detection))]
98
+ for idx in range(len(detection)):
99
+ if not update_identity[idx]:
100
+ continue
101
+ batch = detection.get_crop(idx, im)
102
+ x0, y0, x1, y1 = batch.pop("boxes")[0]
103
+ batch = {k: tops.to_cuda(v) for k, v in batch.items()}
104
+ anonymized_im = self.forward_G(G, batch, **synthesis_kwargs, idx=idx)
105
+
106
+ gim = F.resize(anonymized_im[0], (y1-y0, x1-x0), interpolation=F.InterpolationMode.BICUBIC, antialias=True)
107
+ mask = F.resize(batch["mask"][0], (y1-y0, x1-x0), interpolation=F.InterpolationMode.NEAREST).squeeze(0)
108
+ # Remove padding
109
+ pad = [max(-x0, 0), max(-y0, 0)]
110
+ pad = [*pad, max(x1-W, 0), max(y1-H, 0)]
111
+ def remove_pad(x): return x[..., pad[1]:x.shape[-2]-pad[3], pad[0]:x.shape[-1]-pad[2]]
112
+
113
+ gim = remove_pad(gim)
114
+ mask = remove_pad(mask) > 0.5
115
+ x0, y0 = max(x0, 0), max(y0, 0)
116
+ x1, y1 = min(x1, W), min(y1, H)
117
+ mask = mask.logical_not()[None].repeat(3, 1, 1)
118
+
119
+ im[:, y0:y1, x0:x1][mask] = gim[mask].round().clamp(0, 255).byte()
120
+ return im
121
+
122
+ def visualize_detection(self, im: torch.Tensor, cache_id: str = None) -> torch.Tensor:
123
+ all_detections = self.detector.forward_and_cache(im, cache_id, load_cache=self.load_cache)
124
+ im = im.cpu()
125
+ for det in all_detections:
126
+ im = det.visualize(im)
127
+ return im
128
+
129
+ @torch.no_grad()
130
+ def forward(self, im: torch.Tensor, cache_id: str = None, track=True, detections=None, **synthesis_kwargs) -> torch.Tensor:
131
+ assert im.dtype == torch.uint8
132
+ im = tops.to_cuda(im)
133
+ all_detections = detections
134
+ if detections is None:
135
+ if self.load_cache:
136
+ all_detections = self.detector.forward_and_cache(im, cache_id)
137
+ else:
138
+ all_detections = self.detector(im)
139
+ if hasattr(self, "tracker") and track:
140
+ [_.pre_process() for _ in all_detections]
141
+ boxes = np.concatenate([_.boxes for _ in all_detections])
142
+ boxes = [Detection(box) for box in boxes]
143
+ self.tracker.step(boxes)
144
+ track_ids = self.tracker.detections_matched_ids
145
+ z_idx = []
146
+ for track_id in track_ids:
147
+ if track_id not in self.track_to_z_idx:
148
+ self.track_to_z_idx[track_id] = np.random.randint(0, 2**32-1)
149
+ z_idx.append(self.track_to_z_idx[track_id])
150
+ z_idx = np.array(z_idx)
151
+ idx_offset = 0
152
+
153
+ for detection in all_detections:
154
+ zs = None
155
+ if hasattr(self, "tracker") and track:
156
+ zs = z_idx[idx_offset:idx_offset+len(detection)]
157
+ idx_offset += len(detection)
158
+ im = self.anonymize_detections(im, detection, z_idx=zs, **synthesis_kwargs)
159
+
160
+ return im.cpu()
161
+
162
+ def __call__(self, *args, **kwargs):
163
+ return self.forward(*args, **kwargs)
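
For video, the motpy tracker above keeps z_idx stable for each tracked box, so an identity keeps the same synthesized appearance across frames. A hedged sketch of that loop, assuming `anonymizer` was instantiated as in app.py and `frames` is any iterable of uint8 CHW tensors (frame decoding and writing are omitted):

    anonymizer.initialize_tracker(fps=30)
    anonymized_frames = []
    for frame in frames:
        out = anonymizer(
            frame, track=True, amp=True,
            multi_modal_truncation=True, truncation_value=0.5,
        )
        anonymized_frames.append(out)  # uint8 CHW tensors on the CPU
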
dp2/anonymizer/histogram_match_anonymizers.py ADDED
@@ -0,0 +1,93 @@
1
+
2
+ import torch
3
+ import tops
4
+ import numpy as np
5
+ from kornia.color import rgb_to_hsv
6
+ from dp2 import utils
7
+ from kornia.enhance import histogram
8
+ from .anonymizer import Anonymizer
9
+ import torchvision.transforms.functional as F
10
+ from skimage.exposure import match_histograms
11
+ from kornia.filters import gaussian_blur2d
12
+
13
+
14
+ class LatentHistogramMatchAnonymizer(Anonymizer):
15
+
16
+ def forward_G(
17
+ self,
18
+ G,
19
+ batch,
20
+ multi_modal_truncation: bool,
21
+ amp: bool,
22
+ z_idx: int,
23
+ truncation_value: float,
24
+ idx: int,
25
+ n_sampling_steps: int = 1,
26
+ all_styles=None,
27
+ ):
28
+ batch["img"] = F.normalize(batch["img"].float(), [0.5*255, 0.5*255, 0.5*255], [0.5*255, 0.5*255, 0.5*255])
29
+ batch["img"] = batch["img"].float()
30
+ batch["condition"] = batch["mask"].float() * batch["img"]
31
+
32
+ assert z_idx is None and all_styles is None, "Arguments not supported with n_sampling_steps > 1."
33
+ real_hls = rgb_to_hsv(utils.denormalize_img(batch["img"]))
34
+ real_hls[:, 0] /= 2 * torch.pi
35
+ indices = [1, 2]
36
+ hist_kwargs = dict(
37
+ bins=torch.linspace(0, 1, 256, dtype=torch.float32, device=tops.get_device()),
38
+ bandwidth=torch.tensor(1., device=tops.get_device()))
39
+ real_hist = [histogram(real_hls[:, i].flatten(start_dim=1), **hist_kwargs) for i in indices]
40
+ for j in range(n_sampling_steps):
41
+ if j == 0:
42
+ if multi_modal_truncation:
43
+ w = G.style_net.multi_modal_truncate(
44
+ truncation_value=truncation_value, **batch, w_indices=None).detach()
45
+ else:
46
+ w = G.style_net.get_truncated(truncation_value, **batch).detach()
47
+ assert z_idx is None and all_styles is None, "Arguments not supported with n_sampling_steps > 1."
48
+ w.requires_grad = True
49
+ optim = torch.optim.Adam([w])
50
+ with torch.set_grad_enabled(True):
51
+ with torch.cuda.amp.autocast(amp):
52
+ anonymized_im = G(**batch, truncation_value=None, w=w)["img"]
53
+ fake_hls = rgb_to_hsv(anonymized_im*0.5 + 0.5)
54
+ fake_hls[:, 0] /= 2 * torch.pi
55
+ fake_hist = [histogram(fake_hls[:, i].flatten(start_dim=1), **hist_kwargs) for i in indices]
56
+ dist = sum([utils.torch_wasserstein_loss(r, f) for r, f in zip(real_hist, fake_hist)])
57
+ dist.backward()
58
+ if w.grad.sum() == 0:
59
+ break
60
+ assert w.grad.sum() != 0
61
+ optim.step()
62
+ optim.zero_grad()
63
+ if dist < 0.02:
64
+ break
65
+ anonymized_im = (anonymized_im+1).div(2).clamp(0, 1).mul(255)
66
+ return anonymized_im
67
+
68
+
69
+ class HistogramMatchAnonymizer(Anonymizer):
70
+
71
+ def forward_G(self, batch, *args, **kwargs):
72
+ rimg = batch["img"]
73
+ batch["img"] = F.normalize(batch["img"].float(), [0.5*255, 0.5*255, 0.5*255], [0.5*255, 0.5*255, 0.5*255])
74
+ batch["img"] = batch["img"].float()
75
+ batch["condition"] = batch["mask"].float() * batch["img"]
76
+
77
+ anonymized_im = super().forward_G(batch, *args, **kwargs)
78
+
79
+ equalized_gim = match_histograms(tops.im2numpy(anonymized_im.round().clamp(0, 255).byte()), tops.im2numpy(rimg))
80
+ if equalized_gim.dtype != np.uint8:
81
+ equalized_gim = equalized_gim.astype(np.float32)
82
+ assert equalized_gim.dtype == np.float32, equalized_gim.dtype
83
+ equalized_gim = tops.im2torch(equalized_gim, to_float=False)[0]
84
+ else:
85
+ equalized_gim = tops.im2torch(equalized_gim, to_float=False).float()[0]
86
+ equalized_gim = equalized_gim.to(device=rimg.device)
87
+ assert equalized_gim.dtype == torch.float32
88
+ gaussian_mask = 1 - (batch["maskrcnn_mask"][0].repeat(3, 1, 1) > 0.5).float()
89
+
90
+ gaussian_mask = gaussian_blur2d(gaussian_mask[None], kernel_size=[19, 19], sigma=[10, 10])[0]
91
+ gaussian_mask = gaussian_mask / gaussian_mask.max()
92
+ anonymized_im = gaussian_mask * equalized_gim + (1-gaussian_mask) * anonymized_im
93
+ return anonymized_im
dp2/data/__init__.py ADDED
File without changes
dp2/data/build.py ADDED
@@ -0,0 +1,40 @@
1
+ import torch
2
+ import tops
3
+ from .utils import collate_fn
4
+
5
+
6
+ def get_dataloader(
7
+ dataset, gpu_transform: torch.nn.Module,
8
+ num_workers,
9
+ batch_size,
10
+ infinite: bool,
11
+ drop_last: bool,
12
+ prefetch_factor: int,
13
+ shuffle,
14
+ channels_last=False
15
+ ):
16
+ sampler = None
17
+ dl_kwargs = dict(
18
+ pin_memory=True,
19
+ )
20
+ if infinite:
21
+ sampler = tops.InfiniteSampler(
22
+ dataset, rank=tops.rank(),
23
+ num_replicas=tops.world_size(),
24
+ shuffle=shuffle
25
+ )
26
+ elif tops.world_size() > 1:
27
+ sampler = torch.utils.data.DistributedSampler(
28
+ dataset, shuffle=shuffle, num_replicas=tops.world_size(), rank=tops.rank())
29
+ dl_kwargs["drop_last"] = drop_last
30
+ else:
31
+ dl_kwargs["shuffle"] = shuffle
32
+ dl_kwargs["drop_last"] = drop_last
33
+ dataloader = torch.utils.data.DataLoader(
34
+ dataset, sampler=sampler, collate_fn=collate_fn,
35
+ batch_size=batch_size,
36
+ num_workers=num_workers, prefetch_factor=prefetch_factor,
37
+ **dl_kwargs
38
+ )
39
+ dataloader = tops.DataPrefetcher(dataloader, gpu_transform, channels_last=channels_last)
40
+ return dataloader
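
The dataset configs earlier in this diff never call get_dataloader directly; they wrap it in LazyCall and leave batch_size and the dataset as `${...}` references. The sketch below shows one plausible way such a loader gets resolved. It is an assumption, not documented usage: it presumes tops.config.instantiate resolves the references as in app.py, that a full training config defining both `data` and `train` (e.g. configs/fdf/stylegan.py) is loaded, and that the FDF256 data is available under BASE_DATASET_DIR:

    from tops.config import instantiate
    from dp2 import utils

    cfg = utils.load_config("configs/fdf/stylegan.py")
    cfg.train.batch_size = 8  # resolves the ${train.batch_size} reference in the loader config
    train_loader = instantiate(cfg.data.train.loader)
    batch = next(iter(train_loader))  # dict with "img", "mask", "condition" after the GPU transform
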
dp2/data/datasets/__init__.py ADDED
File without changes
dp2/data/datasets/coco_cse.py ADDED
@@ -0,0 +1,68 @@
1
+ import pickle
2
+ import torchvision
3
+ import torch
4
+ import pathlib
5
+ import numpy as np
6
+ from typing import Callable, Optional, Union
7
+ from torch.hub import get_dir as get_hub_dir
8
+
9
+
10
+ def cache_embed_stats(embed_map: torch.Tensor):
11
+ mean = embed_map.mean(dim=0, keepdim=True)
12
+ rstd = ((embed_map - mean).square().mean(dim=0, keepdim=True)+1e-8).rsqrt()
13
+
14
+ cache = dict(mean=mean, rstd=rstd, embed_map=embed_map)
15
+ path = pathlib.Path(get_hub_dir(), f"embed_map_stats.torch")
16
+ path.parent.mkdir(exist_ok=True, parents=True)
17
+ torch.save(cache, path)
18
+
19
+
20
+ class CocoCSE(torch.utils.data.Dataset):
21
+
22
+ def __init__(self,
23
+ dirpath: Union[str, pathlib.Path],
24
+ transform: Optional[Callable],
25
+ normalize_E: bool,):
26
+ dirpath = pathlib.Path(dirpath)
27
+ self.dirpath = dirpath
28
+
29
+ self.transform = transform
30
+ assert self.dirpath.is_dir(),\
31
+ f"Did not find dataset at: {dirpath}"
32
+ self.image_paths, self.embedding_paths = self._load_impaths()
33
+ self.embed_map = torch.from_numpy(np.load(self.dirpath.joinpath("embed_map.npy")))
34
+ mean = self.embed_map.mean(dim=0, keepdim=True)
35
+ rstd = ((self.embed_map - mean).square().mean(dim=0, keepdim=True)+1e-8).rsqrt()
36
+ self.embed_map = (self.embed_map - mean) * rstd
37
+ cache_embed_stats(self.embed_map)
38
+
39
+ def _load_impaths(self):
40
+ image_dir = self.dirpath.joinpath("images")
41
+ image_paths = list(image_dir.glob("*.png"))
42
+ image_paths.sort()
43
+ embedding_paths = [
44
+ self.dirpath.joinpath("embedding", x.stem + ".npy") for x in image_paths
45
+ ]
46
+ return image_paths, embedding_paths
47
+
48
+ def __len__(self):
49
+ return len(self.image_paths)
50
+
51
+ def __getitem__(self, idx):
52
+ im = torchvision.io.read_image(str(self.image_paths[idx]))
53
+ vertices, mask, border = np.split(np.load(self.embedding_paths[idx]), 3, axis=-1)
54
+ vertices = torch.from_numpy(vertices.squeeze()).long()
55
+ mask = torch.from_numpy(mask.squeeze()).float()
56
+ border = torch.from_numpy(border.squeeze()).float()
57
+ E_mask = 1 - mask - border
58
+ batch = {
59
+ "img": im,
60
+ "vertices": vertices[None],
61
+ "mask": mask[None],
62
+ "embed_map": self.embed_map,
63
+ "border": border[None],
64
+ "E_mask": E_mask[None]
65
+ }
66
+ if self.transform is None:
67
+ return batch
68
+ return self.transform(batch)
dp2/data/datasets/fdf.py ADDED
@@ -0,0 +1,128 @@
1
+ import pathlib
2
+ from typing import Tuple
3
+ import numpy as np
4
+ import torch
5
+ import pathlib
6
+ try:
7
+ import pyspng
8
+ PYSPNG_IMPORTED = True
9
+ except ImportError:
10
+ PYSPNG_IMPORTED = False
11
+ print("Could not load pyspng. Defaulting to pillow image backend.")
12
+ from PIL import Image
13
+ from tops import logger
14
+
15
+
16
+ class FDFDataset:
17
+
18
+ def __init__(self,
19
+ dirpath,
20
+ imsize: Tuple[int],
21
+ load_keypoints: bool,
22
+ transform):
23
+ dirpath = pathlib.Path(dirpath)
24
+ self.dirpath = dirpath
25
+ self.transform = transform
26
+ self.imsize = imsize[0]
27
+ self.load_keypoints = load_keypoints
28
+ assert self.dirpath.is_dir(),\
29
+ f"Did not find dataset at: {dirpath}"
30
+ image_dir = self.dirpath.joinpath("images", str(self.imsize))
31
+ self.image_paths = list(image_dir.glob("*.png"))
32
+ assert len(self.image_paths) > 0,\
33
+ f"Did not find images in: {image_dir}"
34
+ self.image_paths.sort(key=lambda x: int(x.stem))
35
+ self.landmarks = np.load(self.dirpath.joinpath("landmarks.npy")).reshape(-1, 7, 2).astype(np.float32)
36
+
37
+ self.bounding_boxes = torch.load(self.dirpath.joinpath("bounding_box", f"{self.imsize}.torch"))
38
+ assert len(self.image_paths) == len(self.bounding_boxes)
39
+ assert len(self.image_paths) == len(self.landmarks)
40
+ logger.log(
41
+ f"Dataset loaded from: {dirpath}. Number of samples:{len(self)}, imsize={imsize}")
42
+
43
+ def get_mask(self, idx):
44
+ mask = torch.ones((1, self.imsize, self.imsize), dtype=torch.bool)
45
+ bounding_box = self.bounding_boxes[idx]
46
+ x0, y0, x1, y1 = bounding_box
47
+ mask[:, y0:y1, x0:x1] = 0
48
+ return mask
49
+
50
+ def __len__(self):
51
+ return len(self.image_paths)
52
+
53
+ def __getitem__(self, index):
54
+ impath = self.image_paths[index]
55
+ if PYSPNG_IMPORTED:
56
+ with open(impath, "rb") as fp:
57
+ im = pyspng.load(fp.read())
58
+ else:
59
+ with Image.open(impath) as fp:
60
+ im = np.array(fp)
61
+ im = torch.from_numpy(np.rollaxis(im, -1, 0))
62
+ masks = self.get_mask(index)
63
+ landmark = self.landmarks[index]
64
+ batch = {
65
+ "img": im,
66
+ "mask": masks,
67
+ }
68
+ if self.load_keypoints:
69
+ batch["keypoints"] = landmark
70
+ if self.transform is None:
71
+ return batch
72
+ return self.transform(batch)
73
+
74
+
75
+ class FDF256Dataset:
76
+
77
+ def __init__(self,
78
+ dirpath,
79
+ load_keypoints: bool,
80
+ transform):
81
+ dirpath = pathlib.Path(dirpath)
82
+ self.dirpath = dirpath
83
+ self.transform = transform
84
+ self.load_keypoints = load_keypoints
85
+ assert self.dirpath.is_dir(),\
86
+ f"Did not find dataset at: {dirpath}"
87
+ image_dir = self.dirpath.joinpath("images")
88
+ self.image_paths = list(image_dir.glob("*.png"))
89
+ assert len(self.image_paths) > 0,\
90
+ f"Did not find images in: {image_dir}"
91
+ self.image_paths.sort(key=lambda x: int(x.stem))
92
+ self.landmarks = np.load(self.dirpath.joinpath("landmarks.npy")).reshape(-1, 7, 2).astype(np.float32)
93
+ self.bounding_boxes = torch.from_numpy(np.load(self.dirpath.joinpath("bounding_box.npy")))
94
+ assert len(self.image_paths) == len(self.bounding_boxes)
95
+ assert len(self.image_paths) == len(self.landmarks)
96
+ logger.log(
97
+ f"Dataset loaded from: {dirpath}. Number of samples:{len(self)}")
98
+
99
+ def get_mask(self, idx):
100
+ mask = torch.ones((1, 256, 256), dtype=torch.bool)
101
+ bounding_box = self.bounding_boxes[idx]
102
+ x0, y0, x1, y1 = bounding_box
103
+ mask[:, y0:y1, x0:x1] = 0
104
+ return mask
105
+
106
+ def __len__(self):
107
+ return len(self.image_paths)
108
+
109
+ def __getitem__(self, index):
110
+ impath = self.image_paths[index]
111
+ if PYSPNG_IMPORTED:
112
+ with open(impath, "rb") as fp:
113
+ im = pyspng.load(fp.read())
114
+ else:
115
+ with Image.open(impath) as fp:
116
+ im = np.array(fp)
117
+ im = torch.from_numpy(np.rollaxis(im, -1, 0))
118
+ masks = self.get_mask(index)
119
+ landmark = self.landmarks[index]
120
+ batch = {
121
+ "img": im,
122
+ "mask": masks,
123
+ }
124
+ if self.load_keypoints:
125
+ batch["keypoints"] = landmark
126
+ if self.transform is None:
127
+ return batch
128
+ return self.transform(batch)
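
Both dataset classes return plain dicts, which is what the collate/GPU-transform pipeline above expects. A minimal sketch of reading one sample directly, assuming the FDF256 layout checked in __init__ (an images/ folder plus landmarks.npy and bounding_box.npy under the given dirpath):

    from dp2.data.datasets.fdf import FDF256Dataset

    dataset = FDF256Dataset("data/fdf256/val", load_keypoints=True, transform=None)
    sample = dataset[0]
    print(sample["img"].shape)        # uint8 CHW image, 3 x 256 x 256
    print(sample["mask"].shape)       # (1, 256, 256) bool mask, zero inside the face box
    print(sample["keypoints"].shape)  # (7, 2) float32 landmarks
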
dp2/data/datasets/fdf128_wds.py ADDED
@@ -0,0 +1,96 @@
1
+ import torch
2
+ import tops
3
+ import numpy as np
4
+ import io
5
+ import webdataset as wds
6
+ import os
7
+ from ..utils import png_decoder, get_num_workers, collate_fn
8
+
9
+
10
+ def kp_decoder(x):
11
+ # Keypoints are between [0, 1] for webdataset
12
+ keypoints = torch.from_numpy(np.load(io.BytesIO(x))).float().view(7, 2).clamp(0, 1)
13
+ keypoints = torch.cat((keypoints, torch.ones((7, 1))), dim=-1)
14
+ return keypoints
15
+
16
+
17
+ def bbox_decoder(x):
18
+ return torch.from_numpy(np.load(io.BytesIO(x))).float().view(4)
19
+
20
+
21
+ class BBoxToMask:
22
+
23
+ def __call__(self, sample):
24
+ imsize = sample["image.png"].shape[-1]
25
+ bbox = sample["bounding_box.npy"] * imsize
26
+ x0, y0, x1, y1 = np.round(bbox).astype(np.int64)
27
+ mask = torch.ones((1, imsize, imsize), dtype=torch.bool)
28
+ mask[:, y0:y1, x0:x1] = 0
29
+ sample["mask"] = mask
30
+ return sample
31
+
32
+
33
+ def get_dataloader_fdf_wds(
34
+ path,
35
+ batch_size: int,
36
+ num_workers: int,
37
+ transform: torch.nn.Module,
38
+ gpu_transform: torch.nn.Module,
39
+ infinite: bool,
40
+ shuffle: bool,
41
+ partial_batches: bool,
42
+ sample_shuffle=10_000,
43
+ tar_shuffle=100,
44
+ channels_last=False,
45
+ ):
46
+ # Need to set this for split_by_node to work.
47
+ os.environ["RANK"] = str(tops.rank())
48
+ os.environ["WORLD_SIZE"] = str(tops.world_size())
49
+ if infinite:
50
+ pipeline = [wds.ResampledShards(str(path))]
51
+ else:
52
+ pipeline = [wds.SimpleShardList(str(path))]
53
+ if shuffle:
54
+ pipeline.append(wds.shuffle(tar_shuffle))
55
+ pipeline.extend([
56
+ wds.split_by_node,
57
+ wds.split_by_worker,
58
+ ])
59
+ if shuffle:
60
+ pipeline.append(wds.shuffle(sample_shuffle))
61
+
62
+ decoder = [
63
+ wds.handle_extension("image.png", png_decoder),
64
+ wds.handle_extension("keypoints.npy", kp_decoder),
65
+ ]
66
+
67
+ rename_keys = [
68
+ ["img", "image.png"],
69
+ ["keypoints", "keypoints.npy"],
70
+ ["__key__", "__key__"],
71
+ ["mask", "mask"]
72
+ ]
73
+
74
+ pipeline.extend([
75
+ wds.tarfile_to_samples(),
76
+ wds.decode(*decoder),
77
+ ])
78
+ pipeline.append(wds.map(BBoxToMask()))
79
+ pipeline.extend([
80
+ wds.batched(batch_size, collation_fn=collate_fn, partial=partial_batches),
81
+ wds.rename_keys(*rename_keys),
82
+ ])
83
+
84
+ if transform is not None:
85
+ pipeline.append(wds.map(transform))
86
+ pipeline = wds.DataPipeline(*pipeline)
87
+ if infinite:
88
+ pipeline = pipeline.repeat(nepochs=1000000)
89
+
90
+ loader = wds.WebLoader(
91
+ pipeline, batch_size=None, shuffle=False,
92
+ num_workers=get_num_workers(num_workers),
93
+ persistent_workers=True,
94
+ )
95
+ loader = tops.DataPrefetcher(loader, gpu_transform, channels_last=channels_last, to_float=False)
96
+ return loader
dp2/data/datasets/fdh.py ADDED
@@ -0,0 +1,142 @@
1
+ import torch
2
+ import tops
3
+ import numpy as np
4
+ import io
5
+ import webdataset as wds
6
+ import os
7
+ import json
8
+ from pathlib import Path
9
+ from ..utils import png_decoder, mask_decoder, get_num_workers, collate_fn
10
+
11
+
12
+ def kp_decoder(x):
13
+ # Keypoints are between [0, 1] for webdataset
14
+ keypoints = torch.from_numpy(np.load(io.BytesIO(x))).float()
15
+ def check_outside(x): return (x < 0).logical_or(x > 1)
16
+ is_outside = check_outside(keypoints[:, 0]).logical_or(
17
+ check_outside(keypoints[:, 1])
18
+ )
19
+ keypoints[:, 2] = (keypoints[:, 2] > 0).logical_and(is_outside.logical_not())
20
+ return keypoints
21
+
22
+
23
+ def vertices_decoder(x):
24
+ vertices = torch.from_numpy(np.load(io.BytesIO(x)).astype(np.int32))
25
+ return vertices.squeeze()[None]
26
+
27
+
28
+ class InsertNewKeypoints:
29
+
30
+ def __init__(self, keypoints_path: Path) -> None:
31
+ with open(keypoints_path, "r") as fp:
32
+ self.keypoints = json.load(fp)
33
+
34
+ def __call__(self, sample):
35
+ key = sample["__key__"]
36
+ keypoints = torch.tensor(self.keypoints[key], dtype=torch.float32)
37
+ def check_outside(x): return (x < 0).logical_or(x > 1)
38
+ is_outside = check_outside(keypoints[:, 0]).logical_or(
39
+ check_outside(keypoints[:, 1])
40
+ )
41
+ keypoints[:, 2] = (keypoints[:, 2] > 0).logical_and(is_outside.logical_not())
42
+
43
+ sample["keypoints.npy"] = keypoints
44
+ return sample
45
+
46
+
47
+ def get_dataloader_fdh_wds(
48
+ path,
49
+ batch_size: int,
50
+ num_workers: int,
51
+ transform: torch.nn.Module,
52
+ gpu_transform: torch.nn.Module,
53
+ infinite: bool,
54
+ shuffle: bool,
55
+ partial_batches: bool,
56
+ load_embedding: bool,
57
+ sample_shuffle=10_000,
58
+ tar_shuffle=100,
59
+ read_condition=False,
60
+ channels_last=False,
61
+ load_new_keypoints=False,
62
+ keypoints_split=None,
63
+ ):
64
+ # Need to set this for split_by_node to work.
65
+ os.environ["RANK"] = str(tops.rank())
66
+ os.environ["WORLD_SIZE"] = str(tops.world_size())
67
+ if infinite:
68
+ pipeline = [wds.ResampledShards(str(path))]
69
+ else:
70
+ pipeline = [wds.SimpleShardList(str(path))]
71
+ if shuffle:
72
+ pipeline.append(wds.shuffle(tar_shuffle))
73
+ pipeline.extend([
74
+ wds.split_by_node,
75
+ wds.split_by_worker,
76
+ ])
77
+ if shuffle:
78
+ pipeline.append(wds.shuffle(sample_shuffle))
79
+
80
+ decoder = [
81
+ wds.handle_extension("image.png", png_decoder),
82
+ wds.handle_extension("mask.png", mask_decoder),
83
+ wds.handle_extension("maskrcnn_mask.png", mask_decoder),
84
+ wds.handle_extension("keypoints.npy", kp_decoder),
85
+ ]
86
+
87
+ rename_keys = [
88
+ ["img", "image.png"], ["mask", "mask.png"],
89
+ ["keypoints", "keypoints.npy"], ["maskrcnn_mask", "maskrcnn_mask.png"],
90
+ ["__key__", "__key__"]
91
+ ]
92
+ if load_embedding:
93
+ decoder.extend([
94
+ wds.handle_extension("vertices.npy", vertices_decoder),
95
+ wds.handle_extension("E_mask.png", mask_decoder)
96
+ ])
97
+ rename_keys.extend([
98
+ ["vertices", "vertices.npy"],
99
+ ["E_mask", "e_mask.png"]
100
+ ])
101
+
102
+ if read_condition:
103
+ decoder.append(
104
+ wds.handle_extension("condition.png", png_decoder)
105
+ )
106
+ rename_keys.append(["condition", "condition.png"])
107
+
108
+ pipeline.extend([
109
+ wds.tarfile_to_samples(),
110
+ wds.decode(*decoder),
111
+
112
+ ])
113
+ if load_new_keypoints:
114
+ assert keypoints_split in ["train", "val"]
115
+ keypoint_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/1eb88522-8b91-49c7-b56a-ed98a9c7888cef9c0429-a385-4248-abe3-8682de26d041f268aed1-7c88-4677-baad-7623c2ee330f"
116
+ file_name = "fdh_keypoints_val-050133b34d.json"
117
+ if keypoints_split == "train":
118
+ keypoint_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/3e828b1c-d6c0-4622-90bc-1b2cce48ccfff14ab45d-0a5c-431d-be13-7e60580765bd7938601c-e72e-41d9-8836-fffc49e76f58"
119
+ file_name = "fdh_keypoints_train-2cff11f69a.json"
120
+ # Set check_hash=True if you suspect download is incorrect.
121
+ filepath = tops.download_file(keypoint_url, file_name=file_name, check_hash=False)
122
+ pipeline.append(
123
+ wds.map(InsertNewKeypoints(filepath))
124
+ )
125
+ pipeline.extend([
126
+ wds.batched(batch_size, collation_fn=collate_fn, partial=partial_batches),
127
+ wds.rename_keys(*rename_keys),
128
+ ])
129
+
130
+ if transform is not None:
131
+ pipeline.append(wds.map(transform))
132
+ pipeline = wds.DataPipeline(*pipeline)
133
+ if infinite:
134
+ pipeline = pipeline.repeat(nepochs=1000000)
135
+
136
+ loader = wds.WebLoader(
137
+ pipeline, batch_size=None, shuffle=False,
138
+ num_workers=get_num_workers(num_workers),
139
+ persistent_workers=True,
140
+ )
141
+ loader = tops.DataPrefetcher(loader, gpu_transform, channels_last=channels_last, to_float=False)
142
+ return loader
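A small self-contained check of the keypoint convention used by kp_decoder above: coordinates are normalized to [0, 1], and any keypoint outside that range has its visibility flag zeroed. This assumes the dp2 package from this commit is importable; the sample keypoints are made up for illustration.

import io
import numpy as np
from dp2.data.datasets.fdh import kp_decoder

kps = np.array([
    [0.5, 0.5, 1.0],   # inside the image -> stays visible
    [1.2, 0.4, 1.0],   # outside the normalized range -> visibility set to 0
], dtype=np.float32)
buf = io.BytesIO()
np.save(buf, kps)
print(kp_decoder(buf.getvalue())[:, 2])  # tensor([1., 0.])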
dp2/data/transforms/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .transforms import RandomCrop, CreateCondition, CreateEmbedding, Resize, ToFloat, Normalize
2
+ from .stylegan2_transform import StyleGANAugmentPipe
dp2/data/transforms/functional.py ADDED
@@ -0,0 +1,57 @@
1
+ import torchvision.transforms.functional as F
2
+ import torch
3
+ import pickle
4
+ from tops import download_file, assert_shape
5
+ from typing import Dict
6
+ from functools import lru_cache
7
+
8
+ global symmetry_transform
9
+
10
+
11
+ @lru_cache(maxsize=1)
12
+ def get_symmetry_transform(symmetry_url):
13
+ file_name = download_file(symmetry_url)
14
+ with open(file_name, "rb") as fp:
15
+ symmetry = pickle.load(fp)
16
+ return torch.from_numpy(symmetry["vertex_transforms"]).long()
17
+
18
+
19
+ hflip_handled_cases = set([
20
+ "keypoints", "img", "mask", "border", "semantic_mask", "vertices", "E_mask", "embed_map", "condition",
21
+ "embedding", "vertx2cat", "maskrcnn_mask", "__key__"])
22
+
23
+
24
+ def hflip(container: Dict[str, torch.Tensor], flip_map=None) -> Dict[str, torch.Tensor]:
25
+ container["img"] = F.hflip(container["img"])
26
+ if "condition" in container:
27
+ container["condition"] = F.hflip(container["condition"])
28
+ if "embedding" in container:
29
+ container["embedding"] = F.hflip(container["embedding"])
30
+ assert all([key in hflip_handled_cases for key in container]), container.keys()
31
+ if "keypoints" in container:
32
+ assert flip_map is not None
33
+ if container["keypoints"].ndim == 3:
34
+ keypoints = container["keypoints"][:, flip_map, :]
35
+ keypoints[:, :, 0] = 1 - keypoints[:, :, 0]
36
+ else:
37
+ assert_shape(container["keypoints"], (None, 3))
38
+ keypoints = container["keypoints"][flip_map, :]
39
+ keypoints[:, 0] = 1 - keypoints[:, 0]
40
+ container["keypoints"] = keypoints
41
+ if "mask" in container:
42
+ container["mask"] = F.hflip(container["mask"])
43
+ if "border" in container:
44
+ container["border"] = F.hflip(container["border"])
45
+ if "semantic_mask" in container:
46
+ container["semantic_mask"] = F.hflip(container["semantic_mask"])
47
+ if "vertices" in container:
48
+ symmetry_transform = get_symmetry_transform(
49
+ "https://dl.fbaipublicfiles.com/densepose/meshes/symmetry/symmetry_smpl_27554.pkl")
50
+ container["vertices"] = F.hflip(container["vertices"])
51
+ symmetry_transform_ = symmetry_transform.to(container["vertices"].device)
52
+ container["vertices"] = symmetry_transform_[container["vertices"].long()]
53
+ if "E_mask" in container:
54
+ container["E_mask"] = F.hflip(container["E_mask"])
55
+ if "maskrcnn_mask" in container:
56
+ container["maskrcnn_mask"] = F.hflip(container["maskrcnn_mask"])
57
+ return container
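A usage sketch of hflip above (assuming dp2 and tops are importable): the image is mirrored, and normalized keypoint x-coordinates are reflected through x -> 1 - x after reordering joints with a flip map. The single-joint flip map below is a made-up minimal example.

import torch
from dp2.data.transforms.functional import hflip

container = {
    "img": torch.rand(3, 8, 8),
    "keypoints": torch.tensor([[0.25, 0.5, 1.0]]),  # (N, 3): x, y, visibility
}
flip_map = [0]  # a single self-symmetric joint, e.g. the nose
flipped = hflip(container, flip_map)
print(flipped["keypoints"][0, 0])  # tensor(0.7500)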
dp2/data/transforms/stylegan2_transform.py ADDED
@@ -0,0 +1,394 @@
1
+ import numpy as np
2
+ import scipy.signal
3
+ import torch
4
+ try:
5
+ from sg3_torch_utils import misc
6
+ from sg3_torch_utils.ops import upfirdn2d
7
+ from sg3_torch_utils.ops import grid_sample_gradfix
8
+ from sg3_torch_utils.ops import conv2d_gradfix
9
+ except Exception:  # sg3_torch_utils is optional at import time; StyleGANAugmentPipe needs it at runtime
10
+ pass
11
+ #----------------------------------------------------------------------------
12
+ # Coefficients of various wavelet decomposition low-pass filters.
13
+
14
+ wavelets = {
15
+ 'haar': [0.7071067811865476, 0.7071067811865476],
16
+ 'db1': [0.7071067811865476, 0.7071067811865476],
17
+ 'db2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025],
18
+ 'db3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569],
19
+ 'db4': [-0.010597401784997278, 0.032883011666982945, 0.030841381835986965, -0.18703481171888114, -0.02798376941698385, 0.6308807679295904, 0.7148465705525415, 0.23037781330885523],
20
+ 'db5': [0.003335725285001549, -0.012580751999015526, -0.006241490213011705, 0.07757149384006515, -0.03224486958502952, -0.24229488706619015, 0.13842814590110342, 0.7243085284385744, 0.6038292697974729, 0.160102397974125],
21
+ 'db6': [-0.00107730108499558, 0.004777257511010651, 0.0005538422009938016, -0.031582039318031156, 0.02752286553001629, 0.09750160558707936, -0.12976686756709563, -0.22626469396516913, 0.3152503517092432, 0.7511339080215775, 0.4946238903983854, 0.11154074335008017],
22
+ 'db7': [0.0003537138000010399, -0.0018016407039998328, 0.00042957797300470274, 0.012550998556013784, -0.01657454163101562, -0.03802993693503463, 0.0806126091510659, 0.07130921926705004, -0.22403618499416572, -0.14390600392910627, 0.4697822874053586, 0.7291320908465551, 0.39653931948230575, 0.07785205408506236],
23
+ 'db8': [-0.00011747678400228192, 0.0006754494059985568, -0.0003917403729959771, -0.00487035299301066, 0.008746094047015655, 0.013981027917015516, -0.04408825393106472, -0.01736930100202211, 0.128747426620186, 0.00047248457399797254, -0.2840155429624281, -0.015829105256023893, 0.5853546836548691, 0.6756307362980128, 0.3128715909144659, 0.05441584224308161],
24
+ 'sym2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025],
25
+ 'sym3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569],
26
+ 'sym4': [-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427],
27
+ 'sym5': [0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728],
28
+ 'sym6': [0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148],
29
+ 'sym7': [0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255],
30
+ 'sym8': [-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609],
31
+ }
32
+
33
+ #----------------------------------------------------------------------------
34
+ # Helpers for constructing transformation matrices.
35
+
36
+
37
+ def matrix(*rows, device=None):
38
+ assert all(len(row) == len(rows[0]) for row in rows)
39
+ elems = [x for row in rows for x in row]
40
+ ref = [x for x in elems if isinstance(x, torch.Tensor)]
41
+ if len(ref) == 0:
42
+ return misc.constant(np.asarray(rows), device=device)
43
+ assert device is None or device == ref[0].device
44
+ elems = [x if isinstance(x, torch.Tensor) else misc.constant(x, shape=ref[0].shape, device=ref[0].device) for x in elems]
45
+ return torch.stack(elems, dim=-1).reshape(ref[0].shape + (len(rows), -1))
46
+
47
+
48
+ def translate2d(tx, ty, **kwargs):
49
+ return matrix(
50
+ [1, 0, tx],
51
+ [0, 1, ty],
52
+ [0, 0, 1],
53
+ **kwargs)
54
+
55
+
56
+ def translate3d(tx, ty, tz, **kwargs):
57
+ return matrix(
58
+ [1, 0, 0, tx],
59
+ [0, 1, 0, ty],
60
+ [0, 0, 1, tz],
61
+ [0, 0, 0, 1],
62
+ **kwargs)
63
+
64
+
65
+ def scale2d(sx, sy, **kwargs):
66
+ return matrix(
67
+ [sx, 0, 0],
68
+ [0, sy, 0],
69
+ [0, 0, 1],
70
+ **kwargs)
71
+
72
+
73
+ def scale3d(sx, sy, sz, **kwargs):
74
+ return matrix(
75
+ [sx, 0, 0, 0],
76
+ [0, sy, 0, 0],
77
+ [0, 0, sz, 0],
78
+ [0, 0, 0, 1],
79
+ **kwargs)
80
+
81
+
82
+ def rotate2d(theta, **kwargs):
83
+ return matrix(
84
+ [torch.cos(theta), torch.sin(-theta), 0],
85
+ [torch.sin(theta), torch.cos(theta), 0],
86
+ [0, 0, 1],
87
+ **kwargs)
88
+
89
+
90
+ def rotate3d(v, theta, **kwargs):
91
+ vx = v[..., 0]; vy = v[..., 1]; vz = v[..., 2]
92
+ s = torch.sin(theta); c = torch.cos(theta); cc = 1 - c
93
+ return matrix(
94
+ [vx*vx*cc+c, vx*vy*cc-vz*s, vx*vz*cc+vy*s, 0],
95
+ [vy*vx*cc+vz*s, vy*vy*cc+c, vy*vz*cc-vx*s, 0],
96
+ [vz*vx*cc-vy*s, vz*vy*cc+vx*s, vz*vz*cc+c, 0],
97
+ [0, 0, 0, 1],
98
+ **kwargs)
99
+
100
+
101
+ def translate2d_inv(tx, ty, **kwargs):
102
+ return translate2d(-tx, -ty, **kwargs)
103
+
104
+
105
+ def scale2d_inv(sx, sy, **kwargs):
106
+ return scale2d(1 / sx, 1 / sy, **kwargs)
107
+
108
+
109
+ def rotate2d_inv(theta, **kwargs):
110
+ return rotate2d(-theta, **kwargs)
111
+
112
+
113
+ class StyleGANAugmentPipe(torch.nn.Module):
114
+ def __init__(self,
115
+ rotate90=0, xint=0, xint_max=0.125,
116
+ scale=0, rotate=0, aniso=0, xfrac=0, scale_std=0.2, rotate_max=1, aniso_std=0.2, xfrac_std=0.125,
117
+ brightness=0, contrast=0, lumaflip=0, hue=0, saturation=0, brightness_std=0.2, contrast_std=0.5,
118
+ hue_max=1, saturation_std=1,
119
+ imgfilter=0, imgfilter_bands=[1,1,1,1], imgfilter_std=1,
120
+ ):
121
+ super().__init__()
122
+ self.register_buffer('p', torch.ones([])) # Overall multiplier for augmentation probability.
123
+
124
+ # Pixel blitting.
125
+ self.rotate90 = float(rotate90) # Probability multiplier for 90 degree rotations.
126
+ self.xint = float(xint) # Probability multiplier for integer translation.
127
+ self.xint_max = float(xint_max) # Range of integer translation, relative to image dimensions.
128
+
129
+ # General geometric transformations.
130
+ self.scale = float(scale) # Probability multiplier for isotropic scaling.
131
+ self.rotate = float(rotate) # Probability multiplier for arbitrary rotation.
132
+ self.aniso = float(aniso) # Probability multiplier for anisotropic scaling.
133
+ self.xfrac = float(xfrac) # Probability multiplier for fractional translation.
134
+ self.scale_std = float(scale_std) # Log2 standard deviation of isotropic scaling.
135
+ self.rotate_max = float(rotate_max) # Range of arbitrary rotation, 1 = full circle.
136
+ self.aniso_std = float(aniso_std) # Log2 standard deviation of anisotropic scaling.
137
+ self.xfrac_std = float(xfrac_std) # Standard deviation of fractional translation, relative to image dimensions.
138
+
139
+ # Color transformations.
140
+ self.brightness = float(brightness) # Probability multiplier for brightness.
141
+ self.contrast = float(contrast) # Probability multiplier for contrast.
142
+ self.lumaflip = float(lumaflip) # Probability multiplier for luma flip.
143
+ self.hue = float(hue) # Probability multiplier for hue rotation.
144
+ self.saturation = float(saturation) # Probability multiplier for saturation.
145
+ self.brightness_std = float(brightness_std) # Standard deviation of brightness.
146
+ self.contrast_std = float(contrast_std) # Log2 standard deviation of contrast.
147
+ self.hue_max = float(hue_max) # Range of hue rotation, 1 = full circle.
148
+ self.saturation_std = float(saturation_std) # Log2 standard deviation of saturation.
149
+
150
+ # Image-space filtering.
151
+ self.imgfilter = float(imgfilter) # Probability multiplier for image-space filtering.
152
+ self.imgfilter_bands = list(imgfilter_bands) # Probability multipliers for individual frequency bands.
153
+ self.imgfilter_std = float(imgfilter_std) # Log2 standard deviation of image-space filter amplification.
154
+
155
+ # Setup orthogonal lowpass filter for geometric augmentations.
156
+ self.register_buffer('Hz_geom', upfirdn2d.setup_filter(wavelets['sym6']))
157
+
158
+ # Construct filter bank for image-space filtering.
159
+ Hz_lo = np.asarray(wavelets['sym2']) # H(z)
160
+ Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z)
161
+ Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2 # H(z) * H(z^-1) / 2
162
+ Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2 # H(-z) * H(-z^-1) / 2
163
+ Hz_fbank = np.eye(4, 1) # Bandpass(H(z), b_i)
164
+ for i in range(1, Hz_fbank.shape[0]):
165
+ Hz_fbank = np.dstack([Hz_fbank, np.zeros_like(Hz_fbank)]).reshape(Hz_fbank.shape[0], -1)[:, :-1]
166
+ Hz_fbank = scipy.signal.convolve(Hz_fbank, [Hz_lo2])
167
+ Hz_fbank[i, (Hz_fbank.shape[1] - Hz_hi2.size) // 2 : (Hz_fbank.shape[1] + Hz_hi2.size) // 2] += Hz_hi2
168
+ self.register_buffer('Hz_fbank', torch.as_tensor(Hz_fbank, dtype=torch.float32))
169
+
170
+ def forward(self, batch, debug_percentile=None):
171
+ images = batch["img"]
172
+ batch["vertices"] = batch["vertices"].float()
173
+ assert isinstance(images, torch.Tensor) and images.ndim == 4
174
+ batch_size, num_channels, height, width = images.shape
175
+ device = images.device
176
+ self.Hz_fbank = self.Hz_fbank.to(device)
177
+ self.Hz_geom = self.Hz_geom.to(device)
178
+ if debug_percentile is not None:
179
+ debug_percentile = torch.as_tensor(debug_percentile, dtype=torch.float32, device=device)
180
+
181
+ # -------------------------------------
182
+ # Select parameters for pixel blitting.
183
+ # -------------------------------------
184
+
185
+ # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in
186
+ I_3 = torch.eye(3, device=device)
187
+ G_inv = I_3
188
+
189
+ # Apply integer translation with probability (xint * strength).
190
+ if self.xint > 0:
191
+ t = (torch.rand([batch_size, 2], device=device) * 2 - 1) * self.xint_max
192
+ t = torch.where(torch.rand([batch_size, 1], device=device) < self.xint * self.p, t, torch.zeros_like(t))
193
+ if debug_percentile is not None:
194
+ t = torch.full_like(t, (debug_percentile * 2 - 1) * self.xint_max)
195
+ G_inv = G_inv @ translate2d_inv(torch.round(t[:,0] * width), torch.round(t[:,1] * height))
196
+
197
+ # --------------------------------------------------------
198
+ # Select parameters for general geometric transformations.
199
+ # --------------------------------------------------------
200
+
201
+ # Apply isotropic scaling with probability (scale * strength).
202
+ if self.scale > 0:
203
+ s = torch.exp2(torch.randn([batch_size], device=device) * self.scale_std)
204
+ s = torch.where(torch.rand([batch_size], device=device) < self.scale * self.p, s, torch.ones_like(s))
205
+ if debug_percentile is not None:
206
+ s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.scale_std))
207
+ G_inv = G_inv @ scale2d_inv(s, s)
208
+
209
+ # Apply pre-rotation with probability p_rot.
210
+ p_rot = 1 - torch.sqrt((1 - self.rotate * self.p).clamp(0, 1)) # P(pre OR post) = p
211
+ if self.rotate > 0:
212
+ theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max
213
+ theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta))
214
+ if debug_percentile is not None:
215
+ theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.rotate_max)
216
+ G_inv = G_inv @ rotate2d_inv(-theta) # Before anisotropic scaling.
217
+
218
+ # Apply anisotropic scaling with probability (aniso * strength).
219
+ if self.aniso > 0:
220
+ s = torch.exp2(torch.randn([batch_size], device=device) * self.aniso_std)
221
+ s = torch.where(torch.rand([batch_size], device=device) < self.aniso * self.p, s, torch.ones_like(s))
222
+ if debug_percentile is not None:
223
+ s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.aniso_std))
224
+ G_inv = G_inv @ scale2d_inv(s, 1 / s)
225
+
226
+ # Apply post-rotation with probability p_rot.
227
+ if self.rotate > 0:
228
+ theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max
229
+ theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta))
230
+ if debug_percentile is not None:
231
+ theta = torch.zeros_like(theta)
232
+ G_inv = G_inv @ rotate2d_inv(-theta) # After anisotropic scaling.
233
+
234
+ # Apply fractional translation with probability (xfrac * strength).
235
+ if self.xfrac > 0:
236
+ t = torch.randn([batch_size, 2], device=device) * self.xfrac_std
237
+ t = torch.where(torch.rand([batch_size, 1], device=device) < self.xfrac * self.p, t, torch.zeros_like(t))
238
+ if debug_percentile is not None:
239
+ t = torch.full_like(t, torch.erfinv(debug_percentile * 2 - 1) * self.xfrac_std)
240
+ G_inv = G_inv @ translate2d_inv(t[:,0] * width, t[:,1] * height)
241
+
242
+ # ----------------------------------
243
+ # Execute geometric transformations.
244
+ # ----------------------------------
245
+
246
+ # Execute if the transform is not identity.
247
+ if G_inv is not I_3:
248
+ # Calculate padding.
249
+ cx = (width - 1) / 2
250
+ cy = (height - 1) / 2
251
+ cp = matrix([-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1], device=device) # [idx, xyz]
252
+ cp = G_inv @ cp.t() # [batch, xyz, idx]
253
+ Hz_pad = self.Hz_geom.shape[0] // 4
254
+ margin = cp[:, :2, :].permute(1, 0, 2).flatten(1) # [xy, batch * idx]
255
+ margin = torch.cat([-margin, margin]).max(dim=1).values # [x0, y0, x1, y1]
256
+ margin = margin + misc.constant([Hz_pad * 2 - cx, Hz_pad * 2 - cy] * 2, device=device)
257
+ margin = margin.max(misc.constant([0, 0] * 2, device=device))
258
+ margin = margin.min(misc.constant([width-1, height-1] * 2, device=device))
259
+ mx0, my0, mx1, my1 = margin.ceil().to(torch.int32)
260
+
261
+ # Pad image and adjust origin.
262
+ images = torch.nn.functional.pad(input=images, pad=[mx0,mx1,my0,my1], mode='reflect')
263
+ batch["mask"] = torch.nn.functional.pad(input=batch["mask"], pad=[mx0,mx1,my0,my1], mode='constant', value=1.0)
264
+ batch["E_mask"] = torch.nn.functional.pad(input=batch["E_mask"], pad=[mx0,mx1,my0,my1], mode='constant', value=0.0)
265
+ batch["vertices"] = torch.nn.functional.pad(input=batch["vertices"], pad=[mx0,mx1,my0,my1], mode='constant', value=0.0)
266
+ G_inv = translate2d((mx0 - mx1) / 2, (my0 - my1) / 2) @ G_inv
267
+
268
+ # Upsample.
269
+ images = upfirdn2d.upsample2d(x=images, f=self.Hz_geom, up=2)
270
+ batch["mask"] = torch.nn.functional.interpolate(batch["mask"], scale_factor=2, mode="nearest")
271
+ batch["E_mask"] = torch.nn.functional.interpolate(batch["E_mask"], scale_factor=2, mode="nearest")
272
+ batch["vertices"] = torch.nn.functional.interpolate(batch["vertices"], scale_factor=2, mode="nearest")
273
+ G_inv = scale2d(2, 2, device=device) @ G_inv @ scale2d_inv(2, 2, device=device)
274
+ G_inv = translate2d(-0.5, -0.5, device=device) @ G_inv @ translate2d_inv(-0.5, -0.5, device=device)
275
+
276
+ # Execute transformation.
277
+ shape = [batch_size, num_channels, (height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2]
278
+ G_inv = scale2d(2 / images.shape[3], 2 / images.shape[2], device=device) @ G_inv @ scale2d_inv(2 / shape[3], 2 / shape[2], device=device)
279
+ grid = torch.nn.functional.affine_grid(theta=G_inv[:,:2,:], size=shape, align_corners=False)
280
+ images = grid_sample_gradfix.grid_sample(images, grid)
281
+
282
+ batch["mask"] = torch.nn.functional.grid_sample(
283
+ input=batch["mask"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)
284
+ batch["E_mask"] = torch.nn.functional.grid_sample(
285
+ input=batch["E_mask"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)
286
+ batch["vertices"] = torch.nn.functional.grid_sample(
287
+ input=batch["vertices"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)
288
+
289
+
290
+ # Downsample and crop.
291
+ images = upfirdn2d.downsample2d(x=images, f=self.Hz_geom, down=2, padding=-Hz_pad*2, flip_filter=True)
292
+ batch["mask"] = torch.nn.functional.interpolate(batch["mask"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
293
+ batch["E_mask"] = torch.nn.functional.interpolate(batch["E_mask"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
294
+ batch["vertices"] = torch.nn.functional.interpolate(batch["vertices"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
295
+ # --------------------------------------------
296
+ # Select parameters for color transformations.
297
+ # --------------------------------------------
298
+
299
+ # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out
300
+ I_4 = torch.eye(4, device=device)
301
+ C = I_4
302
+
303
+ # Apply brightness with probability (brightness * strength).
304
+ if self.brightness > 0:
305
+ b = torch.randn([batch_size], device=device) * self.brightness_std
306
+ b = torch.where(torch.rand([batch_size], device=device) < self.brightness * self.p, b, torch.zeros_like(b))
307
+ if debug_percentile is not None:
308
+ b = torch.full_like(b, torch.erfinv(debug_percentile * 2 - 1) * self.brightness_std)
309
+ C = translate3d(b, b, b) @ C
310
+
311
+ # Apply contrast with probability (contrast * strength).
312
+ if self.contrast > 0:
313
+ c = torch.exp2(torch.randn([batch_size], device=device) * self.contrast_std)
314
+ c = torch.where(torch.rand([batch_size], device=device) < self.contrast * self.p, c, torch.ones_like(c))
315
+ if debug_percentile is not None:
316
+ c = torch.full_like(c, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.contrast_std))
317
+ C = scale3d(c, c, c) @ C
318
+
319
+ # Apply luma flip with probability (lumaflip * strength).
320
+ v = misc.constant(np.asarray([1, 1, 1, 0]) / np.sqrt(3), device=device) # Luma axis.
321
+
322
+ # Apply hue rotation with probability (hue * strength).
323
+ if self.hue > 0 and num_channels > 1:
324
+ theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.hue_max
325
+ theta = torch.where(torch.rand([batch_size], device=device) < self.hue * self.p, theta, torch.zeros_like(theta))
326
+ if debug_percentile is not None:
327
+ theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.hue_max)
328
+ C = rotate3d(v, theta) @ C # Rotate around v.
329
+
330
+ # Apply saturation with probability (saturation * strength).
331
+ if self.saturation > 0 and num_channels > 1:
332
+ s = torch.exp2(torch.randn([batch_size, 1, 1], device=device) * self.saturation_std)
333
+ s = torch.where(torch.rand([batch_size, 1, 1], device=device) < self.saturation * self.p, s, torch.ones_like(s))
334
+ if debug_percentile is not None:
335
+ s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.saturation_std))
336
+ C = (v.ger(v) + (I_4 - v.ger(v)) * s) @ C
337
+
338
+ # ------------------------------
339
+ # Execute color transformations.
340
+ # ------------------------------
341
+
342
+ # Execute if the transform is not identity.
343
+ if C is not I_4:
344
+ images = images.reshape([batch_size, num_channels, height * width])
345
+ if num_channels == 3:
346
+ images = C[:, :3, :3] @ images + C[:, :3, 3:]
347
+ elif num_channels == 1:
348
+ C = C[:, :3, :].mean(dim=1, keepdims=True)
349
+ images = images * C[:, :, :3].sum(dim=2, keepdims=True) + C[:, :, 3:]
350
+ else:
351
+ raise ValueError('Image must be RGB (3 channels) or L (1 channel)')
352
+ images = images.reshape([batch_size, num_channels, height, width])
353
+
354
+ # ----------------------
355
+ # Image-space filtering.
356
+ # ----------------------
357
+
358
+ if self.imgfilter > 0:
359
+ num_bands = self.Hz_fbank.shape[0]
360
+ assert len(self.imgfilter_bands) == num_bands
361
+ expected_power = misc.constant(np.array([10, 1, 1, 1]) / 13, device=device) # Expected power spectrum (1/f).
362
+
363
+ # Apply amplification for each band with probability (imgfilter * strength * band_strength).
364
+ g = torch.ones([batch_size, num_bands], device=device) # Global gain vector (identity).
365
+ for i, band_strength in enumerate(self.imgfilter_bands):
366
+ t_i = torch.exp2(torch.randn([batch_size], device=device) * self.imgfilter_std)
367
+ t_i = torch.where(torch.rand([batch_size], device=device) < self.imgfilter * self.p * band_strength, t_i, torch.ones_like(t_i))
368
+ if debug_percentile is not None:
369
+ t_i = torch.full_like(t_i, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.imgfilter_std)) if band_strength > 0 else torch.ones_like(t_i)
370
+ t = torch.ones([batch_size, num_bands], device=device) # Temporary gain vector.
371
+ t[:, i] = t_i # Replace i'th element.
372
+ t = t / (expected_power * t.square()).sum(dim=-1, keepdims=True).sqrt() # Normalize power.
373
+ g = g * t # Accumulate into global gain.
374
+
375
+ # Construct combined amplification filter.
376
+ Hz_prime = g @ self.Hz_fbank # [batch, tap]
377
+ Hz_prime = Hz_prime.unsqueeze(1).repeat([1, num_channels, 1]) # [batch, channels, tap]
378
+ Hz_prime = Hz_prime.reshape([batch_size * num_channels, 1, -1]) # [batch * channels, 1, tap]
379
+
380
+ # Apply filter.
381
+ p = self.Hz_fbank.shape[1] // 2
382
+ images = images.reshape([1, batch_size * num_channels, height, width])
383
+ images = torch.nn.functional.pad(input=images, pad=[p,p,p,p], mode='reflect')
384
+ images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(2), groups=batch_size*num_channels)
385
+ images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(3), groups=batch_size*num_channels)
386
+ images = images.reshape([batch_size, num_channels, height, width])
387
+
388
+ # ------------------------
389
+ # Image-space corruptions.
390
+ # ------------------------
391
+ batch["img"] = images
392
+ batch["vertices"] = batch["vertices"].long()
393
+ batch["border"] = 1 - batch["E_mask"] - batch["mask"]
394
+ return batch
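The geometric part of the augmentation pipe is built on inverse homogeneous 2D transforms (G_inv maps output pixels back to input pixels). Below is a NumPy-only sketch of that composition, independent of the sg3_torch_utils helpers, rotating about the image center; the image size and angle are arbitrary illustration values.

import numpy as np

def translate2d(tx, ty):
    return np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=np.float64)

def rotate2d(theta):
    return np.array([[np.cos(theta), -np.sin(theta), 0],
                     [np.sin(theta),  np.cos(theta), 0],
                     [0, 0, 1]], dtype=np.float64)

H = W = 64
cx, cy = (W - 1) / 2, (H - 1) / 2
theta = np.pi / 6
# Inverse transform: move the center to the origin, rotate backwards, move it back.
G_inv = translate2d(cx, cy) @ rotate2d(-theta) @ translate2d(-cx, -cy)
corner = np.array([0.0, 0.0, 1.0])
print(G_inv @ corner)  # where the top-left output pixel samples from in the input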
dp2/data/transforms/transforms.py ADDED
@@ -0,0 +1,277 @@
1
+ from pathlib import Path
2
+ from typing import Dict, List
3
+ import torchvision
4
+ import torch
5
+ import tops
6
+ import torchvision.transforms.functional as F
7
+ from .functional import hflip
8
+ import numpy as np
9
+ from dp2.utils.vis_utils import get_coco_keypoints
10
+ from PIL import Image, ImageDraw
11
+ from typing import Tuple
12
+
13
+
14
+ class RandomHorizontalFlip(torch.nn.Module):
15
+
16
+ def __init__(self, p: float, flip_map=None, **kwargs):
17
+ super().__init__()
18
+ self.flip_ratio = p
19
+ self.flip_map = flip_map
20
+ if self.flip_ratio is None:
21
+ self.flip_ratio = 0.5
22
+ assert 0 <= self.flip_ratio <= 1
23
+
24
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
25
+ if torch.rand(1) > self.flip_ratio:
26
+ return container
27
+ return hflip(container, self.flip_map)
28
+
29
+
30
+ class CenterCrop(torch.nn.Module):
31
+ """
32
+ Performs the transform on the image.
33
+ NOTE: Does not transform the mask to improve runtime.
34
+ """
35
+
36
+ def __init__(self, size: List[int]):
37
+ super().__init__()
38
+ self.size = tuple(size)
39
+
40
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
41
+ min_size = min(container["img"].shape[1], container["img"].shape[2])
42
+ if min_size < self.size[0]:
43
+ container["img"] = F.center_crop(container["img"], min_size)
44
+ container["img"] = F.resize(container["img"], self.size)
45
+ return container
46
+ container["img"] = F.center_crop(container["img"], self.size)
47
+ return container
48
+
49
+
50
+ class Resize(torch.nn.Module):
51
+ """
52
+ Performs the transform on the image.
53
+ NOTE: Also resizes masks, embeddings and vertices (nearest-neighbor for masks and vertices).
54
+ """
55
+
56
+ def __init__(self, size, interpolation=F.InterpolationMode.BILINEAR):
57
+ super().__init__()
58
+ self.size = tuple(size)
59
+ self.interpolation = interpolation
60
+
61
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
62
+ container["img"] = F.resize(container["img"], self.size, self.interpolation, antialias=True)
63
+ if "semantic_mask" in container:
64
+ container["semantic_mask"] = F.resize(
65
+ container["semantic_mask"], self.size, F.InterpolationMode.NEAREST)
66
+ if "embedding" in container:
67
+ container["embedding"] = F.resize(
68
+ container["embedding"], self.size, self.interpolation)
69
+ if "mask" in container:
70
+ container["mask"] = F.resize(
71
+ container["mask"], self.size, F.InterpolationMode.NEAREST)
72
+ if "E_mask" in container:
73
+ container["E_mask"] = F.resize(
74
+ container["E_mask"], self.size, F.InterpolationMode.NEAREST)
75
+ if "maskrcnn_mask" in container:
76
+ container["maskrcnn_mask"] = F.resize(
77
+ container["maskrcnn_mask"], self.size, F.InterpolationMode.NEAREST)
78
+ if "vertices" in container:
79
+ container["vertices"] = F.resize(
80
+ container["vertices"], self.size, F.InterpolationMode.NEAREST)
81
+ return container
82
+
83
+ def __repr__(self):
84
+ repr = super().__repr__()
85
+ vars_ = dict(size=self.size, interpolation=self.interpolation)
86
+ return repr + " " + " ".join([f"{k}: {v}" for k, v in vars_.items()])
87
+
88
+
89
+ class Normalize(torch.nn.Module):
90
+ """
91
+ Performs the transform on the image.
92
+ NOTE: Does not transform the mask to improve runtime.
93
+ """
94
+
95
+ def __init__(self, mean, std, inplace, keys=["img"]):
96
+ super().__init__()
97
+ self.mean = mean
98
+ self.std = std
99
+ self.inplace = inplace
100
+ self.keys = keys
101
+
102
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
103
+ for key in self.keys:
104
+ container[key] = F.normalize(container[key], self.mean, self.std, self.inplace)
105
+ return container
106
+
107
+ def __repr__(self):
108
+ repr = super().__repr__()
109
+ vars_ = dict(mean=self.mean, std=self.std, inplace=self.inplace)
110
+ return repr + " " + " ".join([f"{k}: {v}" for k, v in vars_.items()])
111
+
112
+
113
+ class ToFloat(torch.nn.Module):
114
+
115
+ def __init__(self, keys=["img"], norm=True) -> None:
116
+ super().__init__()
117
+ self.keys = keys
118
+ self.gain = 255 if norm else 1
119
+
120
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
121
+ for key in self.keys:
122
+ container[key] = container[key].float() / self.gain
123
+ return container
124
+
125
+
126
+ class RandomCrop(torchvision.transforms.RandomCrop):
127
+ """
128
+ Performs the transform on the image.
129
+ NOTE: Does not transform the mask to improve runtime.
130
+ """
131
+
132
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
133
+ container["img"] = super().forward(container["img"])
134
+ return container
135
+
136
+
137
+ class CreateCondition(torch.nn.Module):
138
+
139
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
140
+ if container["img"].dtype == torch.uint8:
141
+ container["condition"] = container["img"] * container["mask"].byte() + (1-container["mask"].byte()) * 127
142
+ return container
143
+ container["condition"] = container["img"] * container["mask"]
144
+ return container
145
+
146
+
147
+ class CreateEmbedding(torch.nn.Module):
148
+
149
+ def __init__(self, embed_path: Path, cuda=True) -> None:
150
+ super().__init__()
151
+ self.embed_map = torch.load(embed_path, map_location=torch.device("cpu"))
152
+ if cuda:
153
+ self.embed_map = tops.to_cuda(self.embed_map)
154
+
155
+ def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
156
+ vertices = container["vertices"]
157
+ if vertices.ndim == 3:
158
+ embedding = self.embed_map[vertices.long()].squeeze(dim=0)
159
+ embedding = embedding.permute(2, 0, 1) * container["E_mask"]
160
+ pass
161
+ else:
162
+ assert vertices.ndim == 4
163
+ embedding = self.embed_map[vertices.long()].squeeze(dim=1)
164
+ embedding = embedding.permute(0, 3, 1, 2) * container["E_mask"]
165
+ container["embedding"] = embedding
166
+ container["embed_map"] = self.embed_map.clone()
167
+ return container
168
+
169
+
170
+ class InsertJointMap(torch.nn.Module):
171
+
172
+ def __init__(self, imsize: Tuple) -> None:
173
+ super().__init__()
174
+ self.imsize = imsize
175
+ knames = get_coco_keypoints()[0]
176
+ knames = knames + ["neck", "mid_hip"]
177
+ connectivity = {
178
+ "nose": ["left_eye", "right_eye", "neck"],
179
+ "left_eye": ["right_eye", "left_ear"],
180
+ "right_eye": ["right_ear"],
181
+ "left_shoulder": ["right_shoulder", "left_elbow", "left_hip"],
182
+ "right_shoulder": ["right_elbow", "right_hip"],
183
+ "left_elbow": ["left_wrist"],
184
+ "right_elbow": ["right_wrist"],
185
+ "left_hip": ["right_hip", "left_knee"],
186
+ "right_hip": ["right_knee"],
187
+ "left_knee": ["left_ankle"],
188
+ "right_knee": ["right_ankle"],
189
+ "neck": ["mid_hip", "nose"],
190
+ }
191
+ category = {
192
+ ("nose", "left_eye"): 0, # head
193
+ ("nose", "right_eye"): 0, # head
194
+ ("nose", "neck"): 0, # head
195
+ ("left_eye", "right_eye"): 0, # head
196
+ ("left_eye", "left_ear"): 0, # head
197
+ ("right_eye", "right_ear"): 0, # head
198
+ ("left_shoulder", "left_elbow"): 1, # left arm
199
+ ("left_elbow", "left_wrist"): 1, # left arm
200
+ ("right_shoulder", "right_elbow"): 2, # right arm
201
+ ("right_elbow", "right_wrist"): 2, # right arm
202
+ ("left_shoulder", "right_shoulder"): 3, # body
203
+ ("left_shoulder", "left_hip"): 3, # body
204
+ ("right_shoulder", "right_hip"): 3, # body
205
+ ("left_hip", "right_hip"): 3, # body
206
+ ("left_hip", "left_knee"): 4, # left leg
207
+ ("left_knee", "left_ankle"): 4, # left leg
208
+ ("right_hip", "right_knee"): 5, # right leg
209
+ ("right_knee", "right_ankle"): 5, # right leg
210
+ ("neck", "mid_hip"): 3, # body
211
+ ("neck", "nose"): 0, # head
212
+ }
213
+ self.indices2category = {
214
+ tuple([knames.index(n) for n in k]): v for k, v in category.items()
215
+ }
216
+ self.connectivity_indices = {
217
+ knames.index(k): [knames.index(v_) for v_ in v]
218
+ for k, v in connectivity.items()
219
+ }
220
+ self.l_shoulder = knames.index("left_shoulder")
221
+ self.r_shoulder = knames.index("right_shoulder")
222
+ self.l_hip = knames.index("left_hip")
223
+ self.r_hip = knames.index("right_hip")
224
+ self.l_eye = knames.index("left_eye")
225
+ self.r_eye = knames.index("right_eye")
226
+ self.nose = knames.index("nose")
227
+ self.neck = knames.index("neck")
228
+
229
+ def create_joint_map(self, N, H, W, keypoints):
230
+ joint_maps = np.zeros((N, H, W), dtype=np.uint8)
231
+ for bidx, keypoints in enumerate(keypoints):
232
+ assert keypoints.shape == (17, 3), keypoints.shape
233
+ keypoints = torch.cat((keypoints, torch.zeros(2, 3)))
234
+ visible = keypoints[:, -1] > 0
235
+
236
+ if visible[self.l_shoulder] and visible[self.r_shoulder]:
237
+ neck = (keypoints[self.l_shoulder]
238
+ + (keypoints[self.r_shoulder] - keypoints[self.l_shoulder]) / 2)
239
+ keypoints[-2] = neck
240
+ visible[-2] = 1
241
+ if visible[self.l_hip] and visible[self.r_hip]:
242
+ mhip = (keypoints[self.l_hip]
243
+ + (keypoints[self.r_hip] - keypoints[self.l_hip]) / 2
244
+ )
245
+ keypoints[-1] = mhip
246
+ visible[-1] = 1
247
+
248
+ keypoints[:, 0] *= W
249
+ keypoints[:, 1] *= H
250
+ joint_map = Image.fromarray(np.zeros((H, W), dtype=np.uint8))
251
+ draw = ImageDraw.Draw(joint_map)
252
+ for fidx in self.connectivity_indices.keys():
253
+ for tidx in self.connectivity_indices[fidx]:
254
+ if visible[fidx] == 0 or visible[tidx] == 0:
255
+ continue
256
+ c = self.indices2category[(fidx, tidx)]
257
+ s = tuple(keypoints[fidx, :2].round().long().numpy().tolist())
258
+ e = tuple(keypoints[tidx, :2].round().long().numpy().tolist())
259
+ draw.line((s, e), width=1, fill=c + 1)
260
+ if visible[self.nose] == 0 and visible[self.neck] == 1:
261
+ m_eye = (
262
+ keypoints[self.l_eye]
263
+ + (keypoints[self.r_eye] - keypoints[self.l_eye]) / 2
264
+ )
265
+ s = tuple(m_eye[:2].round().long().numpy().tolist())
266
+ e = tuple(keypoints[self.neck, :2].round().long().numpy().tolist())
267
+ c = self.indices2category[(self.nose, self.neck)]
268
+ draw.line((s, e), width=1, fill=c + 1)
269
+ joint_map = np.array(joint_map)
270
+
271
+ joint_maps[bidx] = np.array(joint_map)
272
+ return joint_maps[:, None]
273
+
274
+ def forward(self, batch):
275
+ batch["joint_map"] = torch.from_numpy(self.create_joint_map(
276
+ batch["img"].shape[0], *self.imsize, batch["keypoints"]))
277
+ return batch
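A self-contained sketch of what CreateCondition above produces for uint8 inputs: pixels to be anonymized (mask == 0) are replaced with mid-gray (127), everything else is kept. The toy image and mask are made up.

import torch

img = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8)
mask = torch.ones(1, 4, 4, dtype=torch.bool)
mask[:, :2, :2] = 0  # region the generator should in-paint
condition = img * mask.byte() + (1 - mask.byte()) * 127
print(condition[:, 0, 0])  # tensor([127, 127, 127], dtype=torch.uint8)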
dp2/data/utils.py ADDED
@@ -0,0 +1,122 @@
1
+ import torch
2
+ from PIL import Image
3
+ import numpy as np
4
+ import multiprocessing
5
+ import io
6
+ from tops import logger
7
+ from torch.utils.data._utils.collate import default_collate
8
+
9
+ try:
10
+ import pyspng
11
+
12
+ PYSPNG_IMPORTED = True
13
+ except ImportError:
14
+ PYSPNG_IMPORTED = False
15
+ print("Could not load pyspng. Defaulting to pillow image backend.")
16
+ from PIL import Image
17
+
18
+
19
+ def get_fdf_keypoints():
20
+ return get_coco_keypoints()[:7]
21
+
22
+
23
+ def get_fdf_flipmap():
24
+ keypoints = get_fdf_keypoints()
25
+ keypoint_flip_map = {
26
+ "left_eye": "right_eye",
27
+ "left_ear": "right_ear",
28
+ "left_shoulder": "right_shoulder",
29
+ }
30
+ for key, value in list(keypoint_flip_map.items()):
31
+ keypoint_flip_map[value] = key
32
+ keypoint_flip_map["nose"] = "nose"
33
+ keypoint_flip_map_idx = []
34
+ for source in keypoints:
35
+ keypoint_flip_map_idx.append(keypoints.index(keypoint_flip_map[source]))
36
+ return keypoint_flip_map_idx
37
+
38
+
39
+ def get_coco_keypoints():
40
+ return [
41
+ "nose",
42
+ "left_eye",
43
+ "right_eye", # 2
44
+ "left_ear",
45
+ "right_ear", # 4
46
+ "left_shoulder",
47
+ "right_shoulder", # 6
48
+ "left_elbow",
49
+ "right_elbow", # 8
50
+ "left_wrist",
51
+ "right_wrist", # 10
52
+ "left_hip",
53
+ "right_hip", # 12
54
+ "left_knee",
55
+ "right_knee", # 14
56
+ "left_ankle",
57
+ "right_ankle", # 16
58
+ ]
59
+
60
+
61
+ def get_coco_flipmap():
62
+ keypoints = get_coco_keypoints()
63
+ keypoint_flip_map = {
64
+ "left_eye": "right_eye",
65
+ "left_ear": "right_ear",
66
+ "left_shoulder": "right_shoulder",
67
+ "left_elbow": "right_elbow",
68
+ "left_wrist": "right_wrist",
69
+ "left_hip": "right_hip",
70
+ "left_knee": "right_knee",
71
+ "left_ankle": "right_ankle",
72
+ }
73
+ for key, value in list(keypoint_flip_map.items()):
74
+ keypoint_flip_map[value] = key
75
+ keypoint_flip_map["nose"] = "nose"
76
+ keypoint_flip_map_idx = []
77
+ for source in keypoints:
78
+ keypoint_flip_map_idx.append(keypoints.index(keypoint_flip_map[source]))
79
+ return keypoint_flip_map_idx
80
+
81
+
82
+ def mask_decoder(x):
83
+ mask = torch.from_numpy(np.array(Image.open(io.BytesIO(x)))).squeeze()[None]
84
+ mask = mask > 0  # This fixes a bug causing mask.float().max() == 255.
85
+ return mask
86
+
87
+
88
+ def png_decoder(x):
89
+ if PYSPNG_IMPORTED:
90
+ return torch.from_numpy(np.rollaxis(pyspng.load(x), 2))
91
+ with Image.open(io.BytesIO(x)) as im:
92
+ im = torch.from_numpy(np.rollaxis(np.array(im.convert("RGB")), 2))
93
+ return im
94
+
95
+
96
+ def jpg_decoder(x):
97
+ with Image.open(io.BytesIO(x)) as im:
98
+ im = torch.from_numpy(np.rollaxis(np.array(im.convert("RGB")), 2))
99
+ return im
100
+
101
+
102
+ def get_num_workers(num_workers: int):
103
+ n_cpus = multiprocessing.cpu_count()
104
+ if num_workers > n_cpus:
105
+ logger.warn(f"Setting the number of workers to match cpu count: {n_cpus}")
106
+ return n_cpus
107
+ return num_workers
108
+
109
+
110
+ def collate_fn(batch):
111
+ elem = batch[0]
112
+ ignore_keys = set(["embed_map", "vertx2cat"])
113
+ batch_ = {
114
+ key: default_collate([d[key] for d in batch])
115
+ for key in elem
116
+ if key not in ignore_keys
117
+ }
118
+ if "embed_map" in elem:
119
+ batch_["embed_map"] = elem["embed_map"]
120
+ if "vertx2cat" in elem:
121
+ batch_["vertx2cat"] = elem["vertx2cat"]
122
+ return batch_
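A quick usage sketch of the flip-map helpers above (assuming the dp2 package is importable): for each COCO keypoint index the map gives the index of its horizontally mirrored joint, which is what hflip uses to reorder keypoints.

from dp2.data.utils import get_coco_keypoints, get_coco_flipmap

names = get_coco_keypoints()
flip_map = get_coco_flipmap()
print(names[1], "->", names[flip_map[1]])  # left_eye -> right_eye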
dp2/detection/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cse_mask_face_detector import CSeMaskFaceDetector
2
+ from .person_detector import CSEPersonDetector
3
+ from .structures import PersonDetection, VehicleDetection, FaceDetection
dp2/detection/base.py ADDED
@@ -0,0 +1,42 @@
1
+ import pickle
2
+ import torch
3
+ import lzma
4
+ from pathlib import Path
5
+ from tops import logger
6
+
7
+
8
+ class BaseDetector:
9
+
10
+ def __init__(self, cache_directory: str) -> None:
11
+ if cache_directory is not None:
12
+ self.cache_directory = Path(cache_directory, str(self.__class__.__name__))
13
+ self.cache_directory.mkdir(exist_ok=True, parents=True)
14
+
15
+ def save_to_cache(self, detection, cache_path: Path, after_preprocess=True):
16
+ logger.log(f"Caching detection to: {cache_path}")
17
+ with lzma.open(cache_path, "wb") as fp:
18
+ torch.save(
19
+ [det.state_dict(after_preprocess=after_preprocess) for det in detection], fp,
20
+ pickle_protocol=pickle.HIGHEST_PROTOCOL)
21
+
22
+ def load_from_cache(self, cache_path: Path):
23
+ logger.log(f"Loading detection from cache path: {cache_path}")
24
+ with lzma.open(cache_path, "rb") as fp:
25
+ state_dict = torch.load(fp)
26
+ return [
27
+ state["cls"].from_state_dict(state_dict=state) for state in state_dict
28
+ ]
29
+
30
+ def forward_and_cache(self, im: torch.Tensor, cache_id: str, load_cache: bool):
31
+ if cache_id is None:
32
+ return self.forward(im)
33
+ cache_path = self.cache_directory.joinpath(cache_id + ".torch")
34
+ if cache_path.is_file() and load_cache:
35
+ try:
36
+ return self.load_from_cache(cache_path)
37
+ except Exception as e:
38
+ logger.warn(f"The cache file was corrupted: {cache_path}")
39
+ exit()
40
+ detections = self.forward(im)
41
+ self.save_to_cache(detections, cache_path)
42
+ return detections
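A hypothetical toy subclass illustrating the caching contract BaseDetector expects (assuming dp2 and tops are importable): each detection object must expose state_dict(), including its own class under the "cls" key, and a from_state_dict() constructor. These toy classes are not part of the repository; they only sketch the interface.

import torch
from dp2.detection.base import BaseDetector

class ToyDetection:
    def __init__(self, boxes):
        self.boxes = boxes

    def state_dict(self, after_preprocess=True):
        return dict(cls=self.__class__, boxes=self.boxes)

    @classmethod
    def from_state_dict(cls, state_dict):
        return cls(state_dict["boxes"])

class ToyDetector(BaseDetector):
    def forward(self, im: torch.Tensor):
        return [ToyDetection(torch.zeros(1, 4))]

detector = ToyDetector(cache_directory="toy_cache")
# The first call runs forward() and writes toy_cache/ToyDetector/frame_0.torch;
# later calls with the same cache_id load the lzma-compressed file instead.
dets = detector.forward_and_cache(torch.zeros(3, 64, 64), cache_id="frame_0", load_cache=True)
print(dets[0].boxes.shape)  # torch.Size([1, 4])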
dp2/detection/box_utils.py ADDED
@@ -0,0 +1,104 @@
1
+ import numpy as np
2
+
3
+
4
+ def expand_bbox_to_ratio(bbox, imshape, target_aspect_ratio):
5
+ x0, y0, x1, y1 = [int(_) for _ in bbox]
6
+ h, w = y1 - y0, x1 - x0
7
+ cur_ratio = h / w
8
+
9
+ if cur_ratio == target_aspect_ratio:
10
+ return [x0, y0, x1, y1]
11
+ if cur_ratio < target_aspect_ratio:
12
+ target_height = int(w*target_aspect_ratio)
13
+ y0, y1 = expand_axis(y0, y1, target_height, imshape[0])
14
+ else:
15
+ target_width = int(h/target_aspect_ratio)
16
+ x0, x1 = expand_axis(x0, x1, target_width, imshape[1])
17
+ return x0, y0, x1, y1
18
+
19
+
20
+ def expand_axis(start, end, target_width, limit):
21
+ # Can return a bbox outside of limit
22
+ cur_width = end - start
23
+ start = start - (target_width-cur_width)//2
24
+ end = end + (target_width-cur_width)//2
25
+ if end - start != target_width:
26
+ end += 1
27
+ assert end - start == target_width
28
+ if start < 0 and end > limit:
29
+ return start, end
30
+ if start < 0 and end < limit:
31
+ to_shift = min(0 - start, limit - end)
32
+ start += to_shift
33
+ end += to_shift
34
+ if end > limit and start > 0:
35
+ to_shift = min(end - limit, start)
36
+ end -= to_shift
37
+ start -= to_shift
38
+ assert end - start == target_width
39
+ return start, end
40
+
41
+
42
+ def expand_box(bbox, imshape, mask, percentage_background: float):
43
+ assert isinstance(bbox[0], int)
44
+ assert 0 < percentage_background < 1
45
+ # Percentage in S
46
+ mask_pixels = mask.long().sum().cpu()
47
+ total_pixels = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
48
+ percentage_mask = mask_pixels / total_pixels
49
+ if (1 - percentage_mask) > percentage_background:
50
+ return bbox
51
+ target_pixels = mask_pixels / (1 - percentage_background)
52
+ x0, y0, x1, y1 = bbox
53
+ H = y1 - y0
54
+ W = x1 - x0
55
+ p = np.sqrt(target_pixels/(H*W))
56
+ target_width = int(np.ceil(p * W))
57
+ target_height = int(np.ceil(p * H))
58
+ x0, x1 = expand_axis(x0, x1, target_width, imshape[1])
59
+ y0, y1 = expand_axis(y0, y1, target_height, imshape[0])
60
+ return [x0, y0, x1, y1]
61
+
62
+
63
+ def expand_axises_by_percentage(bbox_XYXY, imshape, percentage):
64
+ x0, y0, x1, y1 = bbox_XYXY
65
+ H = y1 - y0
66
+ W = x1 - x0
67
+ expansion = int(((H*W)**0.5) * percentage)
68
+ new_width = W + expansion
69
+ new_height = H + expansion
70
+ x0, x1 = expand_axis(x0, x1, min(new_width, imshape[1]), imshape[1])
71
+ y0, y1 = expand_axis(y0, y1, min(new_height, imshape[0]), imshape[0])
72
+ return [x0, y0, x1, y1]
73
+
74
+
75
+ def get_expanded_bbox(
76
+ bbox_XYXY,
77
+ imshape,
78
+ mask,
79
+ percentage_background: float,
80
+ axis_minimum_expansion: float,
81
+ target_aspect_ratio: float):
82
+ bbox_XYXY = bbox_XYXY.long().cpu().numpy().tolist()
83
+ # Expand each axis of the bounding box by a minimum percentage
84
+ bbox_XYXY = expand_axises_by_percentage(bbox_XYXY, imshape, axis_minimum_expansion)
85
+ # Find the minimum bbox with the aspect ratio. Can be outside of imshape
86
+ bbox_XYXY = expand_bbox_to_ratio(bbox_XYXY, imshape, target_aspect_ratio)
87
+ # Expands square box such that X% of the bbox is background
88
+ bbox_XYXY = expand_box(bbox_XYXY, imshape, mask, percentage_background)
89
+ assert isinstance(bbox_XYXY[0], (int, np.int64))
90
+ return bbox_XYXY
91
+
92
+
93
+ def include_box(bbox, minimum_area, aspect_ratio_range, min_bbox_ratio_inside, imshape):
94
+ def area_inside_ratio(bbox, imshape):
95
+ area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
96
+ area_inside = (min(bbox[2], imshape[1]) - max(0, bbox[0])) * (min(imshape[0], bbox[3]) - max(0, bbox[1]))
97
+ return area_inside / area
98
+ ratio = (bbox[3] - bbox[1]) / (bbox[2] - bbox[0])
99
+ area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
100
+ if area_inside_ratio(bbox, imshape) < min_bbox_ratio_inside:
101
+ return False
102
+ if ratio <= aspect_ratio_range[0] or ratio >= aspect_ratio_range[1] or area < minimum_area:
103
+ return False
104
+ return True
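Two small usage examples for the helpers above (assuming dp2 is importable): expand_axis grows an interval to a target width while trying to keep it inside [0, limit], and include_box filters candidate boxes on area, aspect ratio and how much of the box lies inside the image.

from dp2.detection.box_utils import expand_axis, include_box

print(expand_axis(10, 20, 16, 100))  # (7, 23): grown by 3 px on each side
print(include_box(
    bbox=[0, 0, 50, 100], minimum_area=1000,
    aspect_ratio_range=[0.5, 4], min_bbox_ratio_inside=0.9,
    imshape=(128, 128)))  # True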
dp2/detection/box_utils_fdf.py ADDED
@@ -0,0 +1,202 @@
1
+ """
2
+ The FDF dataset expands bounding boxes differently from what is used for CSE.
3
+ """
4
+
5
+ import numpy as np
6
+
7
+
8
+ def quadratic_bounding_box(x0, y0, width, height, imshape):
9
+ # We assume that we can create an image that is square (quadratic) without
10
+ # shrinking any of the sides
11
+ assert width <= min(imshape[:2])
12
+ assert height <= min(imshape[:2])
13
+ min_side = min(height, width)
14
+ if height != width:
15
+ side_diff = abs(height - width)
16
+ # Want to extend the shortest side
17
+ if min_side == height:
18
+ # Vertical side
19
+ height += side_diff
20
+ if height > imshape[0]:
21
+ # Take full frame, and shrink width
22
+ y0 = 0
23
+ height = imshape[0]
24
+
25
+ side_diff = abs(height - width)
26
+ width -= side_diff
27
+ x0 += side_diff // 2
28
+ else:
29
+ y0 -= side_diff // 2
30
+ y0 = max(0, y0)
31
+ else:
32
+ # Horizontal side
33
+ width += side_diff
34
+ if width > imshape[1]:
35
+ # Take full frame width, and shrink height
36
+ x0 = 0
37
+ width = imshape[1]
38
+
39
+ side_diff = abs(height - width)
40
+ height -= side_diff
41
+ y0 += side_diff // 2
42
+ else:
43
+ x0 -= side_diff // 2
44
+ x0 = max(0, x0)
45
+ # Shift the bbox back inside if it extends beyond the image
46
+ x1 = x0 + width
47
+ y1 = y0 + height
48
+ if imshape[1] < x1:
49
+ diff = x1 - imshape[1]
50
+ x0 -= diff
51
+ if imshape[0] < y1:
52
+ diff = y1 - imshape[0]
53
+ y0 -= diff
54
+ assert x0 >= 0, "Bounding box outside image."
55
+ assert y0 >= 0, "Bounding box outside image."
56
+ assert x0 + width <= imshape[1], "Bounding box outside image."
57
+ assert y0 + height <= imshape[0], "Bounding box outside image."
58
+ return x0, y0, width, height
59
+
60
+
61
+ def expand_bounding_box(bbox, percentage, imshape):
62
+ orig_bbox = bbox.copy()
63
+ x0, y0, x1, y1 = bbox
64
+ width = x1 - x0
65
+ height = y1 - y0
66
+ x0, y0, width, height = quadratic_bounding_box(
67
+ x0, y0, width, height, imshape)
68
+ expanding_factor = int(max(height, width) * percentage)
69
+
70
+ possible_max_expansion = [(imshape[0] - width) // 2,
71
+ (imshape[1] - height) // 2,
72
+ expanding_factor]
73
+
74
+ expanding_factor = min(possible_max_expansion)
75
+ # Expand height
76
+
77
+ if expanding_factor > 0:
78
+
79
+ y0 = y0 - expanding_factor
80
+ y0 = max(0, y0)
81
+
82
+ height += expanding_factor * 2
83
+ if height > imshape[0]:
84
+ y0 -= (imshape[0] - height)
85
+ height = imshape[0]
86
+
87
+ if height + y0 > imshape[0]:
88
+ y0 -= (height + y0 - imshape[0])
89
+
90
+ # Expand width
91
+ x0 = x0 - expanding_factor
92
+ x0 = max(0, x0)
93
+
94
+ width += expanding_factor * 2
95
+ if width > imshape[1]:
96
+ x0 -= (imshape[1] - width)
97
+ width = imshape[1]
98
+
99
+ if width + x0 > imshape[1]:
100
+ x0 -= (width + x0 - imshape[1])
101
+ y1 = y0 + height
102
+ x1 = x0 + width
103
+ assert y0 >= 0, "y0 is negative"
104
+ assert height <= imshape[0], "Height is larger than image."
105
+ assert x0 + width <= imshape[1]
106
+ assert y0 + height <= imshape[0]
107
+ assert width == height, "Height is not equal to width."
108
+ assert x0 >= 0, "x0 is negative"
109
+ assert width <= imshape[1], "Width is larger than image."
110
+ # Check that the original bbox is contained in the new one
111
+ x0_o, y0_o, x1_o, y1_o = orig_bbox
112
+ assert x0 <= x0_o, f"New bbox is outside of original. O:{x0_o}, N: {x0}"
113
+ assert x1 >= x1_o, f"New bbox is outside of original. O:{x1_o}, N: {x1}"
114
+ assert y0 <= y0_o, f"New bbox is outside of original. O:{y0_o}, N: {y0}"
115
+ assert y1 >= y1_o, f"New bbox is outside of original. O:{y1_o}, N: {y1}"
116
+
117
+ x0, y0, width, height = [int(_) for _ in [x0, y0, width, height]]
118
+ x1 = x0 + width
119
+ y1 = y0 + height
120
+ return np.array([x0, y0, x1, y1])
121
+
122
+
123
+ def is_keypoint_within_bbox(x0, y0, x1, y1, keypoint):
124
+ keypoint = keypoint[:, :3] # only nose + eyes are relevant
125
+ kp_X = keypoint[0, :]
126
+ kp_Y = keypoint[1, :]
127
+ within_X = np.all(kp_X >= x0) and np.all(kp_X <= x1)
128
+ within_Y = np.all(kp_Y >= y0) and np.all(kp_Y <= y1)
129
+ return within_X and within_Y
130
+
131
+
132
+ def expand_bbox_simple(bbox, percentage):
133
+ x0, y0, x1, y1 = bbox.astype(float)
134
+ width = x1 - x0
135
+ height = y1 - y0
136
+ x_c = int(x0) + width // 2
137
+ y_c = int(y0) + height // 2
138
+ avg_size = max(width, height)
139
+ new_width = avg_size * (1 + percentage)
140
+ x0 = x_c - new_width // 2
141
+ y0 = y_c - new_width // 2
142
+ x1 = x_c + new_width // 2
143
+ y1 = y_c + new_width // 2
144
+ return np.array([x0, y0, x1, y1]).astype(int)
145
+
146
+
147
+ def pad_image(im, bbox, pad_value):
148
+ x0, y0, x1, y1 = bbox
149
+ if x0 < 0:
150
+ pad_im = np.zeros((im.shape[0], abs(x0), im.shape[2]),
151
+ dtype=np.uint8) + pad_value
152
+ im = np.concatenate((pad_im, im), axis=1)
153
+ x1 += abs(x0)
154
+ x0 = 0
155
+ if y0 < 0:
156
+ pad_im = np.zeros((abs(y0), im.shape[1], im.shape[2]),
157
+ dtype=np.uint8) + pad_value
158
+ im = np.concatenate((pad_im, im), axis=0)
159
+ y1 += abs(y0)
160
+ y0 = 0
161
+ if x1 >= im.shape[1]:
162
+ pad_im = np.zeros(
163
+ (im.shape[0], x1 - im.shape[1] + 1, im.shape[2]),
164
+ dtype=np.uint8) + pad_value
165
+ im = np.concatenate((im, pad_im), axis=1)
166
+ if y1 >= im.shape[0]:
167
+ pad_im = np.zeros(
168
+ (y1 - im.shape[0] + 1, im.shape[1], im.shape[2]),
169
+ dtype=np.uint8) + pad_value
170
+ im = np.concatenate((im, pad_im), axis=0)
171
+ return im[y0:y1, x0:x1]
172
+
173
+
174
+ def clip_box(bbox, im):
175
+ bbox[0] = max(0, bbox[0])
176
+ bbox[1] = max(0, bbox[1])
177
+ bbox[2] = min(im.shape[1] - 1, bbox[2])
178
+ bbox[3] = min(im.shape[0] - 1, bbox[3])
179
+ return bbox
180
+
181
+
182
+ def cut_face(im, bbox, simple_expand=False, pad_value=0, pad_im=True):
183
+ outside_im = (bbox < 0).any() or bbox[2] > im.shape[1] or bbox[3] > im.shape[0]
184
+ if simple_expand or (outside_im and pad_im):
185
+ return pad_image(im, bbox, pad_value)
186
+ bbox = clip_box(bbox, im)
187
+ x0, y0, x1, y1 = bbox
188
+ return im[y0:y1, x0:x1]
189
+
190
+
191
+ def expand_bbox(
192
+ bbox_ltrb, imshape, simple_expand, default_to_simple=False,
193
+ expansion_factor=0.35):
194
+ assert bbox_ltrb.shape == (4,), f"BBox shape was: {bbox_ltrb.shape}"
195
+ bbox = bbox_ltrb.astype(float)
196
+ # FDF256 uses simple expand with ratio 0.4
197
+ if simple_expand:
198
+ return expand_bbox_simple(bbox, 0.4)
199
+ try:
200
+ return expand_bounding_box(bbox, expansion_factor, imshape)
201
+ except AssertionError:
202
+ return expand_bbox_simple(bbox, expansion_factor * 2)
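
The helpers in this file compose into a small cropping pipeline: a tight detector box is expanded to a (roughly square) region and then cut out of the image, padding with a constant value when the region falls outside the frame. Below is an illustrative sketch, not part of the commit; the dummy image, the example box, and the import path are assumptions made for the example.

import numpy as np
from dp2.detection.box_utils_fdf import expand_bbox, cut_face  # module path assumed from this commit's layout

im = np.zeros((480, 640, 3), dtype=np.uint8)       # dummy H x W x C image
tight_box = np.array([300, 120, 360, 200])         # hypothetical face box, (x0, y0, x1, y1)

# Expand the tight box; expand_bbox falls back to the simple square expansion
# if the aspect-preserving expansion cannot satisfy its assertions.
square_box = expand_bbox(tight_box, im.shape, simple_expand=False)
crop = cut_face(im, square_box, pad_value=0)       # pads with zeros if the box leaves the image
print(crop.shape)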
dp2/detection/cse_mask_face_detector.py ADDED
@@ -0,0 +1,116 @@
+ import torch
+ import lzma
+ import tops
+ from pathlib import Path
+ from dp2.detection.base import BaseDetector
+ from .utils import combine_cse_maskrcnn_dets
+ from face_detection import build_detector as build_face_detector
+ from .models.cse import CSEDetector
+ from .models.mask_rcnn import MaskRCNNDetector
+ from .structures import CSEPersonDetection, VehicleDetection, FaceDetection, PersonDetection
+ from tops import logger
+
+
+ def box1_inside_box2(box1: torch.Tensor, box2: torch.Tensor):
+ assert len(box1.shape) == 2
+ assert len(box2.shape) == 2
+ box1_inside = torch.zeros(box1.shape[0], device=box1.device, dtype=torch.bool)
+ # This can be batched
+ for i, box in enumerate(box1):
+ is_outside_lefttop = (box[None, [0, 1]] <= box2[:, [0, 1]]).any(dim=1)
+ is_outside_rightbot = (box[None, [2, 3]] >= box2[:, [2, 3]]).any(dim=1)
+ is_outside = is_outside_lefttop.logical_or(is_outside_rightbot)
+ box1_inside[i] = is_outside.logical_not().any()
+ return box1_inside
+
+
+ class CSeMaskFaceDetector(BaseDetector):
+
+ def __init__(
+ self,
+ mask_rcnn_cfg,
+ face_detector_cfg: dict,
+ cse_cfg: dict,
+ face_post_process_cfg: dict,
+ cse_post_process_cfg,
+ score_threshold: float,
+ **kwargs
+ ) -> None:
+ super().__init__(**kwargs)
+ self.mask_rcnn = MaskRCNNDetector(**mask_rcnn_cfg, score_thres=score_threshold)
+ if "confidence_threshold" not in face_detector_cfg:
+ face_detector_cfg["confidence_threshold"] = score_threshold
+ if "score_thres" not in cse_cfg:
+ cse_cfg["score_thres"] = score_threshold
+ self.cse_detector = CSEDetector(**cse_cfg)
+ self.face_detector = build_face_detector(**face_detector_cfg, clip_boxes=True)
+ self.cse_post_process_cfg = cse_post_process_cfg
+ self.face_mean = tops.to_cuda(torch.from_numpy(self.face_detector.mean).view(3, 1, 1))
+ self.mask_cse_iou_combine_threshold = self.cse_post_process_cfg.pop("iou_combine_threshold")
+ self.face_post_process_cfg = face_post_process_cfg
+
+ def __call__(self, *args, **kwargs):
+ return self.forward(*args, **kwargs)
+
+ def _detect_faces(self, im: torch.Tensor):
+ H, W = im.shape[1:]
+ im = im.float() - self.face_mean
+ im = self.face_detector.resize(im[None], 1.0)
+ boxes_XYXY = self.face_detector._batched_detect(im)[0][:, :-1] # Remove score
+ boxes_XYXY[:, [0, 2]] *= W
+ boxes_XYXY[:, [1, 3]] *= H
+ return boxes_XYXY.round().long()
+
+ def load_from_cache(self, cache_path: Path):
+ logger.log(f"Loading detection from cache path: {cache_path}",)
+ with lzma.open(cache_path, "rb") as fp:
+ state_dict = torch.load(fp, map_location="cpu")
+ kwargs = dict(
+ post_process_cfg=self.cse_post_process_cfg,
+ embed_map=self.cse_detector.embed_map,
+ **self.face_post_process_cfg
+ )
+ return [
+ state["cls"].from_state_dict(**kwargs, state_dict=state)
+ for state in state_dict
+ ]
+
+ @torch.no_grad()
+ def forward(self, im: torch.Tensor):
+ maskrcnn_dets = self.mask_rcnn(im)
+ cse_dets = self.cse_detector(im)
+ embed_map = self.cse_detector.embed_map
+ print("Calling face detector.")
+ face_boxes = self._detect_faces(im).cpu()
+ maskrcnn_person = {
+ k: v[maskrcnn_dets["is_person"]] for k, v in maskrcnn_dets.items()
+ }
+ maskrcnn_other = {
+ k: v[maskrcnn_dets["is_person"].logical_not()] for k, v in maskrcnn_dets.items()
+ }
+ maskrcnn_other = VehicleDetection(maskrcnn_other["segmentation"])
+ combined_segmentation, cse_dets, matches = combine_cse_maskrcnn_dets(
+ maskrcnn_person["segmentation"], cse_dets, self.mask_cse_iou_combine_threshold)
+
+ persons_with_cse = CSEPersonDetection(
+ combined_segmentation, cse_dets, **self.cse_post_process_cfg,
+ embed_map=embed_map, orig_imshape_CHW=im.shape
+ )
+ persons_with_cse.pre_process()
+ not_matched = [i for i in range(maskrcnn_person["segmentation"].shape[0]) if i not in matches[:, 0]]
+ persons_without_cse = PersonDetection(
+ maskrcnn_person["segmentation"][not_matched], **self.cse_post_process_cfg,
+ orig_imshape_CHW=im.shape
+ )
+ persons_without_cse.pre_process()
+
+ face_boxes_covered = box1_inside_box2(face_boxes, persons_with_cse.dilated_boxes).logical_or(
+ box1_inside_box2(face_boxes, persons_without_cse.dilated_boxes)
+ )
+ face_boxes = face_boxes[face_boxes_covered.logical_not()]
+ face_boxes = FaceDetection(face_boxes, **self.face_post_process_cfg)
+
+ # Order matters. The anonymizer will anonymize FIFO.
+ # Later detections will overwrite.
+ all_detections = [face_boxes, maskrcnn_other, persons_without_cse, persons_with_cse]
+ return all_detections
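
The `# This can be batched` note in `box1_inside_box2` points at a straightforward vectorization: compare all box pairs at once and reduce over the second axis. A possible sketch, illustrative only and not part of the commit, assuming the same XYXY convention and strict inequalities as the loop above:

import torch

def box1_inside_box2_batched(box1: torch.Tensor, box2: torch.Tensor) -> torch.Tensor:
    # box1: [N, 4], box2: [M, 4]; returns a [N] bool tensor that is True where
    # box1[i] lies strictly inside at least one box of box2.
    lt_inside = box1[:, None, :2] > box2[None, :, :2]                # [N, M, 2]
    rb_inside = box1[:, None, 2:] < box2[None, :, 2:]                # [N, M, 2]
    inside = torch.cat((lt_inside, rb_inside), dim=-1).all(dim=-1)   # [N, M]
    return inside.any(dim=1)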
dp2/detection/deep_privacy1_detector.py ADDED
@@ -0,0 +1,106 @@
+ import torch
+ import tops
+ import lzma
+ from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
+ from .base import BaseDetector
+ from face_detection import build_detector as build_face_detector
+ from .structures import FaceDetection
+ from tops import logger
+ from pathlib import Path
+
+ def is_keypoint_within_bbox(x0, y0, x1, y1, keypoint):
+ keypoint = keypoint[:3, :] # only nose + eyes are relevant
+ kp_X = keypoint[:, 0]
+ kp_Y = keypoint[:, 1]
+ within_X = (kp_X >= x0).all() and (kp_X <= x1).all()
+ within_Y = (kp_Y >= y0).all() and (kp_Y <= y1).all()
+ return within_X and within_Y
+
+
+ def match_bbox_keypoint(bounding_boxes, keypoints):
+ """
+ bounding_boxes shape: [N, 4]
+ keypoints: [N persons, K keypoints, (x, y)]
+ """
+ if len(bounding_boxes) == 0 or len(keypoints) == 0:
+ return torch.empty((0, 4)), torch.empty((0, 7, 2))
+ assert bounding_boxes.shape[1] == 4,\
+ f"Shape was: {bounding_boxes.shape}"
+ assert keypoints.shape[-1] == 2,\
+ f"Expected (x,y) in last axis, got: {keypoints.shape}"
+ assert keypoints.shape[1] in (5, 7),\
+ f"Expected 5 or 7 keypoints. Keypoint shape was: {keypoints.shape}"
+
+ matches = []
+ for bbox_idx, bbox in enumerate(bounding_boxes):
+ keypoint = None
+ for kp_idx, keypoint in enumerate(keypoints):
+ if kp_idx in (x[1] for x in matches):
+ continue
+ if is_keypoint_within_bbox(*bbox, keypoint):
+ matches.append((bbox_idx, kp_idx))
+ break
+ keypoint_idx = [x[1] for x in matches]
+ bbox_idx = [x[0] for x in matches]
+ return bounding_boxes[bbox_idx], keypoints[keypoint_idx]
+
+
+ class DeepPrivacy1Detector(BaseDetector):
+
+ def __init__(self,
+ keypoint_threshold: float,
+ face_detector_cfg,
+ score_threshold: float,
+ face_post_process_cfg,
+ **kwargs):
+ super().__init__(**kwargs)
+ self.keypoint_detector = tops.to_cuda(keypointrcnn_resnet50_fpn(
+ weights=KeypointRCNN_ResNet50_FPN_Weights.COCO_V1).eval())
+ self.keypoint_threshold = keypoint_threshold
+ self.face_detector = build_face_detector(**face_detector_cfg, confidence_threshold=score_threshold)
+ self.face_mean = tops.to_cuda(torch.from_numpy(self.face_detector.mean).view(3, 1, 1))
+ self.face_post_process_cfg = face_post_process_cfg
+
+ @torch.no_grad()
+ def _detect_faces(self, im: torch.Tensor):
+ H, W = im.shape[1:]
+ im = im.float() - self.face_mean
+ im = self.face_detector.resize(im[None], 1.0)
+ boxes_XYXY = self.face_detector._batched_detect(im)[0][:, :-1] # Remove score
+ boxes_XYXY[:, [0, 2]] *= W
+ boxes_XYXY[:, [1, 3]] *= H
+ return boxes_XYXY.round().long().cpu()
+
+ @torch.no_grad()
+ def _detect_keypoints(self, img: torch.Tensor):
+ img = img.float() / 255
+ outputs = self.keypoint_detector([img])
+
+ # Shape: [N persons, K keypoints, (x,y,visibility)]
+ keypoints = outputs[0]["keypoints"]
+ scores = outputs[0]["scores"]
+ assert list(scores) == sorted(list(scores))[::-1]
+ mask = scores >= self.keypoint_threshold
+ keypoints = keypoints[mask, :, :2]
+ return keypoints[:, :7, :2]
+
+ def __call__(self, *args, **kwargs):
+ return self.forward(*args, **kwargs)
+
+ @torch.no_grad()
+ def forward(self, im: torch.Tensor):
+ face_boxes = self._detect_faces(im)
+ keypoints = self._detect_keypoints(im)
+ face_boxes, keypoints = match_bbox_keypoint(face_boxes, keypoints)
+ face_boxes = FaceDetection(face_boxes, **self.face_post_process_cfg, keypoints=keypoints)
+ return [face_boxes]
+
+ def load_from_cache(self, cache_path: Path):
+ logger.log(f"Loading detection from cache path: {cache_path}",)
+ with lzma.open(cache_path, "rb") as fp:
+ state_dict = torch.load(fp, map_location="cpu")
+ kwargs = self.face_post_process_cfg
+ return [
+ state["cls"].from_state_dict(**kwargs, state_dict=state)
+ for state in state_dict
+ ]
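
`match_bbox_keypoint` pairs each face box with the first unmatched keypoint set whose nose and eye points fall inside the box (greedy first-fit matching); boxes with no keypoints inside them are dropped. A small illustrative example with made-up tensors (not part of the commit):

import torch
from dp2.detection.deep_privacy1_detector import match_bbox_keypoint  # module path from this commit

boxes = torch.tensor([[0., 0., 10., 10.],
                      [20., 20., 30., 30.]])
# Two persons, 7 keypoints each, (x, y); only the first three
# (nose and eyes) are checked by is_keypoint_within_bbox.
kps = torch.zeros(2, 7, 2)
kps[0, :3] = torch.tensor([[25., 25.], [24., 24.], [26., 26.]])  # falls inside the second box
kps[1, :3] = torch.tensor([[5., 5.], [4., 4.], [6., 6.]])        # falls inside the first box

matched_boxes, matched_kps = match_bbox_keypoint(boxes, kps)
# Box 0 is paired with keypoint set 1, and box 1 with keypoint set 0.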
dp2/detection/face_detector.py ADDED
@@ -0,0 +1,62 @@
+ import torch
+ import lzma
+ import tops
+ from pathlib import Path
+ from dp2.detection.base import BaseDetector
+ from face_detection import build_detector as build_face_detector
+ from .structures import FaceDetection
+ from tops import logger
+
+
+ def box1_inside_box2(box1: torch.Tensor, box2: torch.Tensor):
+ assert len(box1.shape) == 2
+ assert len(box2.shape) == 2
+ box1_inside = torch.zeros(box1.shape[0], device=box1.device, dtype=torch.bool)
+ # This can be batched
+ for i, box in enumerate(box1):
+ is_outside_lefttop = (box[None, [0, 1]] <= box2[:, [0, 1]]).any(dim=1)
+ is_outside_rightbot = (box[None, [2, 3]] >= box2[:, [2, 3]]).any(dim=1)
+ is_outside = is_outside_lefttop.logical_or(is_outside_rightbot)
+ box1_inside[i] = is_outside.logical_not().any()
+ return box1_inside
+
+
+ class FaceDetector(BaseDetector):
+
+ def __init__(
+ self,
+ face_detector_cfg: dict,
+ score_threshold: float,
+ face_post_process_cfg: dict,
+ **kwargs
+ ) -> None:
+ super().__init__(**kwargs)
+ self.face_detector = build_face_detector(**face_detector_cfg, confidence_threshold=score_threshold)
+ self.face_mean = tops.to_cuda(torch.from_numpy(self.face_detector.mean).view(3, 1, 1))
+ self.face_post_process_cfg = face_post_process_cfg
+
+ def __call__(self, *args, **kwargs):
+ return self.forward(*args, **kwargs)
+
+ def _detect_faces(self, im: torch.Tensor):
+ H, W = im.shape[1:]
+ im = im.float() - self.face_mean
+ im = self.face_detector.resize(im[None], 1.0)
+ boxes_XYXY = self.face_detector._batched_detect(im)[0][:, :-1] # Remove score
+ boxes_XYXY[:, [0, 2]] *= W
+ boxes_XYXY[:, [1, 3]] *= H
+ return boxes_XYXY.round().long().cpu()
+
+ @torch.no_grad()
+ def forward(self, im: torch.Tensor):
+ face_boxes = self._detect_faces(im)
+ face_boxes = FaceDetection(face_boxes, **self.face_post_process_cfg)
+ return [face_boxes]
+
+ def load_from_cache(self, cache_path: Path):
+ logger.log(f"Loading detection from cache path: {cache_path}")
+ with lzma.open(cache_path, "rb") as fp:
+ state_dict = torch.load(fp)
+ return [
+ state["cls"].from_state_dict(state_dict=state, **self.face_post_process_cfg) for state in state_dict
+ ]
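
The `_detect_faces` pattern shared by the detectors above implies that the underlying face detector returns boxes normalized to [0, 1]: multiplying the x-coordinates by W and the y-coordinates by H maps them back to pixel coordinates of the original image. A tiny numeric check, illustrative only with made-up values:

import torch

H, W = 480, 640
boxes_XYXY = torch.tensor([[0.25, 0.10, 0.50, 0.40]])  # one normalized (x0, y0, x1, y1) box
boxes_XYXY[:, [0, 2]] *= W
boxes_XYXY[:, [1, 3]] *= H
print(boxes_XYXY.round().long())  # tensor([[160,  48, 320, 192]])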