Commit 060b41f · Parent: ead9f2a
gabgrenier committed: added harmonizer
This view is limited to 50 files because the commit contains too many changes.
- csai.py +34 -1
- harmonizer/.gitignore +101 -0
- harmonizer/README.md +101 -0
- harmonizer/pretrained/README.md +3 -0
- harmonizer/src/__init__.py +0 -0
- harmonizer/src/model/__init__.py +2 -0
- harmonizer/src/model/backbone/__init__.py +1 -0
- harmonizer/src/model/backbone/efficientnet/__init__.py +116 -0
- harmonizer/src/model/backbone/efficientnet/model.py +395 -0
- harmonizer/src/model/backbone/efficientnet/utils.py +586 -0
- harmonizer/src/model/enhancer.py +40 -0
- harmonizer/src/model/filter.py +231 -0
- harmonizer/src/model/harmonizer.py +44 -0
- harmonizer/src/model/module.py +80 -0
- harmonizer/src/requirements.txt +6 -0
- harmonizer/src/train/README.md +14 -0
- harmonizer/src/train/harmonizer/__init__.py +0 -0
- harmonizer/src/train/harmonizer/criterion.py +47 -0
- harmonizer/src/train/harmonizer/data.py +198 -0
- harmonizer/src/train/harmonizer/func.py +41 -0
- harmonizer/src/train/harmonizer/model.py +41 -0
- harmonizer/src/train/harmonizer/module/__init__.py +1 -0
- harmonizer/src/train/harmonizer/module/backbone/__init__.py +1 -0
- harmonizer/src/train/harmonizer/module/backbone/efficientnet/__init__.py +116 -0
- harmonizer/src/train/harmonizer/module/backbone/efficientnet/model.py +395 -0
- harmonizer/src/train/harmonizer/module/backbone/efficientnet/utils.py +586 -0
- harmonizer/src/train/harmonizer/module/filter.py +231 -0
- harmonizer/src/train/harmonizer/module/harmonizer.py +83 -0
- harmonizer/src/train/harmonizer/module/module.py +80 -0
- harmonizer/src/train/harmonizer/proxy.py +20 -0
- harmonizer/src/train/harmonizer/script/train.py +85 -0
- harmonizer/src/train/harmonizer/trainer.py +322 -0
- harmonizer/src/train/torchtask/__init__.py +9 -0
- harmonizer/src/train/torchtask/nn/__init__.py +3 -0
- harmonizer/src/train/torchtask/nn/data.py +190 -0
- harmonizer/src/train/torchtask/nn/func.py +99 -0
- harmonizer/src/train/torchtask/nn/lrer.py +179 -0
- harmonizer/src/train/torchtask/nn/module/__init__.py +3 -0
- harmonizer/src/train/torchtask/nn/module/gaussian_blur.py +64 -0
- harmonizer/src/train/torchtask/nn/module/gaussian_noise.py +40 -0
- harmonizer/src/train/torchtask/nn/module/third_party/__init__.py +1 -0
- harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/__init__.py +12 -0
- harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/batchnorm.py +282 -0
- harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/comm.py +129 -0
- harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/replicate.py +88 -0
- harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/unittest.py +29 -0
- harmonizer/src/train/torchtask/nn/optimizer.py +247 -0
- harmonizer/src/train/torchtask/requirements.txt +5 -0
- harmonizer/src/train/torchtask/runner.py +33 -0
- harmonizer/src/train/torchtask/template/__init__.py +16 -0
csai.py
CHANGED
@@ -67,9 +67,42 @@ def process(fg, bg):
 
     # Use the final_mask_img when pasting
     bg.paste(fg, (0, 0), final_mask_img)
+
+    # now run the harmonizer to make sure the foreground matches the background
+    harmonized = harmonizer(bg, final_mask_img)
+
+    if harmonized != None:
+        return harmonized
+    else:
+        return bg
 
-    return bg
 
+def harmonizer(comp, mask):
+    try:
+        import torchvision.transforms.functional as tf
+        from harmonizer.src import model
+        harmonizer = model.Harmonizer()
+        harmonizer = harmonizer.cuda()
+        harmonizer.load_state_dict(torch.load("harmonizer/pretrained/harmonizer.pth"), strict=True)
+        harmonizer.eval()
+
+        comp = tf.to_tensor(comp)[None, ...]
+        mask = tf.to_tensor(mask)[None, ...]
+        comp = comp.cuda()
+        mask = mask.cuda()
+
+        with torch.no_grad():
+            arguments = harmonizer.predict_arguments(comp, mask)
+            harmonized = harmonizer.restore_image(comp, mask, arguments)[-1]
+
+        harmonized = np.transpose(harmonized[0].cpu().numpy(), (1, 2, 0)) * 255
+        harmonized = Image.fromarray(harmonized.astype(np.uint8))
+
+        return harmonized
+
+    except:
+        return None
+
 
 def rvm(fg):
     model = MattingNetwork('mobilenetv3').eval().cuda()  # or "resnet50"
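For context, a minimal sketch of how the new `harmonizer()` helper could be exercised on its own, assuming `comp` is the pasted RGB composite and `mask` the paste alpha, both PIL images as in the diff. The file names below are illustrative placeholders. Note two rough edges in the committed code: `harmonized != None` would more idiomatically be `harmonized is not None`, and the bare `except:` silently swallows every failure (including a missing `harmonizer/pretrained/harmonizer.pth`), in which case the un-harmonized composite is returned.

```python
# Hedged usage sketch of the harmonizer() helper added in this commit.
# File names here are placeholders, not part of the repository.
from PIL import Image

comp = Image.open("composite.png").convert("RGB")      # background with foreground pasted in
mask = Image.open("foreground_mask.png").convert("L")  # alpha mask used for the paste

result = harmonizer(comp, mask)  # PIL.Image on success, None if loading/inference failed
if result is not None:
    result.save("harmonized.png")
```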
harmonizer/.gitignore
ADDED
@@ -0,0 +1,101 @@
+# Temporary directories and files
+*.ckpt
+*.pth
+*.zip
+*.tar
+result/
+dataset/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+
+# Project files
+.vscode
harmonizer/README.md
ADDED
@@ -0,0 +1,101 @@
+<h2 align="center">Harmonizer: High-Resolution Image/Video Harmonization</h2>
+
+<p align="center"><i>Harmonizer: Learning to Perform White-Box
+Image and Video Harmonization (ECCV 2022)</i></p>
+
+<p align="center">
+  <a href="https://arxiv.org/abs/2207.01322">Paper</a> |
+  <a href="#demo">Demo</a> |
+  <a href="#code">Code</a> |
+  <a href="#license">License</a> |
+  <a href="#citation">Citation</a> |
+  <a href="#contact">Contact</a>
+</p>
+
+<p align="center">
+  <a href="https://youtu.be/kKKK3D1f_Mc">Harmonizer Result Video</a> |
+  <a href="https://youtu.be/NS8f-eJY9cc">Enhancer Result Video</a>
+</p>
+
+<div align="center"><b>Harmonizer</b> is a <b>lightweight (20MB)</b> model that enables image/video harmonization up to <b>8K</b> resolution.</div>
+<div align="center">With GPUs, Harmonizer has <b>real-time</b> performance at <b>Full HD</b> resolution.</div>
+<img src="doc/gif/harmonizer.gif" width="100%">
+
+<div align="center"><b>Enhancer</b> is a model that applies the Harmonizer architecture to image/video color enhancement.</div>
+<img src="doc/gif/enhancer.gif" width="100%">
+
+---
+
+## Demo
+
+In our demos, the <b>Harmonizer</b> model is trained on the *iHarmony4* dataset, while the <b>Enhancer</b> model is trained on the *FiveK + HDRPlus* datasets.
+
+### Online Demo
+Try our online demos for fun without code!
+
+| Image Harmonization | Image Enhancement |
+| :---: | :---: |
+| [Online Demo](https://zhke.io/?harmonizer_demo) | [Online Demo](https://zhke.io/?enhancer_demo) |
+
+<img src="doc/gif/online_demo.gif" width="100%">
+
+### Offline Demo
+We provide offline demos for image/video harmonization/enhancement.
+
+| Image Harmonization | Video Harmonization | Image Enhancement | Video Enhancement |
+| :---: | :---: | :---: | :---: |
+| [Offline Demo](demo/image_harmonization) | [Offline Demo](demo/video_harmonization) | [Offline Demo](demo/image_enhancement) | [Offline Demo](demo/video_enhancement) |
+
+
+## Code
+
+### Training
+
+The training code is released in the folder `./src/train`.
+Refer to [README.md](src/train/README.md) for more details about training.
+
+
+### Validation
+
+We provide PyTorch validation code to reproduce the iHarmony4 results reported in our [paper](https://arxiv.org/abs/2207.01322). Please:
+
+1. Download the Harmonizer model pre-trained on the iHarmony4 dataset from [this link](https://drive.google.com/file/d/15XGPQHBppaYGnhsP9l7iOGZudXNw1WbA/view?usp=sharing) and put it in the folder `./pretrained`.
+
+2. Download the four subsets of iHarmony4 from [this repository](https://github.com/bcmi/Image-Harmonization-Dataset-iHarmony4) and put them in the folder `./dataset/harmonization/iHarmony4`.
+
+3. Install python requirements. In the root path of this repository, run:
+```
+pip install -r src/requirements.txt
+```
+
+4. For validation, in the root path of this repository, run:
+```
+python -m src.val_harmonizer \
+    --pretrained ./pretrained/harmonizer \
+    --datasets HCOCO HFlickr HAdobe5k Hday2night \
+    --metric-size 256
+```
+- You can change `--datasets` to validate a specific subset.
+- You can remove `--metric-size` to calculate the metrics without resizing the outputs.
+- The metric values may differ slightly from our [paper](https://arxiv.org/abs/2207.01322) due to dependency versions.
+
+## License
+This project is released under the [Creative Commons Attribution NonCommercial ShareAlike 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) license.
+
+
+## Citation
+If this work helps your research, please consider citing:
+
+```bibtex
+@InProceedings{Harmonizer,
+  author = {Zhanghan Ke and Chunyi Sun and Lei Zhu and Ke Xu and Rynson W.H. Lau},
+  title = {Harmonizer: Learning to Perform White-Box Image and Video Harmonization},
+  booktitle = {European Conference on Computer Vision (ECCV)},
+  year = {2022},
+}
+```
+
+
+## Contact
+This repository is maintained by Zhanghan Ke ([@ZHKKKe](https://github.com/ZHKKKe)).
+For questions, please contact `kezhanghan@outlook.com`.
harmonizer/pretrained/README.md
ADDED
@@ -0,0 +1,3 @@
+## Harmonizer - Pre-Trained Models
+This folder is used to save the official pre-trained models of Harmonizer/Enhancer.
+You can download them from [this link](https://drive.google.com/drive/folders/1k7TCcwETeF5SYoD2Ic211UQyV1lwIBHY?usp=sharing).
harmonizer/src/__init__.py
ADDED
(empty file)
harmonizer/src/model/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .harmonizer import Harmonizer
+from .enhancer import Enhancer
harmonizer/src/model/backbone/__init__.py
ADDED
@@ -0,0 +1 @@
+from .efficientnet import EfficientBackbone, EfficientBackboneCommon
harmonizer/src/model/backbone/efficientnet/__init__.py
ADDED
@@ -0,0 +1,116 @@
+"""
+This EfficientNet implementation comes from:
+    Author: lukemelas (github username)
+    Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
+"""
+
+import torch
+import torch.nn as nn
+
+from .model import EfficientNet
+from .utils import round_filters, get_same_padding_conv2d
+
+
+# for EfficientNet
+class EfficientBackbone(EfficientNet):
+    def __init__(self, blocks_args=None, global_params=None):
+        super(EfficientBackbone, self).__init__(blocks_args, global_params)
+
+        self.enc_channels = [16, 24, 40, 112, 1280]
+
+        # ------------------------------------------------------------
+        # delete the useless layers
+        # ------------------------------------------------------------
+        del self._conv_stem
+        del self._bn0
+        # ------------------------------------------------------------
+
+        # ------------------------------------------------------------
+        # parameters for the input layers
+        # ------------------------------------------------------------
+        bn_mom = 1 - self._global_params.batch_norm_momentum
+        bn_eps = self._global_params.batch_norm_epsilon
+
+        in_channels = 4
+        out_channels = round_filters(32, self._global_params)
+        out_channels = int(out_channels / 2)
+        # ------------------------------------------------------------
+
+        # ------------------------------------------------------------
+        # define the input layers
+        # ------------------------------------------------------------
+        image_size = global_params.image_size
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        self._conv_fg = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
+        self._bn_fg = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
+
+        self._conv_bg = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
+        self._bn_bg = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
+        # ------------------------------------------------------------
+
+    def forward(self, xfg, xbg):
+        xfg = self._swish(self._bn_fg(self._conv_fg(xfg)))
+        xbg = self._swish(self._bn_bg(self._conv_bg(xbg)))
+
+        x = torch.cat((xfg, xbg), dim=1)
+
+        block_outputs = []
+        for idx, block in enumerate(self._blocks):
+            drop_connect_rate = self._global_params.drop_connect_rate
+            drop_connect_rate *= float(idx) / len(self._blocks)
+            x = block(x, drop_connect_rate=drop_connect_rate)
+            block_outputs.append(x)
+
+        # Head
+        x = self._swish(self._bn1(self._conv_head(x)))
+
+        return block_outputs[0], block_outputs[2], block_outputs[4], block_outputs[10], x
+
+
+# for EfficientNet
+class EfficientBackboneCommon(EfficientNet):
+    def __init__(self, blocks_args=None, global_params=None):
+        super(EfficientBackboneCommon, self).__init__(blocks_args, global_params)
+
+        self.enc_channels = [16, 24, 40, 112, 1280]
+
+        # ------------------------------------------------------------
+        # delete the useless layers
+        # ------------------------------------------------------------
+        del self._conv_stem
+        del self._bn0
+        # ------------------------------------------------------------
+
+        # ------------------------------------------------------------
+        # parameters for the input layers
+        # ------------------------------------------------------------
+        bn_mom = 1 - self._global_params.batch_norm_momentum
+        bn_eps = self._global_params.batch_norm_epsilon
+
+        in_channels = 3
+        out_channels = round_filters(32, self._global_params)
+        # ------------------------------------------------------------
+
+        # ------------------------------------------------------------
+        # define the input layers
+        # ------------------------------------------------------------
+        image_size = global_params.image_size
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        self._conv = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
+        self._bn = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
+        # ------------------------------------------------------------
+
+    def forward(self, x):
+        x = self._swish(self._bn(self._conv(x)))
+
+        block_outputs = []
+        for idx, block in enumerate(self._blocks):
+            drop_connect_rate = self._global_params.drop_connect_rate
+            drop_connect_rate *= float(idx) / len(self._blocks)
+            x = block(x, drop_connect_rate=drop_connect_rate)
+            block_outputs.append(x)
+
+        # Head
+        x = self._swish(self._bn1(self._conv_head(x)))
+
+        return block_outputs[0], block_outputs[2], block_outputs[4], block_outputs[10], x
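To make the dual-stream input concrete, here is a hedged construction sketch (not part of the commit). It assumes the `get_model_params` helper that `model.py` imports from `utils.py` keeps the upstream EfficientNet-PyTorch signature `get_model_params(model_name, override_params)`, and that the fourth input channel alongside RGB is the mask, as `in_channels = 4` above suggests; the `'efficientnet-b0'` choice and 256x256 size are illustrative assumptions.

```python
# Hedged sketch: constructing EfficientBackbone and running its two-stream forward.
import torch
from harmonizer.src.model.backbone.efficientnet import EfficientBackbone
from harmonizer.src.model.backbone.efficientnet.utils import get_model_params

blocks_args, global_params = get_model_params('efficientnet-b0', {'image_size': 256})
backbone = EfficientBackbone(blocks_args, global_params).eval()

xfg = torch.rand(1, 4, 256, 256)  # foreground stream: RGB + mask (assumed layout)
xbg = torch.rand(1, 4, 256, 256)  # background stream: RGB + mask (assumed layout)
with torch.no_grad():
    enc1, enc2, enc3, enc4, head = backbone(xfg, xbg)  # five encoder feature maps
```

Each stream gets its own half-width stem (`round_filters(32)/2` channels); the two stems are concatenated before the shared MBConv blocks, so the rest of the network is a standard EfficientNet trunk.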
harmonizer/src/model/backbone/efficientnet/model.py
ADDED
@@ -0,0 +1,395 @@
+"""model.py - Model and module class for EfficientNet.
+   They are built to mirror those in the official TensorFlow implementation.
+"""
+
+# Author: lukemelas (github username)
+# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
+# With adjustments and added comments by workingcoder (github username).
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from .utils import (
+    round_filters,
+    round_repeats,
+    drop_connect,
+    get_same_padding_conv2d,
+    get_model_params,
+    efficientnet_params,
+    load_pretrained_weights,
+    Swish,
+    MemoryEfficientSwish,
+    calculate_output_image_size
+)
+
+
+VALID_MODELS = (
+    'efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3',
+    'efficientnet-b4', 'efficientnet-b5', 'efficientnet-b6', 'efficientnet-b7',
+    'efficientnet-b8',
+
+    # Support the construction of 'efficientnet-l2' without pretrained weights
+    'efficientnet-l2'
+)
+
+
+class MBConvBlock(nn.Module):
+    """Mobile Inverted Residual Bottleneck Block.
+    Args:
+        block_args (namedtuple): BlockArgs, defined in utils.py.
+        global_params (namedtuple): GlobalParam, defined in utils.py.
+        image_size (tuple or list): [image_height, image_width].
+    References:
+        [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
+        [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
+        [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
+    """
+
+    def __init__(self, block_args, global_params, image_size=None):
+        super().__init__()
+        self._block_args = block_args
+        self._bn_mom = 1 - global_params.batch_norm_momentum  # pytorch's difference from tensorflow
+        self._bn_eps = global_params.batch_norm_epsilon
+        self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
+        self.id_skip = block_args.id_skip  # whether to use skip connection and drop connect
+
+        # Expansion phase (Inverted Bottleneck)
+        inp = self._block_args.input_filters  # number of input channels
+        oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of output channels
+        if self._block_args.expand_ratio != 1:
+            Conv2d = get_same_padding_conv2d(image_size=image_size)
+            self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
+            self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
+            # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size
+
+        # Depthwise convolution phase
+        k = self._block_args.kernel_size
+        s = self._block_args.stride
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        self._depthwise_conv = Conv2d(
+            in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise
+            kernel_size=k, stride=s, bias=False)
+        self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
+        image_size = calculate_output_image_size(image_size, s)
+
+        # Squeeze and Excitation layer, if desired
+        if self.has_se:
+            Conv2d = get_same_padding_conv2d(image_size=(1, 1))
+            num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
+            self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
+            self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
+
+        # Pointwise convolution phase
+        final_oup = self._block_args.output_filters
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
+        self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
+        self._swish = MemoryEfficientSwish()
+
+    def forward(self, inputs, drop_connect_rate=None):
+        """MBConvBlock's forward function.
+        Args:
+            inputs (tensor): Input tensor.
+            drop_connect_rate (bool): Drop connect rate (float, between 0 and 1).
+        Returns:
+            Output of this block after processing.
+        """
+
+        # Expansion and Depthwise Convolution
+        x = inputs
+        if self._block_args.expand_ratio != 1:
+            x = self._expand_conv(inputs)
+            x = self._bn0(x)
+            x = self._swish(x)
+
+        x = self._depthwise_conv(x)
+        x = self._bn1(x)
+        x = self._swish(x)
+
+        # Squeeze and Excitation
+        if self.has_se:
+            x_squeezed = F.adaptive_avg_pool2d(x, 1)
+            x_squeezed = self._se_reduce(x_squeezed)
+            x_squeezed = self._swish(x_squeezed)
+            x_squeezed = self._se_expand(x_squeezed)
+            x = torch.sigmoid(x_squeezed) * x
+
+        # Pointwise Convolution
+        x = self._project_conv(x)
+        x = self._bn2(x)
+
+        # Skip connection and drop connect
+        input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
+        if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
+            # The combination of skip connection and drop connect brings about stochastic depth.
+            if drop_connect_rate:
+                x = drop_connect(x, p=drop_connect_rate, training=self.training)
+            x = x + inputs  # skip connection
+        return x
+
+    def set_swish(self, memory_efficient=True):
+        """Sets swish function as memory efficient (for training) or standard (for export).
+        Args:
+            memory_efficient (bool): Whether to use memory-efficient version of swish.
+        """
+        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
+
+
+class EfficientNet(nn.Module):
+    """EfficientNet model.
+       Most easily loaded with the .from_name or .from_pretrained methods.
+    Args:
+        blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
+        global_params (namedtuple): A set of GlobalParams shared between blocks.
+    References:
+        [1] https://arxiv.org/abs/1905.11946 (EfficientNet)
+    Example:
+        >>> import torch
+        >>> from efficientnet.model import EfficientNet
+        >>> inputs = torch.rand(1, 3, 224, 224)
+        >>> model = EfficientNet.from_pretrained('efficientnet-b0')
+        >>> model.eval()
+        >>> outputs = model(inputs)
+    """
+
+    def __init__(self, blocks_args=None, global_params=None):
+        super().__init__()
+        assert isinstance(blocks_args, list), 'blocks_args should be a list'
+        assert len(blocks_args) > 0, 'block args must be greater than 0'
+        self._global_params = global_params
+        self._blocks_args = blocks_args
+
+        # Batch norm parameters
+        bn_mom = 1 - self._global_params.batch_norm_momentum
+        bn_eps = self._global_params.batch_norm_epsilon
+
+        # Get stem static or dynamic convolution depending on image size
+        image_size = global_params.image_size
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+
+        # Stem
+        in_channels = 3  # rgb
+        out_channels = round_filters(32, self._global_params)  # number of output channels
+        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
+        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
+        image_size = calculate_output_image_size(image_size, 2)
+
+        # Build blocks
+        self._blocks = nn.ModuleList([])
+        for block_args in self._blocks_args:
+
+            # Update block input and output filters based on depth multiplier.
+            block_args = block_args._replace(
+                input_filters=round_filters(block_args.input_filters, self._global_params),
+                output_filters=round_filters(block_args.output_filters, self._global_params),
+                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
+            )
+
+            # The first block needs to take care of stride and filter size increase.
+            self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
+            image_size = calculate_output_image_size(image_size, block_args.stride)
+            if block_args.num_repeat > 1:  # modify block_args to keep same output size
+                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
+            for _ in range(block_args.num_repeat - 1):
+                self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
+                # image_size = calculate_output_image_size(image_size, block_args.stride)  # stride = 1
+
+        # Head
+        in_channels = block_args.output_filters  # output of final block
+        out_channels = round_filters(1280, self._global_params)
+        Conv2d = get_same_padding_conv2d(image_size=image_size)
+        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
+
+        # Final linear layer
+        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
+        if self._global_params.include_top:
+            self._dropout = nn.Dropout(self._global_params.dropout_rate)
+            self._fc = nn.Linear(out_channels, self._global_params.num_classes)
+
+        # set activation to memory efficient swish by default
+        self._swish = MemoryEfficientSwish()
+
+    def set_swish(self, memory_efficient=True):
+        """Sets swish function as memory efficient (for training) or standard (for export).
+        Args:
+            memory_efficient (bool): Whether to use memory-efficient version of swish.
+        """
+        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
+        for block in self._blocks:
+            block.set_swish(memory_efficient)
+
+    def extract_endpoints(self, inputs):
+        """Use convolution layer to extract features
+        from reduction levels i in [1, 2, 3, 4, 5].
+        Args:
+            inputs (tensor): Input tensor.
+        Returns:
+            Dictionary of last intermediate features
+            with reduction levels i in [1, 2, 3, 4, 5].
+        Example:
+            >>> import torch
+            >>> from efficientnet.model import EfficientNet
+            >>> inputs = torch.rand(1, 3, 224, 224)
+            >>> model = EfficientNet.from_pretrained('efficientnet-b0')
+            >>> endpoints = model.extract_endpoints(inputs)
+            >>> print(endpoints['reduction_1'].shape)  # torch.Size([1, 16, 112, 112])
+            >>> print(endpoints['reduction_2'].shape)  # torch.Size([1, 24, 56, 56])
+            >>> print(endpoints['reduction_3'].shape)  # torch.Size([1, 40, 28, 28])
+            >>> print(endpoints['reduction_4'].shape)  # torch.Size([1, 112, 14, 14])
+            >>> print(endpoints['reduction_5'].shape)  # torch.Size([1, 320, 7, 7])
+            >>> print(endpoints['reduction_6'].shape)  # torch.Size([1, 1280, 7, 7])
+        """
+        endpoints = dict()
+
+        # Stem
+        x = self._swish(self._bn0(self._conv_stem(inputs)))
+        prev_x = x
+
+        # Blocks
+        for idx, block in enumerate(self._blocks):
+            drop_connect_rate = self._global_params.drop_connect_rate
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
+            x = block(x, drop_connect_rate=drop_connect_rate)
+            if prev_x.size(2) > x.size(2):
+                endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
+            elif idx == len(self._blocks) - 1:
+                endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
+            prev_x = x
+
+        # Head
+        x = self._swish(self._bn1(self._conv_head(x)))
+        endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
+
+        return endpoints
+
+    def extract_features(self, inputs):
+        """use convolution layer to extract feature .
+        Args:
+            inputs (tensor): Input tensor.
+        Returns:
+            Output of the final convolution
+            layer in the efficientnet model.
+        """
+        # Stem
+        x = self._swish(self._bn0(self._conv_stem(inputs)))
+
+        # Blocks
+        for idx, block in enumerate(self._blocks):
+            drop_connect_rate = self._global_params.drop_connect_rate
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
+            x = block(x, drop_connect_rate=drop_connect_rate)
+
+        # Head
+        x = self._swish(self._bn1(self._conv_head(x)))
+
+        return x
+
+    def forward(self, inputs):
+        """EfficientNet's forward function.
+           Calls extract_features to extract features, applies final linear layer, and returns logits.
+        Args:
+            inputs (tensor): Input tensor.
+        Returns:
+            Output of this model after processing.
+        """
+        # Convolution layers
+        x = self.extract_features(inputs)
+        # Pooling and final linear layer
+        x = self._avg_pooling(x)
+        if self._global_params.include_top:
+            x = x.flatten(start_dim=1)
+            x = self._dropout(x)
+            x = self._fc(x)
+        return x
+
+    @classmethod
+    def from_name(cls, model_name, in_channels=3, **override_params):
+        """Create an efficientnet model according to name.
+        Args:
+            model_name (str): Name for efficientnet.
+            in_channels (int): Input data's channel number.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'width_coefficient', 'depth_coefficient',
+                    'image_size', 'dropout_rate',
+                    'num_classes', 'batch_norm_momentum',
+                    'batch_norm_epsilon', 'drop_connect_rate',
+                    'depth_divisor', 'min_depth'
+        Returns:
+            An efficientnet model.
+        """
+        cls._check_model_name_is_valid(model_name)
+        blocks_args, global_params = get_model_params(model_name, override_params)
+        model = cls(blocks_args, global_params)
+        model._change_in_channels(in_channels)
+        return model
+
+    @classmethod
+    def from_pretrained(cls, model_name, weights_path=None, advprop=False,
+                        in_channels=3, num_classes=1000, **override_params):
+        """Create an efficientnet model according to name.
+        Args:
+            model_name (str): Name for efficientnet.
+            weights_path (None or str):
+                str: path to pretrained weights file on the local disk.
+                None: use pretrained weights downloaded from the Internet.
+            advprop (bool):
+                Whether to load pretrained weights
+                trained with advprop (valid when weights_path is None).
+            in_channels (int): Input data's channel number.
+            num_classes (int):
+                Number of categories for classification.
+                It controls the output size for final linear layer.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'width_coefficient', 'depth_coefficient',
+                    'image_size', 'dropout_rate',
+                    'batch_norm_momentum',
+                    'batch_norm_epsilon', 'drop_connect_rate',
+                    'depth_divisor', 'min_depth'
+        Returns:
+            A pretrained efficientnet model.
+        """
+        model = cls.from_name(model_name, num_classes=num_classes, **override_params)
+        load_pretrained_weights(model, model_name, weights_path=weights_path,
+                                load_fc=(num_classes == 1000), advprop=advprop)
+        model._change_in_channels(in_channels)
+        return model
+
+    @classmethod
+    def get_image_size(cls, model_name):
+        """Get the input image size for a given efficientnet model.
+        Args:
+            model_name (str): Name for efficientnet.
+        Returns:
+            Input image size (resolution).
+        """
+        cls._check_model_name_is_valid(model_name)
+        _, _, res, _ = efficientnet_params(model_name)
+        return res
+
+    @classmethod
+    def _check_model_name_is_valid(cls, model_name):
+        """Validates model name.
+        Args:
+            model_name (str): Name for efficientnet.
+        Returns:
+            bool: Is a valid name or not.
+        """
+        if model_name not in VALID_MODELS:
+            raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))
+
+    def _change_in_channels(self, in_channels):
+        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
+        Args:
+            in_channels (int): Input data's channel number.
+        """
+        if in_channels != 3:
+            Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size)
+            out_channels = round_filters(32, self._global_params)
+            self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
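A hedged sketch of how `_change_in_channels` comes into play via `from_name` (the model choice, class count, and shapes are illustrative, not from the commit):

```python
# Hedged sketch: an un-pretrained EfficientNet with a 4-channel input,
# exercising from_name() and _change_in_channels() from the file above.
import torch
from harmonizer.src.model.backbone.efficientnet.model import EfficientNet

model = EfficientNet.from_name('efficientnet-b0', in_channels=4, num_classes=10).eval()
with torch.no_grad():
    logits = model(torch.rand(2, 4, 224, 224))  # -> torch.Size([2, 10])
```

Because `in_channels != 3`, the stem convolution is rebuilt for 4 input channels after construction; everything downstream is unchanged.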
harmonizer/src/model/backbone/efficientnet/utils.py
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""utils.py - Helper functions for building the model and for loading model parameters.
|
| 2 |
+
These helper functions are built to mirror those in the official TensorFlow implementation.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# Author: lukemelas (github username)
|
| 6 |
+
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
|
| 7 |
+
# With adjustments and added comments by workingcoder (github username).
|
| 8 |
+
|
| 9 |
+
import re
|
| 10 |
+
import math
|
| 11 |
+
import collections
|
| 12 |
+
from functools import partial
|
| 13 |
+
import torch
|
| 14 |
+
from torch import nn
|
| 15 |
+
from torch.nn import functional as F
|
| 16 |
+
from torch.utils import model_zoo
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
################################################################################
|
| 20 |
+
# Help functions for model architecture
|
| 21 |
+
################################################################################
|
| 22 |
+
|
| 23 |
+
# GlobalParams and BlockArgs: Two namedtuples
|
| 24 |
+
# Swish and MemoryEfficientSwish: Two implementations of the method
|
| 25 |
+
# round_filters and round_repeats:
|
| 26 |
+
# Functions to calculate params for scaling model width and depth ! ! !
|
| 27 |
+
# get_width_and_height_from_size and calculate_output_image_size
|
| 28 |
+
# drop_connect: A structural design
|
| 29 |
+
# get_same_padding_conv2d:
|
| 30 |
+
# Conv2dDynamicSamePadding
|
| 31 |
+
# Conv2dStaticSamePadding
|
| 32 |
+
# get_same_padding_maxPool2d:
|
| 33 |
+
# MaxPool2dDynamicSamePadding
|
| 34 |
+
# MaxPool2dStaticSamePadding
|
| 35 |
+
# It's an additional function, not used in EfficientNet,
|
| 36 |
+
# but can be used in other model (such as EfficientDet).
|
| 37 |
+
|
| 38 |
+
# Parameters for the entire model (stem, all blocks, and head)
|
| 39 |
+
GlobalParams = collections.namedtuple('GlobalParams', [
|
| 40 |
+
'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate',
|
| 41 |
+
'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon',
|
| 42 |
+
'drop_connect_rate', 'depth_divisor', 'min_depth', 'include_top'])
|
| 43 |
+
|
| 44 |
+
# Parameters for an individual model block
|
| 45 |
+
BlockArgs = collections.namedtuple('BlockArgs', [
|
| 46 |
+
'num_repeat', 'kernel_size', 'stride', 'expand_ratio',
|
| 47 |
+
'input_filters', 'output_filters', 'se_ratio', 'id_skip'])
|
| 48 |
+
|
| 49 |
+
# Set GlobalParams and BlockArgs's defaults
|
| 50 |
+
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
|
| 51 |
+
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
|
| 52 |
+
|
| 53 |
+
# Swish activation function
|
| 54 |
+
if hasattr(nn, 'SiLU'):
|
| 55 |
+
Swish = nn.SiLU
|
| 56 |
+
else:
|
| 57 |
+
# For compatibility with old PyTorch versions
|
| 58 |
+
class Swish(nn.Module):
|
| 59 |
+
def forward(self, x):
|
| 60 |
+
return x * torch.sigmoid(x)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# A memory-efficient implementation of Swish function
|
| 64 |
+
class SwishImplementation(torch.autograd.Function):
|
| 65 |
+
@staticmethod
|
| 66 |
+
def forward(ctx, i):
|
| 67 |
+
result = i * torch.sigmoid(i)
|
| 68 |
+
ctx.save_for_backward(i)
|
| 69 |
+
return result
|
| 70 |
+
|
| 71 |
+
@staticmethod
|
| 72 |
+
def backward(ctx, grad_output):
|
| 73 |
+
i = ctx.saved_tensors[0]
|
| 74 |
+
sigmoid_i = torch.sigmoid(i)
|
| 75 |
+
return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class MemoryEfficientSwish(nn.Module):
|
| 79 |
+
def forward(self, x):
|
| 80 |
+
return SwishImplementation.apply(x)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def round_filters(filters, global_params):
|
| 84 |
+
"""Calculate and round number of filters based on width multiplier.
|
| 85 |
+
Use width_coefficient, depth_divisor and min_depth of global_params.
|
| 86 |
+
Args:
|
| 87 |
+
filters (int): Filters number to be calculated.
|
| 88 |
+
global_params (namedtuple): Global params of the model.
|
| 89 |
+
Returns:
|
| 90 |
+
new_filters: New filters number after calculating.
|
| 91 |
+
"""
|
| 92 |
+
multiplier = global_params.width_coefficient
|
| 93 |
+
if not multiplier:
|
| 94 |
+
return filters
|
| 95 |
+
# TODO: modify the params names.
|
| 96 |
+
# maybe the names (width_divisor,min_width)
|
| 97 |
+
# are more suitable than (depth_divisor,min_depth).
|
| 98 |
+
divisor = global_params.depth_divisor
|
| 99 |
+
min_depth = global_params.min_depth
|
| 100 |
+
filters *= multiplier
|
| 101 |
+
min_depth = min_depth or divisor # pay attention to this line when using min_depth
|
| 102 |
+
# follow the formula transferred from official TensorFlow implementation
|
| 103 |
+
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
|
| 104 |
+
if new_filters < 0.9 * filters: # prevent rounding by more than 10%
|
| 105 |
+
new_filters += divisor
|
| 106 |
+
return int(new_filters)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def round_repeats(repeats, global_params):
|
| 110 |
+
"""Calculate module's repeat number of a block based on depth multiplier.
|
| 111 |
+
Use depth_coefficient of global_params.
|
| 112 |
+
Args:
|
| 113 |
+
repeats (int): num_repeat to be calculated.
|
| 114 |
+
global_params (namedtuple): Global params of the model.
|
| 115 |
+
Returns:
|
| 116 |
+
new repeat: New repeat number after calculating.
|
| 117 |
+
"""
|
| 118 |
+
multiplier = global_params.depth_coefficient
|
| 119 |
+
if not multiplier:
|
| 120 |
+
return repeats
|
| 121 |
+
# follow the formula transferred from official TensorFlow implementation
|
| 122 |
+
return int(math.ceil(multiplier * repeats))
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def drop_connect(inputs, p, training):
|
| 126 |
+
"""Drop connect.
|
| 127 |
+
Args:
|
| 128 |
+
input (tensor: BCWH): Input of this structure.
|
| 129 |
+
p (float: 0.0~1.0): Probability of drop connection.
|
| 130 |
+
training (bool): The running mode.
|
| 131 |
+
Returns:
|
| 132 |
+
output: Output after drop connection.
|
| 133 |
+
"""
|
| 134 |
+
assert 0 <= p <= 1, 'p must be in range of [0,1]'
|
| 135 |
+
|
| 136 |
+
if not training:
|
| 137 |
+
return inputs
|
| 138 |
+
|
| 139 |
+
batch_size = inputs.shape[0]
|
| 140 |
+
keep_prob = 1 - p
|
| 141 |
+
|
| 142 |
+
# generate binary_tensor mask according to probability (p for 0, 1-p for 1)
|
| 143 |
+
random_tensor = keep_prob
|
| 144 |
+
random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
|
| 145 |
+
binary_tensor = torch.floor(random_tensor)
|
| 146 |
+
|
| 147 |
+
output = inputs / keep_prob * binary_tensor
|
| 148 |
+
return output
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def get_width_and_height_from_size(x):
|
| 152 |
+
"""Obtain height and width from x.
|
| 153 |
+
Args:
|
| 154 |
+
x (int, tuple or list): Data size.
|
| 155 |
+
Returns:
|
| 156 |
+
size: A tuple or list (H,W).
|
| 157 |
+
"""
|
| 158 |
+
if isinstance(x, int):
|
| 159 |
+
return x, x
|
| 160 |
+
if isinstance(x, list) or isinstance(x, tuple):
|
| 161 |
+
return x
|
| 162 |
+
else:
|
| 163 |
+
raise TypeError()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def calculate_output_image_size(input_image_size, stride):
|
| 167 |
+
"""Calculates the output image size when using Conv2dSamePadding with a stride.
|
| 168 |
+
Necessary for static padding. Thanks to mannatsingh for pointing this out.
|
| 169 |
+
Args:
|
| 170 |
+
input_image_size (int, tuple or list): Size of input image.
|
| 171 |
+
stride (int, tuple or list): Conv2d operation's stride.
|
| 172 |
+
Returns:
|
| 173 |
+
output_image_size: A list [H,W].
|
| 174 |
+
"""
|
| 175 |
+
if input_image_size is None:
|
| 176 |
+
return None
|
| 177 |
+
image_height, image_width = get_width_and_height_from_size(input_image_size)
|
| 178 |
+
stride = stride if isinstance(stride, int) else stride[0]
|
| 179 |
+
image_height = int(math.ceil(image_height / stride))
|
| 180 |
+
image_width = int(math.ceil(image_width / stride))
|
| 181 |
+
return [image_height, image_width]
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# Note:
|
| 185 |
+
# The following 'SamePadding' functions make output size equal ceil(input size/stride).
# Only when stride equals 1, can the output size be the same as input size.
# Don't be confused by their function names ! ! !

def get_same_padding_conv2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
       Static padding is necessary for ONNX exporting of models.
    Args:
        image_size (int or tuple): Size of the image.
    Returns:
        Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
    """
    if image_size is None:
        return Conv2dDynamicSamePadding
    else:
        return partial(Conv2dStaticSamePadding, image_size=image_size)


class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow, for a dynamic image size.
       The padding is operated in forward function by calculating dynamically.
    """

    # Tips for 'SAME' mode padding.
    #     Given the following:
    #         i: width or height
    #         s: stride
    #         k: kernel size
    #         d: dilation
    #         p: padding
    #     Output after Conv2d:
    #         o = floor((i+p-((k-1)*d+1))/s+1)
    # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
    # => p = (i-1)*s+((k-1)*d+1)-i

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)  # change the output size according to stride ! ! !
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Conv2dStaticSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
       The padding module is calculated in the constructor, then used in forward.
    """

    # With the same calculation as Conv2dDynamicSamePadding

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2,
                                                pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x


def get_same_padding_maxPool2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
       Static padding is necessary for ONNX exporting of models.
    Args:
        image_size (int or tuple): Size of the image.
    Returns:
        MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding.
    """
    if image_size is None:
        return MaxPool2dDynamicSamePadding
    else:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)


class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size.
       The padding is operated in forward function by calculating dynamically.
    """

    def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False):
        super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode)
        self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size
        self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)


class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
       The padding module is calculated in the constructor, then used in forward.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        super().__init__(kernel_size, stride, **kwargs)
        self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size
        self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                         self.dilation, self.ceil_mode, self.return_indices)
        return x


################################################################################
# Helper functions for loading model params
################################################################################

# BlockDecoder: A Class for encoding and decoding BlockArgs
# efficientnet_params: A function to query compound coefficient
# get_model_params and efficientnet:
#     Functions to get BlockArgs and GlobalParams for efficientnet
# url_map and url_map_advprop: Dicts of url_map for pretrained weights
# load_pretrained_weights: A function to load pretrained weights

class BlockDecoder(object):
    """Block Decoder for readability,
       straight from the official TensorFlow repository.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.
        Args:
            block_string (str): A string notation of arguments.
                                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.
        Returns:
            BlockArgs: The namedtuple defined at the top of this file.
        """
        assert isinstance(block_string, str)

        ops = block_string.split('_')
        options = {}
        for op in ops:
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride
        assert (('s' in options and len(options['s']) == 1) or
                (len(options['s']) == 2 and options['s'][0] == options['s'][1]))

        return BlockArgs(
            num_repeat=int(options['r']),
            kernel_size=int(options['k']),
            stride=[int(options['s'][0])],
            expand_ratio=int(options['e']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            se_ratio=float(options['se']) if 'se' in options else None,
            id_skip=('noskip' not in block_string))

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.
        Args:
            block (namedtuple): A BlockArgs type argument.
        Returns:
            block_string: A String form of BlockArgs.
        """
        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (block.strides[0], block.strides[1]),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters
        ]
        if 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.
        Args:
            string_list (list[str]): A list of strings, each string is a notation of block.
        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        blocks_args = []
        for block_string in string_list:
            blocks_args.append(BlockDecoder._decode_block_string(block_string))
        return blocks_args

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.
        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.
        Returns:
            block_strings: A list of strings, each string is a notation of block.
        """
        block_strings = []
        for block in blocks_args:
            block_strings.append(BlockDecoder._encode_block_string(block))
        return block_strings


def efficientnet_params(model_name):
    """Map EfficientNet model name to parameter coefficients.
    Args:
        model_name (str): Model name to be queried.
    Returns:
        params_dict[model_name]: A (width,depth,res,dropout) tuple.
    """
    params_dict = {
        # Coefficients:   width,depth,res,dropout
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
        'efficientnet-b8': (2.2, 3.6, 672, 0.5),
        'efficientnet-l2': (4.3, 5.3, 800, 0.5),
    }
    return params_dict[model_name]


def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None,
                 dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000, include_top=False):
    """Create BlockArgs and GlobalParams for efficientnet model.
    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)
            Meaning as the name suggests.
    Returns:
        blocks_args, global_params.
    """

    # Blocks args for the whole model (efficientnet-b0 by default)
    # It will be modified in the construction of EfficientNet Class according to model
    blocks_args = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    blocks_args = BlockDecoder.decode(blocks_args)

    global_params = GlobalParams(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,

        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
        include_top=include_top,
    )

    return blocks_args, global_params


def get_model_params(model_name, override_params):
    """Get the block args and global params for a given model name.
    Args:
        model_name (str): Model's name.
        override_params (dict): A dict to modify global_params.
    Returns:
        blocks_args, global_params
    """
    if model_name.startswith('efficientnet'):
        w, d, s, p = efficientnet_params(model_name)
        # note: all models have drop connect rate = 0.2
        blocks_args, global_params = efficientnet(
            width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
    else:
        raise NotImplementedError('model name is not pre-defined: {}'.format(model_name))
    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params


# train with Standard methods
# check more details in paper (EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks)
url_map = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth',
}

# train with Adversarial Examples (AdvProp)
# check more details in paper (Adversarial Examples Improve Image Recognition)
url_map_advprop = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth',
    'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth',
}

# TODO: add the pretrained weights url map of 'efficientnet-l2'


def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True):
    """Loads pretrained weights from weights path or download using url.
    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        # assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
    else:
        state_dict.pop('_fc.weight')
        state_dict.pop('_fc.bias')
        ret = model.load_state_dict(state_dict, strict=False)
        # assert set(ret.missing_keys) == set(
        #     ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
        # assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)

    if verbose:
        print('Loaded pretrained weights for {}'.format(model_name))
harmonizer/src/model/enhancer.py
ADDED
@@ -0,0 +1,40 @@
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as tf

from .filter import Filter
from .backbone import EfficientBackboneCommon
from .module import CascadeArgumentRegressor, FilterPerformer


class Enhancer(nn.Module):
    def __init__(self):
        super(Enhancer, self).__init__()

        self.input_size = (256, 256)
        self.filter_types = [
            Filter.BRIGHTNESS,
            Filter.CONTRAST,
            Filter.SATURATION,
            Filter.HIGHLIGHT,
            Filter.SHADOW,
        ]

        self.backbone = EfficientBackboneCommon.from_name('efficientnet-b0')
        self.regressor = CascadeArgumentRegressor(1280, 160, 1, len(self.filter_types))
        self.performer = FilterPerformer(self.filter_types)

    def predict_arguments(self, x, mask):
        x = F.interpolate(x, self.input_size, mode='bilinear', align_corners=False)
        enc2x, enc4x, enc8x, enc16x, enc32x = self.backbone(x)
        arguments = self.regressor(enc32x)

        return arguments

    def restore_image(self, x, mask, arguments):
        assert len(arguments) == len(self.filter_types)

        arguments = [torch.clamp(arg, -1, 1).view(-1, 1, 1, 1) for arg in arguments]
        return self.performer.restore(x, mask, arguments)
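Not part of the commit — a minimal usage sketch of the `Enhancer` above: the filter arguments are predicted on a 256x256 copy, then the filters run at full resolution. Without a trained checkpoint the output is meaningless, and the import line assumes the `harmonizer/` folder is on PYTHONPATH:

```python
import torch

from src.model.enhancer import Enhancer  # assumes harmonizer/ is on PYTHONPATH

model = Enhancer().eval()
x = torch.rand(1, 3, 768, 1024)     # image to enhance, values in [0, 1]
mask = torch.ones(1, 1, 768, 1024)  # enhance everywhere

with torch.no_grad():
    args = model.predict_arguments(x, mask)            # five scalar filter arguments
    enhanced = model.restore_image(x, mask, args)[-1]  # last output = all filters applied
```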
harmonizer/src/model/filter.py
ADDED
@@ -0,0 +1,231 @@
import math
from enum import Enum

import torch
import torch.nn as nn
import torch.nn.functional as F
import kornia


class BrightnessFilter(nn.Module):
    def __init__(self):
        super(BrightnessFilter, self).__init__()
        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): brightness argument with values between [-1, 1]
        """

        # convert image from RGB to HSV
        image = kornia.color.rgb_to_hsv(image)
        h = image[:, 0:1, :, :]
        s = image[:, 1:2, :, :]
        v = image[:, 2:3, :, :]

        # calculate alpha
        amask = (x >= 0).float()
        alpha = (1 / ((1 - x) + self.epsilon)) * amask + (x + 1) * (1 - amask)

        # adjust the V channel
        v = v * alpha

        # convert image from HSV to RGB
        image = torch.cat((h, s, v), dim=1)
        image = kornia.color.hsv_to_rgb(image)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class ContrastFilter(nn.Module):
    def __init__(self):
        super(ContrastFilter, self).__init__()

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): contrast argument with values between [-1, 1]
        """

        # calculate the mean of the image as the threshold
        threshold = torch.mean(image, dim=(1, 2, 3), keepdim=True)

        # pre-process x if it is a positive value
        mask = (x.detach() > 0).float()
        x_ = 255 / (256 - torch.floor(x * 255)) - 1
        x_ = x * (1 - mask) + x_ * mask

        # modify the contrast of the image
        image = image + (image - threshold) * x_

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class SaturationFilter(nn.Module):
    def __init__(self):
        super(SaturationFilter, self).__init__()

        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): saturation argument with values between [-1, 1]
        """

        # calculate the basic properties of the image
        cmin = torch.min(image, dim=1, keepdim=True)[0]
        cmax = torch.max(image, dim=1, keepdim=True)[0]
        var = cmax - cmin
        ran = cmax + cmin
        mean = ran / 2

        is_positive = (x.detach() >= 0).float()

        # calculate s
        m = (mean < 0.5).float()
        s = (var / (ran + self.epsilon)) * m + (var / (2 - ran + self.epsilon)) * (1 - m)

        # if x is positive
        m = ((x + s) > 1).float()
        a_pos = s * m + (1 - x) * (1 - m)
        a_pos = 1 / (a_pos + self.epsilon) - 1

        # if x is negative
        a_neg = 1 + x

        a = a_pos * is_positive + a_neg * (1 - is_positive)
        image = image * is_positive + mean * (1 - is_positive) + (image - mean) * a

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class TemperatureFilter(nn.Module):
    def __init__(self):
        super(TemperatureFilter, self).__init__()

        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): color temperature argument with values between [-1, 1]
        """
        # split the R/G/B channels
        R, G, B = image[:, 0:1, ...], image[:, 1:2, ...], image[:, 2:3, ...]

        # calculate the mean of each channel
        meanR = torch.mean(R, dim=(2, 3), keepdim=True)
        meanG = torch.mean(G, dim=(2, 3), keepdim=True)
        meanB = torch.mean(B, dim=(2, 3), keepdim=True)

        # calculate correction factors
        gray = (meanR + meanG + meanB) / 3
        coefR = gray / (meanR + self.epsilon)
        coefG = gray / (meanG + self.epsilon)
        coefB = gray / (meanB + self.epsilon)
        aR = 1 - coefR
        aG = 1 - coefG
        aB = 1 - coefB

        # adjust temperature
        is_positive = (x.detach() > 0).float()
        is_negative = (x.detach() < 0).float()
        is_zero = (x.detach() == 0).float()

        meanR_ = meanR + x * torch.sign(x) * is_negative
        meanG_ = meanG + x * torch.sign(x) * 0.5 * (1 - is_zero)
        meanB_ = meanB + x * torch.sign(x) * is_positive
        gray_ = (meanR_ + meanG_ + meanB_) / 3

        coefR_ = gray_ / (meanR_ + self.epsilon) + aR
        coefG_ = gray_ / (meanG_ + self.epsilon) + aG
        coefB_ = gray_ / (meanB_ + self.epsilon) + aB

        R_ = coefR_ * R
        G_ = coefG_ * G
        B_ = coefB_ * B

        # the RGB image with the adjusted temperature
        image = torch.cat((R_, G_, B_), dim=1)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class HighlightFilter(nn.Module):
    def __init__(self):
        super(HighlightFilter, self).__init__()

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): highlight argument with values between [-1, 1]
        """

        x = x + 1

        image = kornia.enhance.invert(image, image.detach() * 0 + 1)
        image = torch.clamp(torch.pow(image + 1e-9, x), 0.0, 1.0)
        image = kornia.enhance.invert(image, image.detach() * 0 + 1)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class ShadowFilter(nn.Module):
    def __init__(self):
        super(ShadowFilter, self).__init__()

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): shadow argument with values between [-1, 1]
        """

        x = -x + 1
        image = torch.clamp(torch.pow(image + 1e-9, x), 0.0, 1.0)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class Filter(Enum):
    BRIGHTNESS = 1
    CONTRAST = 2
    SATURATION = 3
    TEMPERATURE = 4
    HIGHLIGHT = 5
    SHADOW = 6


FILTER_MODULES = {
    Filter.BRIGHTNESS: BrightnessFilter,
    Filter.CONTRAST: ContrastFilter,
    Filter.SATURATION: SaturationFilter,
    Filter.TEMPERATURE: TemperatureFilter,
    Filter.HIGHLIGHT: HighlightFilter,
    Filter.SHADOW: ShadowFilter,
}
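Not part of the commit — a quick shape and gradient check of the filters above (run next to `filter.py` so that `BrightnessFilter` and its `kornia` dependency are importable):

```python
import torch

bf = BrightnessFilter()
image = torch.rand(2, 3, 64, 64, requires_grad=True)
arg = torch.full((2, 1, 1, 1), 0.3)  # positive argument -> brighten

out = bf(image, arg)
assert out.shape == image.shape
assert 0.0 <= float(out.min()) and float(out.max()) <= 1.0
out.sum().backward()  # the filters stay differentiable end to end
```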
harmonizer/src/model/harmonizer.py
ADDED
@@ -0,0 +1,44 @@
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as tf

from .filter import Filter
from .backbone import EfficientBackbone
from .module import CascadeArgumentRegressor, FilterPerformer


class Harmonizer(nn.Module):
    def __init__(self):
        super(Harmonizer, self).__init__()

        self.input_size = (256, 256)
        self.filter_types = [
            Filter.TEMPERATURE,
            Filter.BRIGHTNESS,
            Filter.CONTRAST,
            Filter.SATURATION,
            Filter.HIGHLIGHT,
            Filter.SHADOW,
        ]

        self.backbone = EfficientBackbone.from_name('efficientnet-b0')
        self.regressor = CascadeArgumentRegressor(1280, 160, 1, len(self.filter_types))
        self.performer = FilterPerformer(self.filter_types)

    def predict_arguments(self, comp, mask):
        comp = F.interpolate(comp, self.input_size, mode='bilinear', align_corners=False)
        mask = F.interpolate(mask, self.input_size, mode='bilinear', align_corners=False)

        fg = torch.cat((comp, mask), dim=1)
        bg = torch.cat((comp, (1 - mask)), dim=1)

        enc2x, enc4x, enc8x, enc16x, enc32x = self.backbone(fg, bg)
        arguments = self.regressor(enc32x)
        return arguments

    def restore_image(self, comp, mask, arguments):
        assert len(arguments) == len(self.filter_types)

        arguments = [torch.clamp(arg, -1, 1).view(-1, 1, 1, 1) for arg in arguments]
        return self.performer.restore(comp, mask, arguments)
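Not part of the commit — an inference sketch for the `Harmonizer` above; the checkpoint path is hypothetical and the import assumes the `harmonizer/` folder is on PYTHONPATH:

```python
import torch

from src.model.harmonizer import Harmonizer  # assumes harmonizer/ is on PYTHONPATH

model = Harmonizer().eval()
# model.load_state_dict(torch.load('pretrained/harmonizer.pth'))  # hypothetical path

comp = torch.rand(1, 3, 512, 512)   # composite image in [0, 1]
mask = torch.zeros(1, 1, 512, 512)  # 1 marks the pasted foreground
mask[:, :, 128:384, 128:384] = 1

with torch.no_grad():
    args = model.predict_arguments(comp, mask)              # six filter arguments
    harmonized = model.restore_image(comp, mask, args)[-1]  # final harmonized image
```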
harmonizer/src/model/module.py
ADDED
@@ -0,0 +1,80 @@
import cv2
import math
from enum import Enum

import torch
from torch import nn
import torch.nn.functional as F

from .filter import Filter, FILTER_MODULES


class CascadeArgumentRegressor(nn.Module):
    def __init__(self, in_channels, base_channels, out_channels, head_num):
        super(CascadeArgumentRegressor, self).__init__()
        self.in_channels = in_channels
        self.base_channels = base_channels
        self.out_channels = out_channels
        self.head_num = head_num

        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.f = nn.Linear(self.in_channels, 160)
        self.g = nn.Linear(self.in_channels, self.base_channels)

        self.headers = nn.ModuleList()
        for i in range(0, self.head_num):
            self.headers.append(
                nn.ModuleList([
                    nn.Linear(160 + self.base_channels, self.base_channels),
                    nn.Linear(self.base_channels, self.out_channels),
                ])
            )

    def forward(self, x):
        x = self.pool(x)
        n, c, _, _ = x.shape
        x = x.view(n, c)

        f = self.f(x)
        g = self.g(x)

        pred_args = []
        for i in range(0, self.head_num):
            g = self.headers[i][0](torch.cat((f, g), dim=1))
            pred_args.append(self.headers[i][1](g))

        return pred_args


class FilterPerformer(nn.Module):
    def __init__(self, filter_types):
        super(FilterPerformer, self).__init__()

        self.filters = [FILTER_MODULES[filter_type]() for filter_type in filter_types]

    def forward(self):
        pass

    def restore(self, x, mask, arguments):
        assert len(self.filters) == len(arguments)

        outputs = []
        _image = x
        for filter, arg in zip(self.filters, arguments):
            _image = filter(_image, arg)
            outputs.append(_image * mask + x * (1 - mask))

        return outputs

    def adjust(self, image, mask, arguments):
        assert len(self.filters) == len(arguments)

        outputs = []
        _image = image
        for filter, arg in zip(reversed(self.filters), reversed(arguments)):
            _image = filter(_image, arg)
            outputs.append(_image * mask + image * (1 - mask))

        return outputs
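Not part of the commit — a dummy shape check of the cascade regressor above, matching how `Harmonizer` instantiates it:

```python
import torch

regressor = CascadeArgumentRegressor(in_channels=1280, base_channels=160, out_channels=1, head_num=6)
feat = torch.randn(2, 1280, 8, 8)  # encoder feature map, pooled internally
args = regressor(feat)
assert len(args) == 6 and all(a.shape == (2, 1) for a in args)
```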
harmonizer/src/requirements.txt
ADDED
@@ -0,0 +1,6 @@
tqdm
numpy
Pillow
argparse
scikit-image == 0.19.2
kornia
harmonizer/src/train/README.md
ADDED
@@ -0,0 +1,14 @@
## Quick Start - Training Harmonizer


1. Download the iHarmony4 dataset and put it in the folder `./harmonizer/dataset/`
2. Pre-process the iHarmony4 dataset for training.
   We provide the processed Hday2night subset as an example at [this link](https://drive.google.com/drive/folders/1HtrmUlFsT1yIfJ2JkGWwAwFDlv8StD6e?usp=sharing).
   You should convert the other subsets to the same format for training
   (a sketch of the expected folder layout follows this file).
   Otherwise, you need to implement new dataset loaders in the file `./harmonizer/data.py` to load datasets in other formats.
3. Run the training script by:
   ```
   cd ./harmonizer
   python -m script.train
   ```
   You can configure the training arguments in the script.
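Not part of the commit — judging from the loaders in `data.py` below, each processed subset is expected to look roughly like this (an assumption based on the code, not an official spec):

```
dataset/
  Hday2night/
    comp/    # composite images (used by OriginalIHarmony4)
    mask/    # foreground masks (or matte/, for HarmonizerIHarmony4)
    image/   # ground-truth real images
```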
harmonizer/src/train/harmonizer/__init__.py
ADDED
File without changes
harmonizer/src/train/harmonizer/criterion.py
ADDED
@@ -0,0 +1,47 @@
import torch
import torch.nn as nn

import torchtask


def add_parser_arguments(parser):
    torchtask.criterion_template.add_parser_arguments(parser)


def harmonizer_loss():
    return HarmonizerLoss


class AbsoluteLoss(nn.Module):
    def __init__(self, epsilon=1e-6):
        super(AbsoluteLoss, self).__init__()
        self.epsilon = epsilon

    def forward(self, pred, gt):
        loss = torch.sqrt((pred - gt) ** 2 + self.epsilon)
        return loss


class HarmonizerLoss(torchtask.criterion_template.TaskCriterion):
    def __init__(self, args):
        super(HarmonizerLoss, self).__init__(args)

        self.l1 = AbsoluteLoss()
        self.l2 = nn.MSELoss(reduction='none')

    def forward(self, pred, gt, inp):
        pred_outputs, = pred
        x, mask = inp

        assert len(pred_outputs) == len(gt)

        image_losses = []
        for pred_, gt_ in zip(pred_outputs, gt):
            l1_loss = torch.sum(self.l1(pred_, gt_) * mask, dim=(1, 2, 3)) / (torch.sum(mask, dim=(1, 2, 3)) + 1e-6)
            l2_loss = torch.sum(self.l2(pred_, gt_) * mask, dim=(1, 2, 3)) / (torch.sum(mask, dim=(1, 2, 3)) + 1e-6) * 10
            loss = (l1_loss + l2_loss)
            image_losses.append(loss)

        return image_losses
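Not part of the commit — the foreground-weighted loss normalization above, reproduced in isolation on dummy tensors:

```python
import torch

l1 = AbsoluteLoss()
pred = torch.rand(2, 3, 64, 64)
gt = torch.rand(2, 3, 64, 64)
mask = torch.ones(2, 1, 64, 64)  # broadcasts over the 3 color channels

l1_loss = torch.sum(l1(pred, gt) * mask, dim=(1, 2, 3)) / (torch.sum(mask, dim=(1, 2, 3)) + 1e-6)
assert l1_loss.shape == (2,)  # one loss value per sample in the batch
```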
harmonizer/src/train/harmonizer/data.py
ADDED
@@ -0,0 +1,198 @@
import os
import cv2
import random
import numpy as np
from PIL import Image

from torchvision import transforms

import torchtask


def add_parser_arguments(parser):
    torchtask.data_template.add_parser_arguments(parser)


def harmonizer_iharmony4():
    return HarmonizerIHarmony4


def original_iharmony4():
    return OriginalIHarmony4


def resize(img, size):
    interp = cv2.INTER_LINEAR

    return Image.fromarray(
        cv2.resize(np.array(img).astype('uint8'), size, interpolation=interp))


im_train_transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.03),
    transforms.ToTensor(),
])

im_val_transform = transforms.Compose([
    transforms.ToTensor(),
])


class HarmonizerIHarmony4(torchtask.data_template.TaskDataset):
    def __init__(self, args, is_train):
        super(HarmonizerIHarmony4, self).__init__(args, is_train)

        self.im_dir = os.path.join(self.root_dir, 'image')
        self.mask_dir = os.path.join(self.root_dir, 'mask')

        if not os.path.exists(self.mask_dir):
            self.mask_dir = os.path.join(self.root_dir, 'matte')

        self.sample_list = [_ for _ in os.listdir(self.im_dir)]
        self.idxs = [_ for _ in range(0, len(self.sample_list))]

        self.im_size = self.args.im_size

        self.rotation = True if self.is_train else False
        self.fliplr = True if self.is_train else False

    def __getitem__(self, idx):
        image_path = os.path.join(self.im_dir, self.sample_list[idx])
        mask_path = os.path.join(self.mask_dir, self.sample_list[idx])

        image = self.im_loader.load(image_path)
        mask = self.im_loader.load(mask_path)

        width, height = image.size

        # resize to self.im_size
        image = resize(image, (self.im_size, self.im_size))
        mask = resize(mask, (self.im_size, self.im_size))

        # convert to np array and scale to [0, 1]
        image = np.array(image).astype('float32') / 255.0
        mask = np.array(mask).astype('float32') / 255.0

        # check image shape
        if len(mask.shape) == 3:
            mask = mask[:, :, -1]

        if len(image.shape) == 2:
            image = image[:, :, None]
        if image.shape[2] == 1:
            image = np.repeat(image, 3, axis=2)
        elif image.shape[2] == 4:
            image = image[:, :, 0:3]

        # random rotate
        rerotation = 0
        if self.rotation and random.randint(0, 1) == 0:
            rotate_num = random.randint(1, 3)
            rerotation = 4 - rotate_num
            image = np.rot90(image, k=rotate_num).copy()
            mask = np.rot90(mask, k=rotate_num).copy()

        # random flip
        if self.fliplr and (random.randint(0, 1) == 0):
            image = np.fliplr(image).copy()
            mask = np.fliplr(mask).copy()

        image = Image.fromarray((image * 255.0).astype('uint8'))
        if self.is_train:
            image = im_train_transform(image)
        else:
            image = im_val_transform(image)

        mask = mask[None, :, :]
        adjusted = image.numpy() * -1

        return (adjusted, mask), (image, )


class OriginalIHarmony4(torchtask.data_template.TaskDataset):
    def __init__(self, args, is_train):
        super(OriginalIHarmony4, self).__init__(args, is_train)

        self.adjusted_dir = os.path.join(self.root_dir, 'comp')
        self.mask_dir = os.path.join(self.root_dir, 'mask')
        self.im_dir = os.path.join(self.root_dir, 'image')

        self.sample_list = [_ for _ in os.listdir(self.adjusted_dir)]
        self.idxs = [_ for _ in range(0, len(self.sample_list))]

        self.im_size = self.args.im_size

        self.rotation = True if self.is_train else False
        self.fliplr = True if self.is_train else False

    def __getitem__(self, idx):
        sname = self.sample_list[idx]
        adjusted_path = os.path.join(self.adjusted_dir, sname)
        image_path = os.path.join(self.im_dir, sname)
        mask_path = os.path.join(self.mask_dir, sname)

        if not os.path.exists(image_path):
            prefix = '_'.join(sname.split('_')[:-1])
            image_path = os.path.join(self.im_dir, '{0}.jpg'.format(prefix))
            mask_path = os.path.join(self.mask_dir, '{0}.jpg'.format(prefix))

        adjusted = self.im_loader.load(adjusted_path)
        image = self.im_loader.load(image_path)
        mask = self.im_loader.load(mask_path)

        width, height = image.size

        # resize to self.im_size
        adjusted = resize(adjusted, (self.im_size, self.im_size))
        image = resize(image, (self.im_size, self.im_size))
        mask = resize(mask, (self.im_size, self.im_size))

        # convert to np array and scale to [0, 1]
        adjusted = np.array(adjusted).astype('float32') / 255.0
        image = np.array(image).astype('float32') / 255.0
        mask = np.array(mask).astype('float32') / 255.0

        # check image shape
        if len(mask.shape) == 3:
            mask = mask[:, :, -1]

        if len(image.shape) == 2:
            image = image[:, :, None]
        if image.shape[2] == 1:
            image = np.repeat(image, 3, axis=2)
        elif image.shape[2] == 4:
            image = image[:, :, 0:3]

        if len(adjusted.shape) == 2:
            adjusted = adjusted[:, :, None]
        if adjusted.shape[2] == 1:
            adjusted = np.repeat(adjusted, 3, axis=2)
        elif adjusted.shape[2] == 4:
            adjusted = adjusted[:, :, 0:3]

        # random rotate
        rerotation = 0
        if self.rotation and random.randint(0, 1) == 0:
            rotate_num = random.randint(1, 3)
            rerotation = 4 - rotate_num
            adjusted = np.rot90(adjusted, k=rotate_num).copy()
            image = np.rot90(image, k=rotate_num).copy()
            mask = np.rot90(mask, k=rotate_num).copy()

        # random flip
        if self.fliplr and (random.randint(0, 1) == 0):
            adjusted = np.fliplr(adjusted).copy()
            image = np.fliplr(image).copy()
            mask = np.fliplr(mask).copy()

        adjusted = Image.fromarray((adjusted * 255.0).astype('uint8'))
        image = Image.fromarray((image * 255.0).astype('uint8'))

        # NOTE: do not add random color adjustment here
        adjusted = im_val_transform(adjusted)
        image = im_val_transform(image)

        mask = mask[None, :, :]

        return (adjusted, mask), (image, )
harmonizer/src/train/harmonizer/func.py
ADDED
@@ -0,0 +1,41 @@
import numpy as np
import torch

import skimage

import torchtask


def task_func():
    return HarmonizationFunc


class HarmonizationFunc(torchtask.func_template.TaskFunc):
    def __init__(self, args):
        super(HarmonizationFunc, self).__init__(args)

    def metrics(self, pred_image, gt_image, mask, meters, id_str=''):
        n, c, h, w = pred_image.shape

        assert n == 1

        total_pixels = h * w
        fg_pixels = int(torch.sum(mask, dim=(2, 3))[0][0].cpu().numpy())

        pred_image = torch.clamp(pred_image * 255, 0, 255)
        gt_image = torch.clamp(gt_image * 255, 0, 255)

        pred_image = pred_image[0].permute(1, 2, 0).detach().cpu().numpy()
        gt_image = gt_image[0].permute(1, 2, 0).detach().cpu().numpy()
        mask = mask[0].permute(1, 2, 0).detach().cpu().numpy()

        batch_mse = skimage.metrics.mean_squared_error(pred_image, gt_image)
        meters.update('{0}_{1}_mse'.format(id_str, self.METRIC_STR), batch_mse)

        batch_fmse = skimage.metrics.mean_squared_error(pred_image * mask, gt_image * mask) * total_pixels / fg_pixels
        meters.update('{0}_{1}_fmse'.format(id_str, self.METRIC_STR), batch_fmse)

        batch_psnr = skimage.metrics.peak_signal_noise_ratio(pred_image, gt_image, data_range=pred_image.max() - pred_image.min())
        meters.update('{0}_{1}_psnr'.format(id_str, self.METRIC_STR), batch_psnr)

        batch_ssim = skimage.metrics.structural_similarity(pred_image, gt_image, multichannel=True)
        meters.update('{0}_{1}_ssim'.format(id_str, self.METRIC_STR), batch_ssim)
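Not part of the commit — the fMSE rescaling used above (plain MSE over the whole frame, rescaled by the total-to-foreground pixel ratio), reproduced standalone:

```python
import numpy as np
import skimage.metrics

pred = np.random.rand(64, 64, 3)
gt = np.random.rand(64, 64, 3)
mask = np.zeros((64, 64, 1))
mask[16:48, 16:48] = 1

total_pixels = 64 * 64
fg_pixels = int(mask.sum())
fmse = skimage.metrics.mean_squared_error(pred * mask, gt * mask) * total_pixels / fg_pixels
```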
harmonizer/src/train/harmonizer/model.py
ADDED
@@ -0,0 +1,41 @@
import torch
import torch.nn.functional as F

import torchtask

from module import harmonizer as _harmonizer


def add_parser_arguments(parser):
    torchtask.model_template.add_parser_arguments(parser)


def harmonizer():
    return Harmonizer


class Harmonizer(torchtask.model_template.TaskModel):
    def __init__(self, args):
        super(Harmonizer, self).__init__(args)

        self.model = _harmonizer.Harmonizer()
        self.param_groups = [
            {'params': filter(lambda p: p.requires_grad, self.model.backbone.parameters()), 'lr': self.args.lr},
            {'params': filter(lambda p: p.requires_grad, self.model.regressor.parameters()), 'lr': self.args.lr},
            {'params': filter(lambda p: p.requires_grad, self.model.performer.parameters()), 'lr': self.args.lr},
        ]

    def forward(self, inp):
        resulter, debugger = {}, {}
        x, mask = inp
        pred = self.model(x, mask)
        resulter['outputs'] = pred
        return resulter, debugger

    def restore(self, x, mask, arguments):
        with torch.no_grad():
            return self.model.restore_image(x, mask, arguments)

    def adjust(self, x, mask, arguments):
        with torch.no_grad():
            return self.model.adjust_image(x, mask, arguments)
harmonizer/src/train/harmonizer/module/__init__.py
ADDED
@@ -0,0 +1 @@
from .harmonizer import Harmonizer
harmonizer/src/train/harmonizer/module/backbone/__init__.py
ADDED
@@ -0,0 +1 @@
from .efficientnet import EfficientBackbone, EfficientBackboneCommon
harmonizer/src/train/harmonizer/module/backbone/efficientnet/__init__.py
ADDED
@@ -0,0 +1,116 @@
"""
This EfficientNet implementation comes from:
Author: lukemelas (github username)
Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
"""

import torch
import torch.nn as nn

from .model import EfficientNet
from .utils import round_filters, get_same_padding_conv2d


# for EfficientNet
class EfficientBackbone(EfficientNet):
    def __init__(self, blocks_args=None, global_params=None):
        super(EfficientBackbone, self).__init__(blocks_args, global_params)

        self.enc_channels = [16, 24, 40, 112, 1280]

        # ------------------------------------------------------------
        # delete the useless layers
        # ------------------------------------------------------------
        del self._conv_stem
        del self._bn0
        # ------------------------------------------------------------

        # ------------------------------------------------------------
        # parameters for the input layers
        # ------------------------------------------------------------
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        in_channels = 4
        out_channels = round_filters(32, self._global_params)
        out_channels = int(out_channels / 2)
        # ------------------------------------------------------------

        # ------------------------------------------------------------
        # define the input layers
        # ------------------------------------------------------------
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_fg = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn_fg = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        self._conv_bg = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn_bg = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        # ------------------------------------------------------------

    def forward(self, xfg, xbg):
        xfg = self._swish(self._bn_fg(self._conv_fg(xfg)))
        xbg = self._swish(self._bn_bg(self._conv_bg(xbg)))

        x = torch.cat((xfg, xbg), dim=1)

        block_outputs = []
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            drop_connect_rate *= float(idx) / len(self._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)
            block_outputs.append(x)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return block_outputs[0], block_outputs[2], block_outputs[4], block_outputs[10], x


# for EfficientNet
class EfficientBackboneCommon(EfficientNet):
    def __init__(self, blocks_args=None, global_params=None):
        super(EfficientBackboneCommon, self).__init__(blocks_args, global_params)

        self.enc_channels = [16, 24, 40, 112, 1280]

        # ------------------------------------------------------------
        # delete the useless layers
        # ------------------------------------------------------------
        del self._conv_stem
        del self._bn0
        # ------------------------------------------------------------

        # ------------------------------------------------------------
        # parameters for the input layers
        # ------------------------------------------------------------
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        in_channels = 3
        out_channels = round_filters(32, self._global_params)
        # ------------------------------------------------------------

        # ------------------------------------------------------------
        # define the input layers
        # ------------------------------------------------------------
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        # ------------------------------------------------------------

    def forward(self, x):
        x = self._swish(self._bn(self._conv(x)))

        block_outputs = []
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            drop_connect_rate *= float(idx) / len(self._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)
            block_outputs.append(x)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return block_outputs[0], block_outputs[2], block_outputs[4], block_outputs[10], x
harmonizer/src/train/harmonizer/module/backbone/efficientnet/model.py
ADDED
@@ -0,0 +1,395 @@
"""model.py - Model and module class for EfficientNet.
   They are built to mirror those in the official TensorFlow implementation.
"""

# Author: lukemelas (github username)
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
# With adjustments and added comments by workingcoder (github username).

import torch
from torch import nn
from torch.nn import functional as F
from .utils import (
    round_filters,
    round_repeats,
    drop_connect,
    get_same_padding_conv2d,
    get_model_params,
    efficientnet_params,
    load_pretrained_weights,
    Swish,
    MemoryEfficientSwish,
    calculate_output_image_size
)


VALID_MODELS = (
    'efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3',
    'efficientnet-b4', 'efficientnet-b5', 'efficientnet-b6', 'efficientnet-b7',
    'efficientnet-b8',

    # Support the construction of 'efficientnet-l2' without pretrained weights
    'efficientnet-l2'
)


class MBConvBlock(nn.Module):
    """Mobile Inverted Residual Bottleneck Block.

    Args:
        block_args (namedtuple): BlockArgs, defined in utils.py.
        global_params (namedtuple): GlobalParam, defined in utils.py.
        image_size (tuple or list): [image_height, image_width].

    References:
        [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
        [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
        [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
    """

    def __init__(self, block_args, global_params, image_size=None):
        super().__init__()
        self._block_args = block_args
        self._bn_mom = 1 - global_params.batch_norm_momentum  # pytorch's difference from tensorflow
        self._bn_eps = global_params.batch_norm_epsilon
        self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
        self.id_skip = block_args.id_skip  # whether to use skip connection and drop connect

        # Expansion phase (Inverted Bottleneck)
        inp = self._block_args.input_filters  # number of input channels
        oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of output channels
        if self._block_args.expand_ratio != 1:
            Conv2d = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
            # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size

        # Depthwise convolution phase
        k = self._block_args.kernel_size
        s = self._block_args.stride
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = Conv2d(
            in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise
            kernel_size=k, stride=s, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
        image_size = calculate_output_image_size(image_size, s)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            Conv2d = get_same_padding_conv2d(image_size=(1, 1))
            num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
            self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
            self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)

        # Pointwise convolution phase
        final_oup = self._block_args.output_filters
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """MBConvBlock's forward function.

        Args:
            inputs (tensor): Input tensor.
            drop_connect_rate (bool): Drop connect rate (float, between 0 and 1).

        Returns:
            Output of this block after processing.
        """

        # Expansion and Depthwise Convolution
        x = inputs
        if self._block_args.expand_ratio != 1:
            x = self._expand_conv(inputs)
            x = self._bn0(x)
            x = self._swish(x)

        x = self._depthwise_conv(x)
        x = self._bn1(x)
        x = self._swish(x)

        # Squeeze and Excitation
        if self.has_se:
            x_squeezed = F.adaptive_avg_pool2d(x, 1)
            x_squeezed = self._se_reduce(x_squeezed)
            x_squeezed = self._swish(x_squeezed)
            x_squeezed = self._se_expand(x_squeezed)
            x = torch.sigmoid(x_squeezed) * x

        # Pointwise Convolution
        x = self._project_conv(x)
        x = self._bn2(x)

        # Skip connection and drop connect
        input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
        if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
            # The combination of skip connection and drop connect brings about stochastic depth.
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()


class EfficientNet(nn.Module):
    """EfficientNet model.
       Most easily loaded with the .from_name or .from_pretrained methods.

    Args:
        blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
        global_params (namedtuple): A set of GlobalParams shared between blocks.

    References:
        [1] https://arxiv.org/abs/1905.11946 (EfficientNet)

    Example:
        >>> import torch
        >>> from efficientnet.model import EfficientNet
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> model = EfficientNet.from_pretrained('efficientnet-b0')
        >>> model.eval()
        >>> outputs = model(inputs)
    """

    def __init__(self, blocks_args=None, global_params=None):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Batch norm parameters
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Get stem static or dynamic convolution depending on image size
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(32, self._global_params)  # number of output channels
        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        image_size = calculate_output_image_size(image_size, 2)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(block_args.input_filters, self._global_params),
                output_filters=round_filters(block_args.output_filters, self._global_params),
                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
            image_size = calculate_output_image_size(image_size, block_args.stride)
            if block_args.num_repeat > 1:  # modify block_args to keep same output size
                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
                # image_size = calculate_output_image_size(image_size, block_args.stride)  # stride = 1

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        if self._global_params.include_top:
            self._dropout = nn.Dropout(self._global_params.dropout_rate)
            self._fc = nn.Linear(out_channels, self._global_params.num_classes)

        # set activation to memory efficient swish by default
        self._swish = MemoryEfficientSwish()

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)

    def extract_endpoints(self, inputs):
        """Use convolution layer to extract features
        from reduction levels i in [1, 2, 3, 4, 5].

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Dictionary of last intermediate features
            with reduction levels i in [1, 2, 3, 4, 5].

        Example:
            >>> import torch
            >>> from efficientnet.model import EfficientNet
            >>> inputs = torch.rand(1, 3, 224, 224)
            >>> model = EfficientNet.from_pretrained('efficientnet-b0')
            >>> endpoints = model.extract_endpoints(inputs)
            >>> print(endpoints['reduction_1'].shape)  # torch.Size([1, 16, 112, 112])
            >>> print(endpoints['reduction_2'].shape)  # torch.Size([1, 24, 56, 56])
            >>> print(endpoints['reduction_3'].shape)  # torch.Size([1, 40, 28, 28])
            >>> print(endpoints['reduction_4'].shape)  # torch.Size([1, 112, 14, 14])
            >>> print(endpoints['reduction_5'].shape)  # torch.Size([1, 320, 7, 7])
            >>> print(endpoints['reduction_6'].shape)  # torch.Size([1, 1280, 7, 7])
        """
        endpoints = dict()

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))
        prev_x = x

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
            x = block(x, drop_connect_rate=drop_connect_rate)
            if prev_x.size(2) > x.size(2):
                endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
            elif idx == len(self._blocks) - 1:
                endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
            prev_x = x

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))
        endpoints['reduction_{}'.format(len(endpoints) + 1)] = x

        return endpoints

    def extract_features(self, inputs):
        """Use convolution layers to extract features.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of the final convolution
            layer in the efficientnet model.
        """
        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
            x = block(x, drop_connect_rate=drop_connect_rate)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """EfficientNet's forward function.
           Calls extract_features to extract features, applies final linear layer, and returns logits.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of this model after processing.
        """
        # Convolution layers
        x = self.extract_features(inputs)
        # Pooling and final linear layer
        x = self._avg_pooling(x)
        if self._global_params.include_top:
            x = x.flatten(start_dim=1)
            x = self._dropout(x)
            x = self._fc(x)
        return x

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create an efficientnet model according to name.

        Args:
            model_name (str): Name for efficientnet.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            An efficientnet model.
        """
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(model_name, override_params)
        model = cls(blocks_args, global_params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls, model_name, weights_path=None, advprop=False,
                        in_channels=3, num_classes=1000, **override_params):
        """Create an efficientnet model according to name.

        Args:
            model_name (str): Name for efficientnet.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            advprop (bool):
                Whether to load pretrained weights
                trained with advprop (valid when weights_path is None).
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            A pretrained efficientnet model.
        """
        model = cls.from_name(model_name, num_classes=num_classes, **override_params)
        load_pretrained_weights(model, model_name, weights_path=weights_path,
                                load_fc=(num_classes == 1000), advprop=advprop)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def get_image_size(cls, model_name):
        """Get the input image size for a given efficientnet model.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            Input image size (resolution).
        """
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validates model name.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            bool: Is a valid name or not.
        """
        if model_name not in VALID_MODELS:
            raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))

    def _change_in_channels(self, in_channels):
        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size)
            out_channels = round_filters(32, self._global_params)
            self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
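
For reference, the per-block drop-connect schedule in extract_features and extract_endpoints scales the base rate linearly with block index, so the first block is never dropped and the deepest blocks are dropped most often. A small sketch with illustrative numbers (efficientnet-b0 has 16 blocks and a base rate of 0.2):

    base_rate, num_blocks = 0.2, 16
    rates = [base_rate * idx / num_blocks for idx in range(num_blocks)]
    # rates[0] == 0.0, rates[-1] == 0.1875 -- stochastic depth grows with depth
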
harmonizer/src/train/harmonizer/module/backbone/efficientnet/utils.py
ADDED
@@ -0,0 +1,586 @@
"""utils.py - Helper functions for building the model and for loading model parameters.
   These helper functions are built to mirror those in the official TensorFlow implementation.
"""

# Author: lukemelas (github username)
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
# With adjustments and added comments by workingcoder (github username).

import re
import math
import collections
from functools import partial
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import model_zoo


################################################################################
# Help functions for model architecture
################################################################################

# GlobalParams and BlockArgs: Two namedtuples
# Swish and MemoryEfficientSwish: Two implementations of the method
# round_filters and round_repeats:
#     Functions to calculate params for scaling model width and depth ! ! !
# get_width_and_height_from_size and calculate_output_image_size
# drop_connect: A structural design
# get_same_padding_conv2d:
#     Conv2dDynamicSamePadding
#     Conv2dStaticSamePadding
# get_same_padding_maxPool2d:
#     MaxPool2dDynamicSamePadding
#     MaxPool2dStaticSamePadding
#     It's an additional function, not used in EfficientNet,
#     but can be used in other models (such as EfficientDet).

# Parameters for the entire model (stem, all blocks, and head)
GlobalParams = collections.namedtuple('GlobalParams', [
    'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate',
    'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon',
    'drop_connect_rate', 'depth_divisor', 'min_depth', 'include_top'])

# Parameters for an individual model block
BlockArgs = collections.namedtuple('BlockArgs', [
    'num_repeat', 'kernel_size', 'stride', 'expand_ratio',
    'input_filters', 'output_filters', 'se_ratio', 'id_skip'])

# Set GlobalParams and BlockArgs's defaults
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)

# Swish activation function
if hasattr(nn, 'SiLU'):
    Swish = nn.SiLU
else:
    # For compatibility with old PyTorch versions
    class Swish(nn.Module):
        def forward(self, x):
            return x * torch.sigmoid(x)


# A memory-efficient implementation of the Swish function
class SwishImplementation(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class MemoryEfficientSwish(nn.Module):
    def forward(self, x):
        return SwishImplementation.apply(x)


def round_filters(filters, global_params):
    """Calculate and round number of filters based on width multiplier.
       Use width_coefficient, depth_divisor and min_depth of global_params.

    Args:
        filters (int): Filters number to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new_filters: New filters number after calculating.
    """
    multiplier = global_params.width_coefficient
    if not multiplier:
        return filters
    # TODO: modify the params names.
    #       maybe the names (width_divisor,min_width)
    #       are more suitable than (depth_divisor,min_depth).
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    filters *= multiplier
    min_depth = min_depth or divisor  # pay attention to this line when using min_depth
    # follow the formula transferred from the official TensorFlow implementation
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
        new_filters += divisor
    return int(new_filters)


def round_repeats(repeats, global_params):
    """Calculate module's repeat number of a block based on depth multiplier.
       Use depth_coefficient of global_params.

    Args:
        repeats (int): num_repeat to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new repeat: New repeat number after calculating.
    """
    multiplier = global_params.depth_coefficient
    if not multiplier:
        return repeats
    # follow the formula transferred from the official TensorFlow implementation
    return int(math.ceil(multiplier * repeats))


def drop_connect(inputs, p, training):
    """Drop connect.

    Args:
        inputs (tensor: BCWH): Input of this structure.
        p (float: 0.0~1.0): Probability of drop connection.
        training (bool): The running mode.

    Returns:
        output: Output after drop connection.
    """
    assert 0 <= p <= 1, 'p must be in range of [0,1]'

    if not training:
        return inputs

    batch_size = inputs.shape[0]
    keep_prob = 1 - p

    # generate binary_tensor mask according to probability (p for 0, 1-p for 1)
    random_tensor = keep_prob
    random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    binary_tensor = torch.floor(random_tensor)

    output = inputs / keep_prob * binary_tensor
    return output


def get_width_and_height_from_size(x):
    """Obtain height and width from x.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        size: A tuple or list (H,W).
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, list) or isinstance(x, tuple):
        return x
    else:
        raise TypeError()


def calculate_output_image_size(input_image_size, stride):
    """Calculates the output image size when using Conv2dSamePadding with a stride.
       Necessary for static padding. Thanks to mannatsingh for pointing this out.

    Args:
        input_image_size (int, tuple or list): Size of input image.
        stride (int, tuple or list): Conv2d operation's stride.

    Returns:
        output_image_size: A list [H,W].
    """
    if input_image_size is None:
        return None
    image_height, image_width = get_width_and_height_from_size(input_image_size)
    stride = stride if isinstance(stride, int) else stride[0]
    image_height = int(math.ceil(image_height / stride))
    image_width = int(math.ceil(image_width / stride))
    return [image_height, image_width]


# Note:
# The following 'SamePadding' functions make output size equal ceil(input size/stride).
# Only when stride equals 1, can the output size be the same as input size.
# Don't be confused by their function names ! ! !

def get_same_padding_conv2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
       Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
    """
    if image_size is None:
        return Conv2dDynamicSamePadding
    else:
        return partial(Conv2dStaticSamePadding, image_size=image_size)


class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow, for a dynamic image size.
       The padding is operated in forward function by calculating dynamically.
    """

    # Tips for 'SAME' mode padding.
    #     Given the following:
    #         i: width or height
    #         s: stride
    #         k: kernel size
    #         d: dilation
    #         p: padding
    #     Output after Conv2d:
    #         o = floor((i+p-((k-1)*d+1))/s+1)
    # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
    # => p = (i-1)*s+((k-1)*d+1)-i

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)  # change the output size according to stride ! ! !
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Conv2dStaticSamePadding(nn.Conv2d):
    """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
       The padding module is calculated in construction function, then used in forward.
    """

    # With the same calculation as Conv2dDynamicSamePadding

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2,
                                                pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x


def get_same_padding_maxPool2d(image_size=None):
    """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
       Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding.
    """
    if image_size is None:
        return MaxPool2dDynamicSamePadding
    else:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)


class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size.
       The padding is operated in forward function by calculating dynamically.
    """

    def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False):
        super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode)
        self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size
        self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)


class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size.
       The padding module is calculated in construction function, then used in forward.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        super().__init__(kernel_size, stride, **kwargs)
        self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride
        self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size
        self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
        kh, kw = self.kernel_size
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = nn.Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                         self.dilation, self.ceil_mode, self.return_indices)
        return x


################################################################################
# Helper functions for loading model params
################################################################################

# BlockDecoder: A Class for encoding and decoding BlockArgs
# efficientnet_params: A function to query compound coefficient
# get_model_params and efficientnet:
#     Functions to get BlockArgs and GlobalParams for efficientnet
# url_map and url_map_advprop: Dicts of url_map for pretrained weights
# load_pretrained_weights: A function to load pretrained weights

class BlockDecoder(object):
    """Block Decoder for readability,
       straight from the official TensorFlow repository.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.

        Args:
            block_string (str): A string notation of arguments.
                                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.

        Returns:
            BlockArgs: The namedtuple defined at the top of this file.
        """
        assert isinstance(block_string, str)

        ops = block_string.split('_')
        options = {}
        for op in ops:
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride
        assert (('s' in options and len(options['s']) == 1) or
                (len(options['s']) == 2 and options['s'][0] == options['s'][1]))

        return BlockArgs(
            num_repeat=int(options['r']),
            kernel_size=int(options['k']),
            stride=[int(options['s'][0])],
            expand_ratio=int(options['e']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            se_ratio=float(options['se']) if 'se' in options else None,
            id_skip=('noskip' not in block_string))

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.

        Args:
            block (namedtuple): A BlockArgs type argument.

        Returns:
            block_string: A String form of BlockArgs.
        """
        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (block.strides[0], block.strides[1]),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters
        ]
        if 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.

        Args:
            string_list (list[str]): A list of strings, each string is a notation of block.

        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        blocks_args = []
        for block_string in string_list:
            blocks_args.append(BlockDecoder._decode_block_string(block_string))
        return blocks_args

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.

        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.

        Returns:
            block_strings: A list of strings, each string is a notation of block.
        """
        block_strings = []
        for block in blocks_args:
            block_strings.append(BlockDecoder._encode_block_string(block))
        return block_strings


def efficientnet_params(model_name):
    """Map EfficientNet model name to parameter coefficients.

    Args:
        model_name (str): Model name to be queried.

    Returns:
        params_dict[model_name]: A (width,depth,res,dropout) tuple.
    """
    params_dict = {
        # Coefficients:   width,depth,res,dropout
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
        'efficientnet-b8': (2.2, 3.6, 672, 0.5),
        'efficientnet-l2': (4.3, 5.3, 800, 0.5),
    }
    return params_dict[model_name]


def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None,
                 dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000, include_top=False):
    """Create BlockArgs and GlobalParams for efficientnet model.

    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)
            Meaning as the name suggests.

    Returns:
        blocks_args, global_params.
    """

    # Blocks args for the whole model (efficientnet-b0 by default)
    # It will be modified in the construction of EfficientNet Class according to model
    blocks_args = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    blocks_args = BlockDecoder.decode(blocks_args)

    global_params = GlobalParams(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,

        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
        include_top=include_top,
    )

    return blocks_args, global_params


def get_model_params(model_name, override_params):
    """Get the block args and global params for a given model name.

    Args:
        model_name (str): Model's name.
        override_params (dict): A dict to modify global_params.

    Returns:
        blocks_args, global_params
    """
    if model_name.startswith('efficientnet'):
        w, d, s, p = efficientnet_params(model_name)
        # note: all models have drop connect rate = 0.2
        blocks_args, global_params = efficientnet(
            width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
    else:
        raise NotImplementedError('model name is not pre-defined: {}'.format(model_name))
    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params


# trained with Standard methods
# check more details in paper (EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks)
url_map = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth',
}

# trained with Adversarial Examples (AdvProp)
# check more details in paper (Adversarial Examples Improve Image Recognition)
url_map_advprop = {
    'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth',
    'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth',
    'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth',
    'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth',
    'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth',
    'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth',
    'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth',
    'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth',
    'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth',
}

# TODO: add the pretrained weights url map of 'efficientnet-l2'


def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True):
    """Loads pretrained weights from weights path or download using url.

    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        # assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
    else:
        state_dict.pop('_fc.weight')
        state_dict.pop('_fc.bias')
        ret = model.load_state_dict(state_dict, strict=False)
        # assert set(ret.missing_keys) == set(
        #     ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
    # assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys)

    if verbose:
        print('Loaded pretrained weights for {}'.format(model_name))
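
To make the 'SAME' padding formula above concrete, here is a worked sketch with illustrative numbers (a 224x224 input, 3x3 kernel, stride 2, dilation 1):

    import math

    i, k, s, d = 224, 3, 2, 1
    o = math.ceil(i / s)                              # target output size: 112
    pad = max((o - 1) * s + (k - 1) * d + 1 - i, 0)   # total padding needed: 1
    # Conv2dDynamicSamePadding splits this as (pad // 2, pad - pad // 2) = (0, 1)
    # per spatial dimension, matching TensorFlow's asymmetric 'SAME' behaviour.

The block-string notation handled by BlockDecoder can likewise be checked by hand; decoding the first b0 block string from efficientnet() gives:

    first_block = BlockDecoder.decode(['r1_k3_s11_e1_i32_o16_se0.25'])[0]
    # -> BlockArgs(num_repeat=1, kernel_size=3, stride=[1], expand_ratio=1,
    #              input_filters=32, output_filters=16, se_ratio=0.25, id_skip=True)
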
harmonizer/src/train/harmonizer/module/filter.py
ADDED
@@ -0,0 +1,231 @@
import math
from enum import Enum

import torch
import torch.nn as nn
import torch.nn.functional as F
import kornia


class BrightnessFilter(nn.Module):
    def __init__(self):
        super(BrightnessFilter, self).__init__()
        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): brightness argument with values between [-1, 1]
        """

        # convert image from RGB to HSV
        image = kornia.color.rgb_to_hsv(image)
        h = image[:, 0:1, :, :]
        s = image[:, 1:2, :, :]
        v = image[:, 2:3, :, :]

        # calculate alpha
        amask = (x >= 0).float()
        alpha = (1 / ((1 - x) + self.epsilon)) * amask + (x + 1) * (1 - amask)

        # adjust the V channel
        v = v * alpha

        # convert image from HSV to RGB
        image = torch.cat((h, s, v), dim=1)
        image = kornia.color.hsv_to_rgb(image)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class ContrastFilter(nn.Module):
    def __init__(self):
        super(ContrastFilter, self).__init__()

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): contrast argument with values between [-1, 1]
        """

        # calculate the mean of the image as the threshold
        threshold = torch.mean(image, dim=(1, 2, 3), keepdim=True)

        # pre-process x if it is a positive value
        mask = (x.detach() > 0).float()
        x_ = 255 / (256 - torch.floor(x * 255)) - 1
        x_ = x * (1 - mask) + x_ * mask

        # modify the contrast of the image
        image = image + (image - threshold) * x_

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class SaturationFilter(nn.Module):
    def __init__(self):
        super(SaturationFilter, self).__init__()

        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): saturation argument with values between [-1, 1]
        """

        # calculate the basic properties of the image
        cmin = torch.min(image, dim=1, keepdim=True)[0]
        cmax = torch.max(image, dim=1, keepdim=True)[0]
        var = cmax - cmin
        ran = cmax + cmin
        mean = ran / 2

        is_positive = (x.detach() >= 0).float()

        # calculate s
        m = (mean < 0.5).float()
        s = (var / (ran + self.epsilon)) * m + (var / (2 - ran + self.epsilon)) * (1 - m)

        # if x is positive
        m = ((x + s) > 1).float()
        a_pos = s * m + (1 - x) * (1 - m)
        a_pos = 1 / (a_pos + self.epsilon) - 1

        # if x is negative
        a_neg = 1 + x

        a = a_pos * is_positive + a_neg * (1 - is_positive)
        image = image * is_positive + mean * (1 - is_positive) + (image - mean) * a

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image


class TemperatureFilter(nn.Module):
    def __init__(self):
        super(TemperatureFilter, self).__init__()

        self.epsilon = 1e-6

    def forward(self, image, x):
        """
        Arguments:
            image (tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
            x (tensor [n, 1, 1, 1]): color temperature argument with values between [-1, 1]
        """
        # split the R/G/B channels
        R, G, B = image[:, 0:1, ...], image[:, 1:2, ...], image[:, 2:3, ...]

        # calculate the mean of each channel
        meanR = torch.mean(R, dim=(2, 3), keepdim=True)
        meanG = torch.mean(G, dim=(2, 3), keepdim=True)
        meanB = torch.mean(B, dim=(2, 3), keepdim=True)

        # calculate correction factors
        gray = (meanR + meanG + meanB) / 3
        coefR = gray / (meanR + self.epsilon)
        coefG = gray / (meanG + self.epsilon)
        coefB = gray / (meanB + self.epsilon)
        aR = 1 - coefR
        aG = 1 - coefG
        aB = 1 - coefB

        # adjust temperature
        is_positive = (x.detach() > 0).float()
        is_negative = (x.detach() < 0).float()
        is_zero = (x.detach() == 0).float()

        meanR_ = meanR + x * torch.sign(x) * is_negative
        meanG_ = meanG + x * torch.sign(x) * 0.5 * (1 - is_zero)
        meanB_ = meanB + x * torch.sign(x) * is_positive
        gray_ = (meanR_ + meanG_ + meanB_) / 3

        coefR_ = gray_ / (meanR_ + self.epsilon) + aR
        coefG_ = gray_ / (meanG_ + self.epsilon) + aG
        coefB_ = gray_ / (meanB_ + self.epsilon) + aB

        R_ = coefR_ * R
        G_ = coefG_ * G
        B_ = coefB_ * B

        # the RGB image with the adjusted color temperature
        image = torch.cat((R_, G_, B_), dim=1)

        # clip pixel values to [0, 1]
        image = torch.clamp(image, 0.0, 1.0)

        return image

+
class HighlightFilter(nn.Module):
|
| 173 |
+
def __init__(self):
|
| 174 |
+
super(HighlightFilter, self).__init__()
|
| 175 |
+
|
| 176 |
+
def forward(self, image, x):
|
| 177 |
+
"""
|
| 178 |
+
Arguments:
|
| 179 |
+
image(tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
|
| 180 |
+
x (tensor [n, 1, 1, 1]): highlight argument with values between [-1, 1]
|
| 181 |
+
"""
|
| 182 |
+
|
| 183 |
+
x = x + 1
|
| 184 |
+
|
| 185 |
+
image = kornia.enhance.invert(image, image.detach() * 0 + 1)
|
| 186 |
+
image = torch.clamp(torch.pow(image + 1e-9, x), 0.0, 1.0)
|
| 187 |
+
image = kornia.enhance.invert(image, image.detach() * 0 + 1)
|
| 188 |
+
|
| 189 |
+
# clip pixel values to [0, 1]
|
| 190 |
+
image = torch.clamp(image, 0.0, 1.0)
|
| 191 |
+
|
| 192 |
+
return image
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
class ShadowFilter(nn.Module):
|
| 196 |
+
def __init__(self):
|
| 197 |
+
super(ShadowFilter, self).__init__()
|
| 198 |
+
|
| 199 |
+
def forward(self, image, x):
|
| 200 |
+
"""
|
| 201 |
+
Arguments:
|
| 202 |
+
image(tensor [n, 3, h, w]): RGB image with pixel values between [0, 1]
|
| 203 |
+
x (tensor [n, 1, 1, 1]): shadow argument with values between [-1, 1]
|
| 204 |
+
"""
|
| 205 |
+
|
| 206 |
+
x = -x + 1
|
| 207 |
+
image = torch.clamp(torch.pow(image + 1e-9, x), 0.0, 1.0)
|
| 208 |
+
|
| 209 |
+
# clip pixel values to [0, 1]
|
| 210 |
+
image = torch.clamp(image, 0.0, 1.0)
|
| 211 |
+
|
| 212 |
+
return image
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
class Filter(Enum):
|
| 216 |
+
BRIGHTNESS = 1
|
| 217 |
+
CONTRAST = 2
|
| 218 |
+
SATURATION = 3
|
| 219 |
+
TEMPERATURE = 4
|
| 220 |
+
HIGHLIGHT = 5
|
| 221 |
+
SHADOW = 6
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
FILTER_MODULES = {
|
| 225 |
+
Filter.BRIGHTNESS: BrightnessFilter,
|
| 226 |
+
Filter.CONTRAST: ContrastFilter,
|
| 227 |
+
Filter.SATURATION: SaturationFilter,
|
| 228 |
+
Filter.TEMPERATURE: TemperatureFilter,
|
| 229 |
+
Filter.HIGHLIGHT: HighlightFilter,
|
| 230 |
+
Filter.SHADOW: ShadowFilter,
|
| 231 |
+
}
|
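A quick usage sketch for the filters above (assuming torch and kornia are installed; the tensor shapes and the 0.5 argument are illustrative, not values used by the project):

import torch

# apply BrightnessFilter to a random image batch
f = BrightnessFilter()
image = torch.rand(2, 3, 64, 64)        # RGB in [0, 1]
x = torch.full((2, 1, 1, 1), 0.5)       # positive argument -> brighten
out = f(image, x)
assert out.shape == image.shape
assert out.min() >= 0.0 and out.max() <= 1.0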
harmonizer/src/train/harmonizer/module/harmonizer.py
ADDED
|
@@ -0,0 +1,83 @@
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as tf

from .filter import Filter
from .backbone import EfficientBackbone
from .module import CascadeArgumentRegressor, FilterPerformer


class Harmonizer(nn.Module):
    def __init__(self):
        super(Harmonizer, self).__init__()

        self.input_size = (256, 256)
        self.filter_types = [
            Filter.TEMPERATURE,
            Filter.BRIGHTNESS,
            Filter.CONTRAST,
            Filter.SATURATION,
            Filter.HIGHLIGHT,
            Filter.SHADOW,
        ]
        self.filter_argument_ranges = [
            0.3,
            0.5,
            0.5,
            0.6,
            0.4,
            0.4,
        ]

        self.backbone = EfficientBackbone.from_name('efficientnet-b0')
        self.regressor = CascadeArgumentRegressor(1280, 160, 1, len(self.filter_types))
        self.performer = FilterPerformer(self.filter_types)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                self._init_conv(m)
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
                self._init_norm(m)

        # re-create the backbone with pretrained weights
        # (this overrides the random initialization applied above)
        self.backbone = EfficientBackbone.from_pretrained('efficientnet-b0')

    def forward(self, comp, mask):
        arguments = self.predict_arguments(comp, mask)
        pred = self.restore_image(comp, mask, arguments)
        return pred

    def predict_arguments(self, comp, mask):
        comp = F.interpolate(comp, self.input_size, mode='bilinear', align_corners=False)
        mask = F.interpolate(mask, self.input_size, mode='bilinear', align_corners=False)

        fg = torch.cat((comp, mask), dim=1)
        bg = torch.cat((comp, (1 - mask)), dim=1)

        enc2x, enc4x, enc8x, enc16x, enc32x = self.backbone(fg, bg)
        arguments = self.regressor(enc32x)
        return arguments

    def restore_image(self, comp, mask, arguments):
        assert len(arguments) == len(self.filter_types)

        arguments = [torch.clamp(arg, -1, 1).view(-1, 1, 1, 1) for arg in arguments]
        return self.performer.restore(comp, mask, arguments)

    def adjust_image(self, image, mask, arguments):
        assert len(arguments) == len(self.filter_types)

        arguments = [(torch.clamp(arg, -1, 1) * r).view(-1, 1, 1, 1)
                     for arg, r in zip(arguments, self.filter_argument_ranges)]
        return self.performer.adjust(image, mask, arguments)

    def _init_conv(self, conv):
        nn.init.kaiming_uniform_(
            conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        if conv.bias is not None:
            nn.init.constant_(conv.bias, 0)

    def _init_norm(self, bn):
        if bn.weight is not None:
            nn.init.constant_(bn.weight, 1)
            nn.init.constant_(bn.bias, 0)
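For reference, the inference flow implied by the class above, as a sketch (it assumes the harmonizer package and its pretrained EfficientNet backbone are importable, which is handled elsewhere in this repository):

import torch

model = Harmonizer().cuda().eval()
comp = torch.rand(1, 3, 512, 512).cuda()   # composite image, RGB in [0, 1]
mask = torch.rand(1, 1, 512, 512).cuda()   # foreground mask in [0, 1]

with torch.no_grad():
    args = model.predict_arguments(comp, mask)       # one scalar per filter
    outputs = model.restore_image(comp, mask, args)  # one intermediate result per filter
harmonized = outputs[-1]                             # final harmonized image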
harmonizer/src/train/harmonizer/module/module.py
ADDED
|
@@ -0,0 +1,80 @@
import cv2
import math
from enum import Enum

import torch
from torch import nn
import torch.nn.functional as F

from .filter import Filter, FILTER_MODULES


class CascadeArgumentRegressor(nn.Module):
    def __init__(self, in_channels, base_channels, out_channels, head_num):
        super(CascadeArgumentRegressor, self).__init__()
        self.in_channels = in_channels
        self.base_channels = base_channels
        self.out_channels = out_channels
        self.head_num = head_num

        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.f = nn.Linear(self.in_channels, 160)
        self.g = nn.Linear(self.in_channels, self.base_channels)

        self.headers = nn.ModuleList()
        for i in range(0, self.head_num):
            self.headers.append(
                nn.ModuleList([
                    nn.Linear(160 + self.base_channels, self.base_channels),
                    nn.Linear(self.base_channels, self.out_channels),
                ])
            )

    def forward(self, x):
        x = self.pool(x)
        n, c, _, _ = x.shape
        x = x.view(n, c)

        f = self.f(x)
        g = self.g(x)

        # each head consumes the shared features f plus the previous head's
        # hidden state g, so the filter arguments are predicted in a cascade
        pred_args = []
        for i in range(0, self.head_num):
            g = self.headers[i][0](torch.cat((f, g), dim=1))
            pred_args.append(self.headers[i][1](g))

        return pred_args


class FilterPerformer(nn.Module):
    def __init__(self, filter_types):
        super(FilterPerformer, self).__init__()

        self.filters = [FILTER_MODULES[filter_type]() for filter_type in filter_types]

    def forward(self):
        pass

    def restore(self, x, mask, arguments):
        assert len(self.filters) == len(arguments)

        # apply the filters in order; each intermediate result is composited
        # into the masked (foreground) region only
        outputs = []
        _image = x
        for filter, arg in zip(self.filters, arguments):
            _image = filter(_image, arg)
            outputs.append(_image * mask + x * (1 - mask))

        return outputs

    def adjust(self, image, mask, arguments):
        assert len(self.filters) == len(arguments)

        # apply the filters in reverse order, so that 'restore' can
        # undo the adjustments filter by filter
        outputs = []
        _image = image
        for filter, arg in zip(reversed(self.filters), reversed(arguments)):
            _image = filter(_image, arg)
            outputs.append(_image * mask + image * (1 - mask))

        return outputs
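A minimal shape check for the cascade regressor (the batch size and the 8x8 feature map are made up; the 1280 channels and 6 heads match the Harmonizer defaults above):

import torch

reg = CascadeArgumentRegressor(in_channels=1280, base_channels=160,
                               out_channels=1, head_num=6)
enc32x = torch.rand(4, 1280, 8, 8)   # backbone features before pooling
pred_args = reg(enc32x)
assert len(pred_args) == 6 and pred_args[0].shape == (4, 1)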
harmonizer/src/train/harmonizer/proxy.py
ADDED
|
@@ -0,0 +1,20 @@
import torchtask

import func, data, model, criterion, trainer


def add_parser_arguments(parser):
    torchtask.proxy_template.add_parser_arguments(parser)

    data.add_parser_arguments(parser)
    model.add_parser_arguments(parser)
    criterion.add_parser_arguments(parser)
    trainer.add_parser_arguments(parser)


class HarmonizerProxy(torchtask.proxy_template.TaskProxy):

    NAME = 'harmonizer'

    def __init__(self, args):
        super(HarmonizerProxy, self).__init__(args, func, data, model, criterion, trainer)
harmonizer/src/train/harmonizer/script/train.py
ADDED
|
@@ -0,0 +1,85 @@
import os
import sys
import collections


sys.path.append('..')
import torchtask

import proxy

config = collections.OrderedDict(
    [
        ('exp_id', os.path.basename(__file__).split(".")[0]),

        ('trainer', 'harmonizer_trainer'),

        # arguments - Task Proxy
        ('short_ep', False),

        # arguments - exp
        ('resume', ''),
        ('validation', False),

        ('out_path', 'result'),

        ('visualize', False),
        ('debug', False),

        ('val_freq', 1),
        ('log_freq', 100),
        ('visual_freq', 100),
        ('checkpoint_freq', 1),

        # arguments - dataset / dataloader
        ('im_size', 256),
        ('num_workers', 4),
        ('ignore_additional', False),

        ('trainset', {
            'harmonizer_iharmony4': [
                './dataset/iHarmony4/HAdobe5k/train',
                './dataset/iHarmony4/HCOCO/train',
                './dataset/iHarmony4/Hday2night/train',
                './dataset/iHarmony4/HFlickr/train',
            ]
        }),
        ('additionalset', {
            'original_iharmony4': [
                './dataset/iHarmony4/HAdobe5k/train',
                './dataset/iHarmony4/HCOCO/train',
                './dataset/iHarmony4/Hday2night/train',
                './dataset/iHarmony4/HFlickr/train',
            ],
        }),
        ('valset', {
            'original_iharmony4': [
                './dataset/iHarmony4/HAdobe5k/test',
                './dataset/iHarmony4/HCOCO/test',
                './dataset/iHarmony4/Hday2night/test',
                './dataset/iHarmony4/HFlickr/test',
            ]
        }),

        # arguments - task specific components
        ('models', {'model': 'harmonizer'}),
        ('optimizers', {'model': 'adam'}),
        ('lrers', {'model': 'multisteplr'}),
        ('criterions', {'model': 'harmonizer_loss'}),

        # arguments - task specific optimizer / lr scheduler
        ('lr', 0.0003),

        ('milestones', [25, 50]),
        ('gamma', 0.1),

        # arguments - training details
        ('epochs', 60),
        ('batch_size', 16),
        ('additional_batch_size', 8),
    ]
)


if __name__ == '__main__':
    torchtask.run_script(config, proxy, proxy.HarmonizerProxy)
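For reference, the 'multisteplr' settings above amount to the following decay schedule (a worked example only, not extra configuration):

# lr = 0.0003 for epochs 0-24, 0.00003 for 25-49, 0.000003 for 50-59
lr, milestones, gamma = 0.0003, [25, 50], 0.1
for epoch in range(60):
    current_lr = lr * gamma ** sum(epoch >= m for m in milestones)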
harmonizer/src/train/harmonizer/trainer.py
ADDED
|
@@ -0,0 +1,322 @@
import os
import time
import numpy as np
from PIL import Image

import torch
from torch.autograd import Variable

import torchtask
from torchtask.utils import logger, cmd, tool
from torchtask.nn import func


def add_parser_arguments(parser):
    torchtask.trainer_template.add_parser_arguments(parser)


def harmonizer_trainer(args, model_dict, optimizer_dict, lrer_dict, criterion_dict, task_func):
    model_funcs = [model_dict['model']]
    optimizer_funcs = [optimizer_dict['model']]
    lrer_funcs = [lrer_dict['model']]
    criterion_funcs = [criterion_dict['model']]

    algorithm = HarmonizerTrainer(args)
    algorithm.build(model_funcs, optimizer_funcs, lrer_funcs, criterion_funcs, task_func)
    return algorithm


class HarmonizerTrainer(torchtask.trainer_template.TaskTrainer):
    def __init__(self, args):
        super(HarmonizerTrainer, self).__init__(args)

        self.model = None
        self.optimizer = None
        self.lrer = None
        self.criterion = None

    def _build(self, model_funcs, optimizer_funcs, lrer_funcs, criterion_funcs, task_func):
        self.task_func = task_func

        self.model = func.create_model(model_funcs[0], 'model', args=self.args)
        self.models = {'model': self.model}

        self.optimizer = optimizer_funcs[0](self.model.module.param_groups)
        self.optimizers = {'optimizer': self.optimizer}

        self.lrer = lrer_funcs[0](self.optimizer)
        self.lrers = {'lrer': self.lrer}

        self.criterion = criterion_funcs[0](self.args)
        self.criterions = {'criterion': self.criterion}

    def _train(self, data_loader, epoch):
        self.meters.reset()

        lbs = self.args.labeled_batch_size

        self.model.train()

        timer = time.time()
        for idx, (inp, gt) in enumerate(data_loader):
            # pre-process input tensor and ground truth tensor
            inp, gt = self._batch_prehandle(inp, gt, True)
            x, mask = inp

            # forward the model
            self.optimizer.zero_grad()
            resulter, debugger = self.model(inp)

            pred_outputs = tool.dict_value(resulter, 'outputs')

            # calculate loss for the fine labeled data
            l_pred_outputs = func.split_tensor_tuple(pred_outputs, 0, lbs)
            l_pred = (l_pred_outputs, )

            l_gt = func.split_tensor_tuple(gt, 0, lbs)
            l_inp = func.split_tensor_tuple(inp, 0, lbs)

            l_image_losses = self.criterion(l_pred, l_gt, l_inp)

            # dynamic loss weighting: accumulate only the positive loss
            # increments between consecutive filters
            sum_losses = l_image_losses[0].detach()
            for i in range(1, len(l_image_losses)):
                delta = l_image_losses[i].detach() - l_image_losses[i - 1].detach()
                sum_losses = sum_losses + delta * (delta > 0).float()
            sum_losses = sum_losses + 1e-9
            sum_losses = sum_losses.detach()

            scaled_l_image_losses = [torch.mean(l_image_losses[0] / sum_losses)]
            self.meters.update('fine_filter_0_loss', torch.mean(l_image_losses[0] / sum_losses).item())

            for i in range(1, len(l_image_losses)):
                loss = (l_image_losses[i] - l_image_losses[i-1].detach()) / sum_losses
                loss = loss * (loss > 0).float()
                loss = torch.mean(loss)
                scaled_l_image_losses.append(loss)
                self.meters.update('fine_filter_{0}_loss'.format(i), loss.item())

            # calculate loss for the coarse labeled data
            if not self.args.ignore_additional:
                u_pred_outputs = func.split_tensor_tuple(pred_outputs, lbs, self.args.batch_size)
                u_pred_outputs = (u_pred_outputs[-1], )
                u_pred = (u_pred_outputs, )

                u_gt = func.split_tensor_tuple(gt, lbs, self.args.batch_size)
                u_gt = (u_gt[-1], )

                u_inp = func.split_tensor_tuple(inp, lbs, self.args.batch_size)

                u_image_losses = self.criterion(u_pred, u_gt, u_inp)

                u_image_loss = torch.mean(u_image_losses[0]) * 10

                self.meters.update('coarse_filter_loss', u_image_loss.item())
            else:
                # fix: define a zero coarse loss so the summation below is valid
                u_image_loss = torch.zeros(1).cuda()
                self.meters.update('coarse_filter_loss', u_image_loss.item())

            # calculate the sum of all losses
            loss = 0
            for l_image_loss in scaled_l_image_losses:
                loss = loss + l_image_loss
            loss = loss + u_image_loss

            # backward and update
            loss.backward()
            self.optimizer.step()

            # logging
            self.meters.update('batch_time', time.time() - timer)
            if idx % self.args.log_freq == 0:
                logger.log_info('step: [{0}][{1}/{2}]\tbatch-time: {meters[batch_time]:.3f}'.format(epoch+1, idx, len(data_loader), meters=self.meters))
                logger.log_info('\tfine-filter-0-loss: {meters[fine_filter_0_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tfine-filter-1-loss: {meters[fine_filter_1_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tfine-filter-2-loss: {meters[fine_filter_2_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tfine-filter-3-loss: {meters[fine_filter_3_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tfine-filter-4-loss: {meters[fine_filter_4_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tfine-filter-5-loss: {meters[fine_filter_5_loss]:.6f}'.format(meters=self.meters))
                logger.log_info('\tcoarse-filter-loss: {meters[coarse_filter_loss]:.6f}'.format(meters=self.meters))

            # visualization
            if self.args.visualize and idx % self.args.visual_freq == 0:
                self._visualization(
                    epoch, idx, True,
                    func.split_tensor_tuple(inp, 0, 1, reduce_dim=True),
                    func.split_tensor_tuple(pred_outputs, 0, 1, reduce_dim=True),
                    func.split_tensor_tuple(gt, 0, 1, reduce_dim=True))

            # update iteration-based lrers
            if not self.args.is_epoch_lrer:
                self.lrer.step()

            timer = time.time()

        # update epoch-based lrers
        if self.args.is_epoch_lrer:
            self.lrer.step()

    def _validate(self, data_loader, epoch):
        self.meters.reset()

        self.model.eval()

        timer = time.time()
        for idx, (inp, gt) in enumerate(data_loader):
            inp, gt = self._batch_prehandle(inp, gt, False)
            x, mask = inp

            resulter, debugger = self.model(inp)

            pred_outputs = tool.dict_value(resulter, 'outputs')

            pred = (pred_outputs[-1], )
            gt = (gt[-1], )

            # calculate loss for the fine labeled data
            losses = self.criterion.forward(pred, gt, inp)
            loss = 0
            for _loss in losses:
                loss = loss + _loss
            loss = loss / len(losses)

            self.meters.update('loss', loss.item())

            self.task_func.metrics(pred_outputs[-1].detach(), gt[-1], mask, self.meters, id_str='IH')

            self.meters.update('batch_time', time.time() - timer)
            if idx % self.args.log_freq == 0:
                logger.log_info('step: [{0}][{1}/{2}]\tbatch-time: {meters[batch_time]:.3f}\n'
                                'loss: {meters[loss]:.6f}\n'
                                .format(epoch+1, idx, len(data_loader), meters=self.meters))

            if self.args.visualize:
                self._visualization(
                    epoch, idx, False,
                    func.split_tensor_tuple(inp, 0, 1, reduce_dim=True),
                    func.split_tensor_tuple((pred_outputs[-1], ), 0, 1, reduce_dim=True),
                    func.split_tensor_tuple(gt, 0, 1, reduce_dim=True))

            timer = time.time()

        metrics_info = {'IH': ''}
        for key in sorted(list(self.meters.keys())):
            if self.task_func.METRIC_STR in key:
                for id_str in metrics_info.keys():
                    if key.startswith(id_str):
                        metrics_info[id_str] += '{0}: {1:.6}\t'.format(key, self.meters[key])

        logger.log_info('Validation metrics:\n task-metrics\t=>\t{0}\n'.format(metrics_info['IH'].replace('_', '-')))

    def _visualization(self, epoch, idx, is_train, inp, pred, gt):
        visualize_path = self.args.visual_train_path if is_train else self.args.visual_val_path
        out_path = os.path.join(visualize_path, '{0}_{1}'.format(epoch, idx))

        x, mask = inp

        x = np.transpose(x.cpu().numpy(), (1, 2, 0))
        Image.fromarray((x * 255).astype('uint8')).save(out_path + '_1_0_x.jpg')

        mask = mask[0].data.cpu().numpy()
        Image.fromarray((mask * 255).astype('uint8'), mode='L').save(out_path + '_2_0_mask.jpg')

        for fdx, (pred_, gt_) in enumerate(zip(pred, gt)):
            pred_ = np.transpose(pred_.detach().cpu().numpy(), (1, 2, 0))
            Image.fromarray((pred_ * 255).astype('uint8')).save(out_path + '_1_{0}_pred_filter.jpg'.format(fdx+1))

            if torch.mean(gt_) != -999:
                gt_ = np.transpose(gt_.cpu().numpy(), (1, 2, 0))
                Image.fromarray((gt_ * 255).astype('uint8')).save(out_path + '_2_{0}_gt_filter.jpg'.format(fdx+1))

    def _save_checkpoint(self, epoch):
        state = {
            'epoch': epoch,
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'lrer': self.lrer.state_dict(),
        }
        checkpoint = os.path.join(self.args.checkpoint_path, 'checkpoint_{0}.ckpt'.format(epoch))

        torch.save(state, checkpoint)

    def _load_checkpoint(self):
        checkpoint = torch.load(self.args.resume)
        self.model.load_state_dict(checkpoint['model'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.lrer.load_state_dict(checkpoint['lrer'])
        return checkpoint['epoch']

    def _batch_prehandle(self, inp, gt, is_train):
        lbs = self.args.labeled_batch_size
        ubs = self.args.additional_batch_size

        # convert all input and ground truth to Variables
        inp_var = []
        for i in inp:
            inp_var.append(Variable(i).cuda())
        inp = tuple(inp_var)

        gt_var = []
        for g in gt:
            gt_var.append(Variable(g).cuda())
        gt = tuple(gt_var)

        filter_num = len(self.model.module.model.filter_types)

        if is_train:
            # ----------------------------------------------------------------
            # for fine labeled data, we generate the adjusted input
            # ----------------------------------------------------------------
            l_inp = func.split_tensor_tuple(inp, 0, lbs)
            l_gt = func.split_tensor_tuple(gt, 0, lbs)

            _, l_mask = l_inp
            l_gt_image, = l_gt

            n = l_gt_image.shape[0]
            l_rand_arguments = [self._rand_adjustment_values(n) for _ in range(0, filter_num)]

            l_x = self.model.module.adjust(l_gt_image, l_mask, l_rand_arguments)

            l_inp = (l_x[-1], l_mask)
            l_gt = []
            for _ in reversed(l_x[:-1]):
                l_gt.append(_)
            l_gt.append(l_gt_image)

            if not self.args.ignore_additional:
                # ----------------------------------------------------------------
                # for coarse labeled data, we use the existing adjusted input
                # ----------------------------------------------------------------
                u_inp = func.split_tensor_tuple(inp, lbs, self.args.batch_size)
                u_gt = func.split_tensor_tuple(gt, lbs, self.args.batch_size)

                u_gt_image, = u_gt
                none_value = torch.ones(ubs).view(ubs, 1).cuda() * -999
                none_im = u_gt_image.cuda() * 0 - 999

                u_gt = [none_im for _ in range(0, filter_num)]
                u_gt[-1] = u_gt_image

                inp = func.combine_tensor_tuple(l_inp, u_inp, 0)
                gt = func.combine_tensor_tuple(l_gt, u_gt, 0)

            else:
                inp = l_inp
                gt = l_gt

        else:
            gt_image, = gt

            none_value = torch.ones(1).view(1, 1).cuda() * -999
            none_im = gt_image.cuda() * 0 - 999

            gt = [none_im for _ in range(0, filter_num)]
            gt[-1] = gt_image

        return inp, gt

    def _rand_adjustment_values(self, n):
        x = torch.FloatTensor(np.random.uniform(-1, 1, n))
        x = x.view(n, 1).cuda()
        return x
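The per-filter loss weighting inside _train is easier to read in isolation. A sketch with made-up scalar losses (not values from training):

import torch

# normalize the positive per-filter loss increments so they sum to ~1
losses = [torch.tensor(v) for v in (0.9, 0.7, 0.8, 0.5, 0.4, 0.3)]  # fake per-filter losses
total = losses[0].clone()
for i in range(1, len(losses)):
    delta = losses[i] - losses[i - 1]
    total += delta * (delta > 0).float()
total += 1e-9

scaled = [losses[0] / total]
for i in range(1, len(losses)):
    d = (losses[i] - losses[i - 1]) / total
    scaled.append(d * (d > 0).float())
# only filters that increased the loss contribute a positive, normalized term;
# here total = 0.9 + 0.1 = 1.0 and the scaled terms sum to 1.0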
harmonizer/src/train/torchtask/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
import os

from .utils import *

from .nn import *

from .template import *

from .runner import run_script
harmonizer/src/train/torchtask/nn/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
from .lrer import VALID_LRER
from .optimizer import VALID_OPTIMIZER
from .module import SynchronizedBatchNorm2d
harmonizer/src/train/torchtask/nn/data.py
ADDED
|
@@ -0,0 +1,190 @@
import os
import itertools
import numpy as np

from torch.utils.data import Dataset
from torch.utils.data.sampler import Sampler


""" This file implements dataset wrappers and batch samplers for TorchTask.
"""


class _TorchTaskDatasetWrapper(Dataset):
    """ This is the superclass of TorchTask dataset wrappers.
    """

    def __init__(self):
        super(_TorchTaskDatasetWrapper, self).__init__()

        self.labeled_idxs = []      # indices of the labeled data
        self.additional_idxs = []   # indices of the additional data


class SplitUnlabeledWrapper(_TorchTaskDatasetWrapper):
    """ Split the fully labeled dataset into a labeled subset and an
        additional dataset based on a given sublabeled prefix list.

        For a fully labeled dataset, a common operation is to remove the labels
        of some samples and treat them as the additional samples.

        This dataset wrapper implements the dataset-split operation by using
        the given sublabeled prefix list. Samples whose prefix is in the list
        are treated as the labeled samples, while the other samples are treated
        as the additional samples.
    """

    def __init__(self, dataset, sublabeled_prefix, ignore_additional=False):
        super(SplitUnlabeledWrapper, self).__init__()

        self.dataset = dataset
        self.sublabeled_prefix = sublabeled_prefix
        self.ignore_additional = ignore_additional

        self._split_labeled()

    def __len__(self):
        return self.dataset.__len__()

    def __getitem__(self, idx):
        return self.dataset.__getitem__(idx)

    def _split_labeled(self):
        labeled_list, additional_list = [], []
        for img in self.dataset.sample_list:
            is_labeled = False
            for pdx, prefix in enumerate(self.sublabeled_prefix):
                if img.startswith(prefix):
                    labeled_list.append(img)
                    is_labeled = True
                    break

            if not is_labeled:
                additional_list.append(img)

        labeled_size, additional_size = len(labeled_list), len(additional_list)
        assert labeled_size + additional_size == len(self.dataset.sample_list)

        if self.ignore_additional:
            self.dataset.sample_list = labeled_list
            self.dataset.idxs = [_ for _ in range(0, len(self.dataset.sample_list))]
            self.labeled_idxs = self.dataset.idxs
            self.additional_idxs = []
        else:
            self.dataset.sample_list = labeled_list + additional_list
            self.dataset.idxs = [_ for _ in range(0, len(self.dataset.sample_list))]
            self.labeled_idxs = [_ for _ in range(0, labeled_size)]
            self.additional_idxs = [_ + labeled_size for _ in range(0, additional_size)]


class JointDatasetsWrapper(_TorchTaskDatasetWrapper):
    """ Combine several datasets (can be labeled or additional) into one dataset.

        This dataset wrapper combines multiple given datasets into one big dataset.
        The new dataset consists of a labeled subset and an additional subset.
    """

    def __init__(self, labeled_datasets, additional_datasets, ignore_additional=False):
        super(JointDatasetsWrapper, self).__init__()

        self.labeled_datasets = labeled_datasets
        self.additional_datasets = additional_datasets
        self.ignore_additional = ignore_additional

        self.labeled_datasets_size = [len(d) for d in self.labeled_datasets]
        self.additional_datasets_size = [len(d) for d in self.additional_datasets]

        self.labeled_size = np.sum(np.asarray(self.labeled_datasets_size))
        self.labeled_idxs = [_ for _ in range(0, self.labeled_size)]

        self.additional_size = 0
        if not self.ignore_additional:
            self.additional_size = np.sum(np.asarray(self.additional_datasets_size))
            self.additional_idxs = [self.labeled_size + _ for _ in range(0, self.additional_size)]

    def __len__(self):
        return int(self.labeled_size + self.additional_size)

    def __getitem__(self, idx):
        assert 0 <= idx < self.__len__()

        if idx >= self.labeled_size:
            idx -= self.labeled_size
            datasets = self.additional_datasets
            datasets_size = self.additional_datasets_size
        else:
            datasets = self.labeled_datasets
            datasets_size = self.labeled_datasets_size

        accumulated_idxs = 0
        for ddx, dsize in enumerate(datasets_size):
            accumulated_idxs += dsize
            if idx < accumulated_idxs:
                return datasets[ddx].__getitem__(idx - (accumulated_idxs - dsize))


class TwoStreamBatchSampler(Sampler):
    """ This two-stream batch sampler is used to read data from '_TorchTaskDatasetWrapper'.

        It iterates over two sets of indices simultaneously to read mini-batches for TorchTask.
        There are two sets of indices:
            labeled_idxs, additional_idxs
        An 'epoch' is defined by going through the longer indices once.
        In each 'epoch', the shorter indices are iterated through as many times as needed.
    """

    def __init__(self, labeled_idxs, additional_idxs, labeled_batch_size, additional_batch_size, short_ep=False):
        self.labeled_idxs = labeled_idxs
        self.additional_idxs = additional_idxs
        self.labeled_batch_size = labeled_batch_size
        self.additional_batch_size = additional_batch_size

        assert len(self.labeled_idxs) >= self.labeled_batch_size > 0
        assert len(self.additional_idxs) >= self.additional_batch_size > 0

        self.additional_batchs = len(self.additional_idxs) // self.additional_batch_size
        self.labeled_batchs = len(self.labeled_idxs) // self.labeled_batch_size

        self.short_ep = short_ep

    def __iter__(self):
        if not self.short_ep:
            if self.additional_batchs >= self.labeled_batchs:
                additional_iter = self.iterate_once(self.additional_idxs)
                labeled_iter = self.iterate_eternally(self.labeled_idxs)
            else:
                additional_iter = self.iterate_eternally(self.additional_idxs)
                labeled_iter = self.iterate_once(self.labeled_idxs)
        else:
            if self.additional_batchs >= self.labeled_batchs:
                additional_iter = self.iterate_eternally(self.additional_idxs)
                labeled_iter = self.iterate_once(self.labeled_idxs)
            else:
                additional_iter = self.iterate_once(self.additional_idxs)
                labeled_iter = self.iterate_eternally(self.labeled_idxs)

        return (labeled_batch + additional_batch
                for (labeled_batch, additional_batch) in zip(
                    self.grouper(labeled_iter, self.labeled_batch_size),
                    self.grouper(additional_iter, self.additional_batch_size)))

    def __len__(self):
        if self.short_ep:
            return min(self.additional_batchs, self.labeled_batchs)
        else:
            return max(self.additional_batchs, self.labeled_batchs)

    def iterate_once(self, iterable):
        return np.random.permutation(iterable)

    def iterate_eternally(self, indices):
        def infinite_shuffles():
            while True:
                yield np.random.permutation(indices)

        return itertools.chain.from_iterable(infinite_shuffles())

    def grouper(self, iterable, n):
        # e.g., grouper('ABCDEFG', 3) --> ABC DEF
        args = [iter(iterable)] * n
        return zip(*args)
harmonizer/src/train/torchtask/nn/func.py
ADDED
|
@@ -0,0 +1,99 @@
import numpy as np

import torch

from torchtask.utils import logger


""" This file provides tool functions for deep learning.
"""


def sigmoid_rampup(current, rampup_length):
    """ Exponential rampup from https://arxiv.org/abs/1610.02242 .
    """
    if rampup_length == 0:
        return 1.0
    else:
        current = np.clip(current, 0.0, rampup_length)
        phase = 1.0 - current / rampup_length
        return float(np.exp(-5.0 * phase * phase))


def split_tensor_tuple(ttuple, start, end, reduce_dim=False):
    """ Slice each tensor in the input tuple along the batch-dim.

    Arguments:
        ttuple (tuple): tuple of torch.Tensor
        start (int): start index of slicing
        end (int): end index of slicing
        reduce_dim (bool): whether to reduce the batch-dim when end - start == 1

    Returns:
        tuple: a sliced tensor tuple
    """

    result = []

    if reduce_dim:
        assert end - start == 1

    for t in ttuple:
        if end - start == 1 and reduce_dim:
            result.append(t[start, ...])
        else:
            result.append(t[start:end, ...])

    return tuple(result)


def combine_tensor_tuple(ttuple1, ttuple2, dim):
    result = []

    assert len(ttuple1) == len(ttuple2)

    for t1, t2 in zip(ttuple1, ttuple2):
        result.append(torch.cat((t1, t2), dim=dim))

    return tuple(result)


def create_model(mclass, mname, **kwargs):
    """ Create a nn.Module and set it up on multiple GPUs.
    """
    model = mclass(**kwargs)
    model = torch.nn.DataParallel(model)
    model = model.cuda()

    logger.log_info(' ' + '=' * 76 + '\n {0} parameters \n{1}'.format(mname, model_str(model)))
    return model


def model_str(module):
    """ Output the model structure and parameter counts as strings.
    """
    row_format = ' {name:<40} {shape:>20} = {total_size:>12,d}'
    lines = [' ' + '-' * 76,]

    params = list(module.named_parameters())
    for name, param in params:
        lines.append(row_format.format(name=name,
            shape=' * '.join(str(p) for p in param.size()), total_size=param.numel()))

    lines.append(' ' + '-' * 76)
    lines.append(row_format.format(name='all parameters', shape='sum of above',
        total_size=sum(int(param.numel()) for name, param in params)))
    lines.append(' ' + '=' * 76)
    lines.append('')

    return '\n'.join(lines)


def pytorch_support(required_version='1.0.0', info_str=''):
    if torch.__version__ < required_version:
        logger.log_err('{0} requires PyTorch >= {1}\n'
                       'However, the current PyTorch == {2}\n'
                       .format(info_str, required_version, torch.__version__))
    else:
        return True
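A round-trip sketch for the two tuple helpers above (shapes are illustrative):

import torch

# split a batch of 4 into labeled (first 3) and additional (last 1), then recombine
inp = (torch.rand(4, 3, 8, 8), torch.rand(4, 1, 8, 8))
l_inp = split_tensor_tuple(inp, 0, 3)
u_inp = split_tensor_tuple(inp, 3, 4)
full = combine_tensor_tuple(l_inp, u_inp, dim=0)
assert torch.equal(full[0], inp[0]) and torch.equal(full[1], inp[1])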
harmonizer/src/train/torchtask/nn/lrer.py
ADDED
|
@@ -0,0 +1,179 @@
import math

import torch
import torch.optim as optim

from torchtask.utils import cmd, logger
from torchtask.nn.func import pytorch_support


""" This file wraps the learning rate schedulers used in the script.
"""


EPOCH_LRERS = ['steplr', 'multisteplr', 'exponentiallr', 'cosineannealinglr']
ITER_LRERS = ['polynomiallr']
VALID_LRER = EPOCH_LRERS + ITER_LRERS


def add_parser_arguments(parser):
    """ Add the arguments related to the learning rate (LR) schedulers.

    This 'add_parser_arguments' function will be called every time.
    Please do not reuse an argument name that is already defined in this function.
    The default value '-1' means that the default value corresponding to
    the chosen LR scheduler will be used.
    """

    parser.add_argument('--last-epoch', type=int, default=-1, metavar='',
                        help='lr scheduler - the index of last epoch required by [all]')

    parser.add_argument('--step-size', type=int, default=-1, metavar='',
                        help='lr scheduler - period (epoch) of learning rate decay required by [steplr]')
    parser.add_argument('--milestones', type=cmd.str2intlist, default=[], metavar='',
                        help='lr scheduler - increasing list of epoch indices required by [multisteplr]')
    parser.add_argument('--gamma', type=float, default=-1, metavar='',
                        help='lr scheduler - multiplicative factor of learning rate decay required by [steplr, multisteplr, exponentiallr]')

    parser.add_argument('--T-max', type=int, default=-1, metavar='',
                        help='lr scheduler - maximum number of epochs required by [cosineannealinglr]')
    parser.add_argument('--eta-min', type=float, default=-1, metavar='',
                        help='lr scheduler - minimum learning rate required by [cosineannealinglr]')

    parser.add_argument('--power', type=float, default=-1, metavar='',
                        help='lr scheduler - power factor of learning rate decay required by [polynomiallr]')


# ---------------------------------------------------------------------
# Wrapper of Learning Rate Scheduler
# ---------------------------------------------------------------------

def steplr(args):
    """ Wrapper of torch.optim.lr_scheduler.StepLR (PyTorch >= 1.0.0).

    Sets the learning rate of each parameter group to the initial lr decayed by gamma every
    step_size epochs. When last_epoch=-1, sets initial lr as lr.
    """
    args.step_size = args.epochs if args.step_size == -1 else args.step_size
    args.gamma = 0.1 if args.gamma == -1 else args.gamma
    args.last_epoch = -1 if args.last_epoch == -1 else args.last_epoch

    def steplr_wrapper(optimizer):
        pytorch_support(required_version='1.0.0', info_str='LRScheduler - StepLR')
        return optim.lr_scheduler.StepLR(
            optimizer, step_size=args.step_size, gamma=args.gamma, last_epoch=args.last_epoch)

    return steplr_wrapper


def multisteplr(args):
    """ Wrapper of torch.optim.lr_scheduler.MultiStepLR (PyTorch >= 1.0.0).

    Set the learning rate of each parameter group to the initial lr decayed by gamma once the
    number of epochs reaches one of the milestones. When last_epoch=-1, sets initial lr as lr.
    """
    args.milestones = [i for i in range(1, args.epochs)] if args.milestones == [] else args.milestones
    args.gamma = 0.1 if args.gamma == -1 else args.gamma
    args.last_epoch = -1 if args.last_epoch == -1 else args.last_epoch

    def multisteplr_wrapper(optimizer):
        pytorch_support(required_version='1.0.0', info_str='LRScheduler - MultiStepLR')
        return optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=args.milestones, gamma=args.gamma, last_epoch=args.last_epoch)

    return multisteplr_wrapper


def exponentiallr(args):
    """ Wrapper of torch.optim.lr_scheduler.ExponentialLR (PyTorch >= 1.0.0).

    Set the learning rate of each parameter group to the initial lr decayed by gamma every epoch.
    When last_epoch=-1, sets initial lr as lr.
    """
    args.gamma = 0.1 if args.gamma == -1 else args.gamma
    args.last_epoch = -1 if args.last_epoch == -1 else args.last_epoch

    def exponentiallr_wrapper(optimizer):
        pytorch_support(required_version='1.0.0', info_str='LRScheduler - ExponentialLR')
        return optim.lr_scheduler.ExponentialLR(
            optimizer, gamma=args.gamma, last_epoch=args.last_epoch)

    return exponentiallr_wrapper


def cosineannealinglr(args):
    """ Wrapper of torch.optim.lr_scheduler.CosineAnnealingLR (PyTorch >= 1.0.0).

    Set the learning rate of each parameter group using a cosine annealing schedule.
    When last_epoch=-1, sets initial lr as lr.
    """
    args.T_max = args.epochs if args.T_max == -1 else args.T_max
    args.eta_min = 0 if args.eta_min == -1 else args.eta_min
    args.last_epoch = -1 if args.last_epoch == -1 else args.last_epoch

    def cosineannealinglr_wrapper(optimizer):
        pytorch_support(required_version='1.0.0', info_str='LRScheduler - CosineAnnealingLR')
        return optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.T_max, eta_min=args.eta_min, last_epoch=args.last_epoch)

    return cosineannealinglr_wrapper


def polynomiallr(args):
    """ Wrapper of torchtask.nn.lrer.PolynomialLR (PyTorch >= 1.0.0).

    Set the learning rate of each parameter group to the initial lr decayed by power every
    iteration. When last_epoch=-1, sets initial lr as lr.
    """
    args.power = 0.9 if args.power == -1 else args.power
    args.last_epoch = -1 if args.last_epoch == -1 else args.last_epoch

    def polynomiallr_wrapper(optimizer):
        pytorch_support(required_version='1.0.0', info_str='LRScheduler - PolynomialLR')
        return PolynomialLR(optimizer, epochs=args.epochs, iters_per_epoch=args.iters_per_epoch,
                            power=args.power, last_epoch=args.last_epoch)

    return polynomiallr_wrapper


# ---------------------------------------------------------------------
# Implementation of Learning Rate Scheduler
# ---------------------------------------------------------------------

class PolynomialLR(torch.optim.lr_scheduler._LRScheduler):
    """ Polynomial decay learning rate scheduler.
    """

    def __init__(self, optimizer, epochs, iters_per_epoch, power=0.9, last_epoch=-1):
        self.epochs = epochs
        self.iters_per_epoch = iters_per_epoch
        self.max_iters = self.epochs * self.iters_per_epoch
        self.cur_iter = 0
        self.power = power
        self.is_warn = False
        super(PolynomialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        return [base_lr * ((1 - float(self.cur_iter) / self.max_iters) ** self.power)
                for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is not None and epoch != 0:
            # update lr after each epoch if epoch is given
            # after each epoch, set epoch += 1 and call this function
            if not self.is_warn:
                logger.log_warn('PolynomialLR is designed for updating the learning rate after each iteration.\n'
                                'However, it will be updated after each epoch now, please be careful.\n')
                self.is_warn = True

            self.last_epoch = epoch
            assert self.last_epoch <= self.epochs
            self.cur_iter = self.last_epoch * self.iters_per_epoch

        elif epoch is None:
            # update lr after each iteration if epoch is None
            self.cur_iter += 1
            self.last_epoch = math.floor(self.cur_iter / self.iters_per_epoch)

        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr
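A usage sketch for PolynomialLR (the model, lr, and iteration counts are made up; the exact initialization behavior of _LRScheduler can vary slightly across PyTorch versions):

import torch

net = torch.nn.Linear(4, 4)
opt = torch.optim.SGD(net.parameters(), lr=0.01)
scheduler = PolynomialLR(opt, epochs=2, iters_per_epoch=100, power=0.9)
for _ in range(200):
    opt.step()
    scheduler.step()    # no epoch argument -> decay once per iteration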
harmonizer/src/train/torchtask/nn/module/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
from .third_party import SynchronizedBatchNorm2d, patch_replication_callback
from .gaussian_blur import GaussianBlurLayer
from .gaussian_noise import GaussianNoiseLayer
harmonizer/src/train/torchtask/nn/module/gaussian_blur.py
ADDED
@@ -0,0 +1,64 @@
import math
import numpy as np
import scipy.ndimage

import torch
import torch.nn as nn

from torchtask.utils import logger


class GaussianBlurLayer(nn.Module):
    """ Add Gaussian blur to a 4D tensor.

    This layer takes a 4D tensor of {N, C, H, W} as input.
    The Gaussian blur is performed on each of the given channels (C) separately.
    """

    def __init__(self, channels, kernel_size):
        """
        Arguments:
            channels (int): Channel number of the input tensor
            kernel_size (int): Size of the kernel used in blurring
        """

        super(GaussianBlurLayer, self).__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        assert self.kernel_size % 2 != 0

        self.op = nn.Sequential(
            nn.ReflectionPad2d(math.floor(self.kernel_size / 2)),
            nn.Conv2d(channels, channels, self.kernel_size,
                      stride=1, padding=0, bias=None, groups=channels)
        )

        self._init_kernel()

    def forward(self, x):
        """
        Arguments:
            x (torch.Tensor): input 4D tensor

        Returns:
            torch.Tensor: Blurred version of the input
        """

        if not len(list(x.shape)) == 4:
            logger.log_err('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
        elif not x.shape[1] == self.channels:
            logger.log_err('In \'GaussianBlurLayer\', the required channel ({0}) is '
                           'not the same as input ({1})\n'.format(self.channels, x.shape[1]))

        return self.op(x)

    def _init_kernel(self):
        sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8

        n = np.zeros((self.kernel_size, self.kernel_size))
        i = math.floor(self.kernel_size / 2)
        n[i, i] = 1
        kernel = scipy.ndimage.gaussian_filter(n, sigma)

        for name, param in self.named_parameters():
            param.data.copy_(torch.from_numpy(kernel))
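A small usage sketch (assumed, not from the commit): the kernel size must be odd, each channel is blurred independently via the grouped convolution, and the Gaussian weights are fixed at construction rather than learned.

```python
import torch

blur = GaussianBlurLayer(channels=3, kernel_size=5)  # kernel_size must be odd
img = torch.rand(1, 3, 64, 64)                       # {N, C, H, W}
with torch.no_grad():
    smoothed = blur(img)                             # same shape as the input
```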
harmonizer/src/train/torchtask/nn/module/gaussian_noise.py
ADDED
@@ -0,0 +1,40 @@
import random

import torch
import torch.nn as nn


class GaussianNoiseLayer(nn.Module):
    """ Add Gaussian noise to a 4D tensor.
    """

    def __init__(self, std):
        super(GaussianNoiseLayer, self).__init__()
        self.std = std
        self.noise = torch.zeros(0)
        self.enable = False if self.std is None else True

    def forward(self, inp):
        if not self.enable:
            return inp

        if self.noise.shape != inp.shape:
            self.noise = torch.zeros(inp.shape).cuda()
        self.noise.data.normal_(0, std=random.uniform(0, self.std))

        imax = inp.max(dim=3, keepdim=True)[0].max(dim=2, keepdim=True)[0].max(dim=1, keepdim=True)[0]
        imin = inp.min(dim=3, keepdim=True)[0].min(dim=2, keepdim=True)[0].min(dim=1, keepdim=True)[0]

        # normalize to [0, 1]
        inp.sub_(imin).div_(imax - imin + 1e-9)
        # add noise
        inp.add_(self.noise)
        # clip to [0, 1]
        upper_bound = (inp > 1.0).float()
        lower_bound = (inp < 0.0).float()
        inp.mul_(1 - upper_bound).add_(upper_bound)
        inp.mul_(1 - lower_bound)
        # de-normalize
        inp.mul_(imax - imin + 1e-9).add_(imin)

        return inp
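A usage sketch (assumed; note the layer allocates its noise buffer with `.cuda()`, so it requires a GPU, and it perturbs its input in place):

```python
import torch

noise = GaussianNoiseLayer(std=0.1)   # std=None disables the layer entirely
x = torch.rand(2, 3, 32, 32).cuda()
y = noise(x)   # x is normalized, perturbed, clipped, and de-normalized in place
```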
harmonizer/src/train/torchtask/nn/module/third_party/__init__.py
ADDED
@@ -0,0 +1 @@
from .sync_batchnorm import SynchronizedBatchNorm2d, patch_replication_callback
harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/__init__.py
ADDED
@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
# File   : __init__.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
from .replicate import DataParallelWithCallback, patch_replication_callback
harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/batchnorm.py
ADDED
@@ -0,0 +1,282 @@
# -*- coding: utf-8 -*-
# File   : batchnorm.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

import collections

import torch
import torch.nn.functional as F

from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast

from .comm import SyncMaster

__all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d']


def _sum_ft(tensor):
    """sum over the first and last dimension"""
    return tensor.sum(dim=0).sum(dim=-1)


def _unsqueeze_ft(tensor):
    """add new dimensions at the front and the tail"""
    return tensor.unsqueeze(0).unsqueeze(-1)


_ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])
_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])


class _SynchronizedBatchNorm(_BatchNorm):
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)

        self._sync_master = SyncMaster(self._data_parallel_master)

        self._is_parallel = False
        self._parallel_id = None
        self._slave_pipe = None

    def forward(self, input):
        # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.
        if not (self._is_parallel and self.training):
            return F.batch_norm(
                input, self.running_mean, self.running_var, self.weight, self.bias,
                self.training, self.momentum, self.eps)

        # Resize the input to (B, C, -1).
        input_shape = input.size()
        input = input.view(input.size(0), self.num_features, -1)

        # Compute the sum and square-sum.
        sum_size = input.size(0) * input.size(2)
        input_sum = _sum_ft(input)
        input_ssum = _sum_ft(input ** 2)

        # Reduce-and-broadcast the statistics.
        if self._parallel_id == 0:
            mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))
        else:
            mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))

        # Compute the output.
        if self.affine:
            # MJY:: Fuse the multiplication for speed.
            output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)
        else:
            output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std)

        # Reshape it.
        return output.view(input_shape)

    def __data_parallel_replicate__(self, ctx, copy_id):
        self._is_parallel = True
        self._parallel_id = copy_id

        # parallel_id == 0 means master device.
        if self._parallel_id == 0:
            ctx.sync_master = self._sync_master
        else:
            self._slave_pipe = ctx.sync_master.register_slave(copy_id)

    def _data_parallel_master(self, intermediates):
        """Reduce the sum and square-sum, compute the statistics, and broadcast it."""

        # Always using the same "device order" makes the ReduceAdd operation faster.
        # Thanks to:: Tete Xiao (http://tetexiao.com/)
        intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())

        to_reduce = [i[1][:2] for i in intermediates]
        to_reduce = [j for i in to_reduce for j in i]  # flatten
        target_gpus = [i[1].sum.get_device() for i in intermediates]

        sum_size = sum([i[1].sum_size for i in intermediates])
        sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)
        mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)

        broadcasted = Broadcast.apply(target_gpus, mean, inv_std)

        outputs = []
        for i, rec in enumerate(intermediates):
            outputs.append((rec[0], _MasterMessage(*broadcasted[i * 2:i * 2 + 2])))

        return outputs

    def _compute_mean_std(self, sum_, ssum, size):
        """Compute the mean and standard-deviation with sum and square-sum. This method
        also maintains the moving average on the master device."""
        assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'
        mean = sum_ / size
        sumvar = ssum - sum_ * mean
        unbias_var = sumvar / (size - 1)
        bias_var = sumvar / size

        self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
        self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data

        return mean, bias_var.clamp(self.eps) ** -0.5


class SynchronizedBatchNorm1d(_SynchronizedBatchNorm):
    r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a
    mini-batch.

    .. math::

        y = \frac{x - mean[x]}{\sqrt{Var[x] + \epsilon}} * gamma + beta

    This module differs from the built-in PyTorch BatchNorm1d as the mean and
    standard-deviation are reduced across all devices during training.
    For example, when one uses `nn.DataParallel` to wrap the network during
    training, PyTorch's implementation normalizes the tensor on each device using
    the statistics only on that device, which accelerates the computation and
    is also easy to implement, but the statistics might be inaccurate.
    Instead, in this synchronized version, the statistics will be computed
    over all training samples distributed on multiple devices.

    Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
    as the built-in PyTorch implementation.

    The mean and standard-deviation are calculated per-dimension over
    the mini-batches and gamma and beta are learnable parameter vectors
    of size C (where C is the input size).
    During training, this layer keeps a running estimate of its computed mean
    and variance. The running sum is kept with a default momentum of 0.1.
    During evaluation, this running mean/variance is used for normalization.
    Because the BatchNorm is done over the `C` dimension, computing statistics
    on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm.

    Args:
        num_features: num_features from an expected input of size
            `batch_size x num_features [x width]`
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Default: 0.1
        affine: a boolean value that when set to ``True``, gives the layer learnable
            affine parameters. Default: ``True``
    Shape:
        - Input: :math:`(N, C)` or :math:`(N, C, L)`
        - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
    Examples:
        >>> # With Learnable Parameters
        >>> m = SynchronizedBatchNorm1d(100)
        >>> # Without Learnable Parameters
        >>> m = SynchronizedBatchNorm1d(100, affine=False)
        >>> input = torch.autograd.Variable(torch.randn(20, 100))
        >>> output = m(input)
    """

    def _check_input_dim(self, input):
        if input.dim() != 2 and input.dim() != 3:
            raise ValueError('expected 2D or 3D input (got {}D input)'
                             .format(input.dim()))
        super(SynchronizedBatchNorm1d, self)._check_input_dim(input)


class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
    r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch
    of 3d inputs.

    .. math::

        y = \frac{x - mean[x]}{\sqrt{Var[x] + \epsilon}} * gamma + beta

    This module differs from the built-in PyTorch BatchNorm2d as the mean and
    standard-deviation are reduced across all devices during training.
    For example, when one uses `nn.DataParallel` to wrap the network during
    training, PyTorch's implementation normalizes the tensor on each device using
    the statistics only on that device, which accelerates the computation and
    is also easy to implement, but the statistics might be inaccurate.
    Instead, in this synchronized version, the statistics will be computed
    over all training samples distributed on multiple devices.

    Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
    as the built-in PyTorch implementation.

    The mean and standard-deviation are calculated per-dimension over
    the mini-batches and gamma and beta are learnable parameter vectors
    of size C (where C is the input size).
    During training, this layer keeps a running estimate of its computed mean
    and variance. The running sum is kept with a default momentum of 0.1.
    During evaluation, this running mean/variance is used for normalization.
    Because the BatchNorm is done over the `C` dimension, computing statistics
    on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm.

    Args:
        num_features: num_features from an expected input of
            size batch_size x num_features x height x width
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Default: 0.1
        affine: a boolean value that when set to ``True``, gives the layer learnable
            affine parameters. Default: ``True``
    Shape:
        - Input: :math:`(N, C, H, W)`
        - Output: :math:`(N, C, H, W)` (same shape as input)
    Examples:
        >>> # With Learnable Parameters
        >>> m = SynchronizedBatchNorm2d(100)
        >>> # Without Learnable Parameters
        >>> m = SynchronizedBatchNorm2d(100, affine=False)
        >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45))
        >>> output = m(input)
    """

    def _check_input_dim(self, input):
        if input.dim() != 4:
            raise ValueError('expected 4D input (got {}D input)'
                             .format(input.dim()))
        super(SynchronizedBatchNorm2d, self)._check_input_dim(input)


class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
    r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch
    of 4d inputs.

    .. math::

        y = \frac{x - mean[x]}{\sqrt{Var[x] + \epsilon}} * gamma + beta

    This module differs from the built-in PyTorch BatchNorm3d as the mean and
    standard-deviation are reduced across all devices during training.
    For example, when one uses `nn.DataParallel` to wrap the network during
    training, PyTorch's implementation normalizes the tensor on each device using
    the statistics only on that device, which accelerates the computation and
    is also easy to implement, but the statistics might be inaccurate.
    Instead, in this synchronized version, the statistics will be computed
    over all training samples distributed on multiple devices.

    Note that, for the one-GPU or CPU-only case, this module behaves exactly the same
    as the built-in PyTorch implementation.

    The mean and standard-deviation are calculated per-dimension over
    the mini-batches and gamma and beta are learnable parameter vectors
    of size C (where C is the input size).
    During training, this layer keeps a running estimate of its computed mean
    and variance. The running sum is kept with a default momentum of 0.1.
    During evaluation, this running mean/variance is used for normalization.
    Because the BatchNorm is done over the `C` dimension, computing statistics
    on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm
    or Spatio-temporal BatchNorm.

    Args:
        num_features: num_features from an expected input of
            size batch_size x num_features x depth x height x width
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Default: 0.1
        affine: a boolean value that when set to ``True``, gives the layer learnable
            affine parameters. Default: ``True``
    Shape:
        - Input: :math:`(N, C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` (same shape as input)
    Examples:
        >>> # With Learnable Parameters
        >>> m = SynchronizedBatchNorm3d(100)
        >>> # Without Learnable Parameters
        >>> m = SynchronizedBatchNorm3d(100, affine=False)
        >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10))
        >>> output = m(input)
    """

    def _check_input_dim(self, input):
        if input.dim() != 5:
            raise ValueError('expected 5D input (got {}D input)'
                             .format(input.dim()))
        super(SynchronizedBatchNorm3d, self)._check_input_dim(input)
harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/comm.py
ADDED
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
# File   : comm.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

import queue
import collections
import threading

__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster']


class FutureResult(object):
    """A thread-safe future implementation. Used only as a one-to-one pipe."""

    def __init__(self):
        self._result = None
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)

    def put(self, result):
        with self._lock:
            assert self._result is None, 'Previous result hasn\'t been fetched.'
            self._result = result
            self._cond.notify()

    def get(self):
        with self._lock:
            if self._result is None:
                self._cond.wait()

            res = self._result
            self._result = None
            return res


_MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])
_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])


class SlavePipe(_SlavePipeBase):
    """Pipe for master-slave communication."""

    def run_slave(self, msg):
        self.queue.put((self.identifier, msg))
        ret = self.result.get()
        self.queue.put(True)
        return ret


class SyncMaster(object):
    """An abstract `SyncMaster` object.

    - During the replication, as data parallel will trigger a callback on each module, all slave devices should
      call `register(id)` and obtain a `SlavePipe` to communicate with the master.
    - During the forward pass, the master device invokes `run_master`; all messages from the slave devices will be
      collected and passed to a registered callback.
    - After receiving the messages, the master device should gather the information and determine the message to be
      passed back to each slave device.
    """

    def __init__(self, master_callback):
        """
        Args:
            master_callback: a callback to be invoked after having collected messages from slave devices.
        """
        self._master_callback = master_callback
        self._queue = queue.Queue()
        self._registry = collections.OrderedDict()
        self._activated = False

    def __getstate__(self):
        return {'master_callback': self._master_callback}

    def __setstate__(self, state):
        self.__init__(state['master_callback'])

    def register_slave(self, identifier):
        """
        Register a slave device.
        Args:
            identifier: an identifier, usually the device id.
        Returns: a `SlavePipe` object which can be used to communicate with the master device.
        """
        if self._activated:
            assert self._queue.empty(), 'Queue is not clean before next initialization.'
            self._activated = False
            self._registry.clear()
        future = FutureResult()
        self._registry[identifier] = _MasterRegistry(future)
        return SlavePipe(identifier, self._queue, future)

    def run_master(self, master_msg):
        """
        Main entry for the master device in each forward pass.
        The messages are first collected from each device (including the master device), and then
        a callback is invoked to compute the message to be sent back to each device
        (including the master device).
        Args:
            master_msg: the message that the master wants to send to itself. This will be placed as the first
            message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example.
        Returns: the message to be sent back to the master device.
        """
        self._activated = True

        intermediates = [(0, master_msg)]
        for i in range(self.nr_slaves):
            intermediates.append(self._queue.get())

        results = self._master_callback(intermediates)
        assert results[0][0] == 0, 'The first result should belong to the master.'

        for i, res in results:
            if i == 0:
                continue
            self._registry[i].result.put(res)

        for i in range(self.nr_slaves):
            assert self._queue.get() is True

        return results[0][1]

    @property
    def nr_slaves(self):
        return len(self._registry)
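To make the master/slave handshake concrete, here is a minimal single-slave sketch (the callback below is hypothetical, not from the commit): the master callback receives `(identifier, message)` pairs with the master's entry first, and must return one reply per device in the same keyed form.

```python
import threading

def master_callback(intermediates):
    # intermediates: [(0, master_msg), (1, slave_msg), ...]
    total = sum(msg for _, msg in intermediates)
    return [(identifier, total) for identifier, _ in intermediates]

master = SyncMaster(master_callback)
pipe = master.register_slave(identifier=1)

# The slave blocks in run_slave until the master has run the callback.
worker = threading.Thread(target=lambda: print('slave got', pipe.run_slave(2)))
worker.start()
print('master got', master.run_master(1))  # both sides receive 3
worker.join()
```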
harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/replicate.py
ADDED
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
# File   : replicate.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

import functools

from torch.nn.parallel.data_parallel import DataParallel

__all__ = [
    'CallbackContext',
    'execute_replication_callbacks',
    'DataParallelWithCallback',
    'patch_replication_callback'
]


class CallbackContext(object):
    pass


def execute_replication_callbacks(modules):
    """
    Execute a replication callback `__data_parallel_replicate__` on each module created by the original replication.
    The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`.
    Note that, as all modules are isomorphic, we assign each sub-module a context
    (shared among multiple copies of this module on different devices).
    Through this context, different copies can share some information.
    We guarantee that the callback on the master copy (the first copy) will be called ahead of the callback
    of any slave copies.
    """
    master_copy = modules[0]
    nr_modules = len(list(master_copy.modules()))
    ctxs = [CallbackContext() for _ in range(nr_modules)]

    for i, module in enumerate(modules):
        for j, m in enumerate(module.modules()):
            if hasattr(m, '__data_parallel_replicate__'):
                m.__data_parallel_replicate__(ctxs[j], i)


class DataParallelWithCallback(DataParallel):
    """
    Data Parallel with a replication callback.
    A replication callback `__data_parallel_replicate__` of each module will be invoked after being created by
    the original `replicate` function.
    The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`.
    Examples:
        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
        # sync_bn.__data_parallel_replicate__ will be invoked.
    """

    def replicate(self, module, device_ids):
        modules = super(DataParallelWithCallback, self).replicate(module, device_ids)
        execute_replication_callbacks(modules)
        return modules


def patch_replication_callback(data_parallel):
    """
    Monkey-patch an existing `DataParallel` object. Add the replication callback.
    Useful when you have a customized `DataParallel` implementation.
    Examples:
        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
        > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
        > patch_replication_callback(sync_bn)
        # this is equivalent to
        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
    """

    assert isinstance(data_parallel, DataParallel)

    old_replicate = data_parallel.replicate

    @functools.wraps(old_replicate)
    def new_replicate(module, device_ids):
        modules = old_replicate(module, device_ids)
        execute_replication_callbacks(modules)
        return modules

    data_parallel.replicate = new_replicate
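Putting the pieces together, a sketch of the intended wiring (assumed, not from the commit; requires at least two visible GPUs): wrapping a model that contains `SynchronizedBatchNorm2d` with `DataParallelWithCallback` triggers the replication callback that links each GPU copy back to the master's `SyncMaster`.

```python
import torch
import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),
    SynchronizedBatchNorm2d(16),
    nn.ReLU(),
).cuda()
net = DataParallelWithCallback(net, device_ids=[0, 1])
out = net(torch.rand(8, 3, 32, 32).cuda())  # BN statistics synchronized across GPUs
```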
harmonizer/src/train/torchtask/nn/module/third_party/sync_batchnorm/unittest.py
ADDED
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# File   : unittest.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

import unittest

import numpy as np
from torch.autograd import Variable


def as_numpy(v):
    if isinstance(v, Variable):
        v = v.data
    return v.cpu().numpy()


class TorchTestCase(unittest.TestCase):
    def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3):
        npa, npb = as_numpy(a), as_numpy(b)
        self.assertTrue(
            np.allclose(npa, npb, atol=atol),
            'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
        )
harmonizer/src/train/torchtask/nn/optimizer.py
ADDED
@@ -0,0 +1,247 @@
import math

import torch
import torch.optim as optim
from torch.optim.optimizer import Optimizer

from torchtask.utils import cmd
from torchtask.nn.func import pytorch_support


""" This file wraps the optimizers used in the script.
"""


VALID_OPTIMIZER = ['sgd', 'rmsprop', 'adam', 'wdadam']


def add_parser_arguments(parser):
    """ Add the arguments related to the optimizer.

    This 'add_parser_arguments' function will be called every time.
    Please do not reuse argument names that are already defined in this function.
    The default value '-1' means that the default value corresponding to
    the chosen optimizer will be used.
    """

    parser.add_argument('--lr', type=float, default=-1, metavar='',
                        help='optimizer - learning rate (required by [all])')

    parser.add_argument('--dampening', type=float, default=-1, metavar='',
                        help='optimizer - dampening for momentum (required by [sgd])')
    parser.add_argument('--nesterov', type=cmd.str2bool, default=False, metavar='',
                        help='optimizer - enables Nesterov momentum if True (required by [sgd])')
    parser.add_argument('--weight-decay', type=float, default=-1, metavar='',
                        help='optimizer - weight decay (L2 penalty) (required by [sgd, rmsprop, adam, wdadam])')
    parser.add_argument('--momentum', type=float, default=-1, metavar='',
                        help='optimizer - momentum factor (required by [sgd, rmsprop])')
    parser.add_argument('--alpha', type=float, default=-1, metavar='',
                        help='optimizer - smoothing constant (required by [rmsprop])')
    parser.add_argument('--centered', type=cmd.str2bool, default=False, metavar='',
                        help='optimizer - if True, compute the centered RMSprop, where the gradient is normalized by an estimation of its variance (required by [rmsprop])')
    parser.add_argument('--eps', type=float, default=-1, metavar='',
                        help='optimizer - term added to the denominator to improve numerical stability (required by [rmsprop, adam, wdadam])')
    parser.add_argument('--beta1', type=float, default=-1, metavar='',
                        help='optimizer - coefficient used for computing running averages of the gradient and its square (required by [adam, wdadam])')
    parser.add_argument('--beta2', type=float, default=-1, metavar='',
                        help='optimizer - coefficient used for computing running averages of the gradient and its square (required by [adam, wdadam])')
    parser.add_argument('--amsgrad', type=cmd.str2bool, default=False, metavar='',
                        help='optimizer - use the AMSGrad variant if True (required by [wdadam])')


# ---------------------------------------------------------------------
# Wrapper of Optimizer
# ---------------------------------------------------------------------

def sgd(args):
    """ Wrapper of torch.optim.SGD (PyTorch >= 1.0.0).

    Implements stochastic gradient descent (optionally with momentum).
    """
    args.lr = 0.01 if args.lr == -1 else args.lr
    args.weight_decay = 0 if args.weight_decay == -1 else args.weight_decay
    args.momentum = 0 if args.momentum == -1 else args.momentum
    args.dampening = 0 if args.dampening == -1 else args.dampening
    args.nesterov = False if args.nesterov == False else args.nesterov

    def sgd_wrapper(param_groups):
        pytorch_support(required_version='1.0.0', info_str='Optimizer - SGD')
        return optim.SGD(
            param_groups,
            lr=args.lr, momentum=args.momentum, dampening=args.dampening,
            weight_decay=args.weight_decay, nesterov=args.nesterov)

    return sgd_wrapper


def rmsprop(args):
    """ Wrapper of torch.optim.RMSprop (PyTorch >= 1.0.0).

    Implements the RMSprop algorithm.
    Proposed by G. Hinton in his course.
    The centered version first appears in Generating Sequences With Recurrent Neural Networks.
    """

    args.lr = 0.01 if args.lr == -1 else args.lr
    args.alpha = 0.99 if args.alpha == -1 else args.alpha
    args.eps = 1e-08 if args.eps == -1 else args.eps
    args.weight_decay = 0 if args.weight_decay == -1 else args.weight_decay
    args.momentum = 0 if args.momentum == -1 else args.momentum
    args.centered = False if args.centered == False else args.centered

    def rmsprop_wrapper(param_groups):
        pytorch_support(required_version='1.0.0', info_str='Optimizer - RMSprop')
        return optim.RMSprop(
            param_groups,
            lr=args.lr, alpha=args.alpha, eps=args.eps, weight_decay=args.weight_decay,
            momentum=args.momentum, centered=args.centered)

    return rmsprop_wrapper


def adam(args):
    """ Wrapper of torch.optim.Adam (PyTorch >= 1.0.0).

    Implements the Adam algorithm.
    It has been proposed in 'Adam: A Method for Stochastic Optimization'.
    """
    args.lr = 0.001 if args.lr == -1 else args.lr
    args.beta1 = 0.9 if args.beta1 == -1 else args.beta1
    args.beta2 = 0.999 if args.beta2 == -1 else args.beta2
    args.eps = 1e-08 if args.eps == -1 else args.eps
    args.weight_decay = 0.0 if args.weight_decay == -1 else args.weight_decay

    def adam_wrapper(param_groups):
        pytorch_support(required_version='1.0.0', info_str='Optimizer - Adam')
        return optim.Adam(
            param_groups,
            lr=args.lr, betas=(args.beta1, args.beta2), eps=args.eps,
            weight_decay=args.weight_decay)

    return adam_wrapper


def wdadam(args):
    """ Wrapper of torchtask.nn.optimizer.WDAdam (PyTorch >= 1.0.0).

    Implements the Adam algorithm with weight decay and AMSGrad.
    """
    args.lr = 0.001 if args.lr == -1 else args.lr
    args.beta1 = 0.9 if args.beta1 == -1 else args.beta1
    args.beta2 = 0.999 if args.beta2 == -1 else args.beta2
    args.eps = 1e-08 if args.eps == -1 else args.eps
    args.weight_decay = 0.0 if args.weight_decay == -1 else args.weight_decay
    args.amsgrad = False if args.amsgrad == False else args.amsgrad

    def wdadam_wrapper(param_groups):
        pytorch_support(required_version='1.0.0', info_str='Optimizer - WDAdam')
        return WDAdam(
            param_groups,
            lr=args.lr, betas=(args.beta1, args.beta2), eps=args.eps,
            weight_decay=args.weight_decay, amsgrad=args.amsgrad)

    return wdadam_wrapper


# ---------------------------------------------------------------------
# Implementation of Optimizer
# ---------------------------------------------------------------------

class WDAdam(Optimizer):
    """ Implements the Adam algorithm with weight decay and AMSGrad.

    It has been proposed in `Adam: A Method for Stochastic Optimization`.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay using the method from
            the paper `Fixing Weight Decay Regularization in Adam` (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {0}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {0}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {0}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {0}".format(betas[1]))

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay / lr, amsgrad=amsgrad)
        super(WDAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(WDAdam, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    def step(self, closure=None):
        """ Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients')
                amsgrad = group['amsgrad']

                # State initialization
                state = self.state[p]
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    # Maintains max of all exp. moving avg. of sq. grad. values
                    if amsgrad:
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                if group['weight_decay'] != 0:
                    p.data.add_(-group['weight_decay'] * group['lr'], p.data)

                p.data.addcdiv_(-step_size, exp_avg, denom)

        return loss
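A usage sketch for the decoupled-weight-decay optimizer above (the model is hypothetical; the in-place `add_`/`addcmul_` signatures target PyTorch 1.x). Note that `weight_decay` is stored divided by the initial `lr`, so the per-step decay is `wd * (lr_current / lr_init) * p` and tracks any external learning-rate schedule.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Linear(10, 1)
opt = WDAdam(model.parameters(), lr=1e-3, weight_decay=1e-4, amsgrad=True)

x, y = torch.randn(32, 10), torch.randn(32, 1)
opt.zero_grad()
F.mse_loss(model(x), y).backward()
opt.step()  # decoupled decay is applied to p directly, not folded into the gradient
```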
harmonizer/src/train/torchtask/requirements.txt
ADDED
@@ -0,0 +1,5 @@
numpy
scipy
Pillow
pyyaml
opencv-python
harmonizer/src/train/torchtask/runner.py
ADDED
@@ -0,0 +1,33 @@
import sys
import argparse

from torchtask.utils import cmd
from torchtask.nn import optimizer, lrer
from torchtask.nn.func import pytorch_support


def create_parser():
    parser = argparse.ArgumentParser(description='TorchTask Script Parser')

    optimizer.add_parser_arguments(parser)
    lrer.add_parser_arguments(parser)

    return parser


def run_script(config, proxy_file, proxy_class):
    # TorchTask requires PyTorch >= 1.0.0
    pytorch_support(required_version='1.0.0', info_str='TorchTask')

    # help information
    if len(sys.argv) > 1 and sys.argv[1] in ['help', '--help', 'h', '-h']:
        config['h'] = True

    # create parser and parse args from config
    parser = create_parser()
    proxy_file.add_parser_arguments(parser)
    args = cmd.parse_args(parser, config)

    task_proxy = proxy_class(args)
    task_proxy.run()
harmonizer/src/train/torchtask/template/__init__.py
ADDED
@@ -0,0 +1,16 @@
from . import func as func_template
from . import data as data_template
from . import model as model_template
from . import criterion as criterion_template
from . import proxy as proxy_template
from . import trainer as trainer_template


__all__ = [
    'func_template',
    'data_template',
    'model_template',
    'criterion_template',
    'proxy_template',
    'trainer_template',
]