akhaliq3 committed on
Commit
035e10c
1 Parent(s): 80e980c

spaces demo

LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Huage001
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
inference/.DS_Store ADDED
Binary file (6.15 kB)
inference/brush/brush_large_horizontal.png ADDED
inference/brush/brush_large_vertical.png ADDED
inference/brush/brush_small_horizontal.png ADDED
inference/brush/brush_small_vertical.png ADDED
inference/inference.py ADDED
@@ -0,0 +1,496 @@
+import torch
+import torch.nn.functional as F
+import numpy as np
+from PIL import Image
+import network
+import morphology
+import os
+import math
+
+idx = 0
+
+
+def save_img(img, output_path):
+    result = Image.fromarray((img.data.cpu().numpy().transpose((1, 2, 0)) * 255).astype(np.uint8))
+    result.save(output_path)
+
+
+def param2stroke(param, H, W, meta_brushes):
+    """
+    Input a set of stroke parameters and output the corresponding foregrounds and alpha maps.
+    Args:
+        param: a tensor with shape n_strokes x n_param_per_stroke. Here, n_param_per_stroke is 8:
+            x_center, y_center, width, height, theta, R, G, and B.
+        H: output height.
+        W: output width.
+        meta_brushes: a tensor with shape 2 x 3 x meta_brush_height x meta_brush_width.
+            The first slice on the batch dimension denotes the vertical brush and the second one denotes the horizontal brush.
+
+    Returns:
+        foregrounds: a tensor with shape n_strokes x 3 x H x W, containing color information.
+        alphas: a tensor with shape n_strokes x 3 x H x W, containing binary information of
+            whether a pixel belongs to the stroke (alpha matte), for the painting process.
+    """
+    # First, resize the meta brushes to the required shape,
+    # in order to decrease GPU memory, especially when the required shape is small.
+    meta_brushes_resize = F.interpolate(meta_brushes, (H, W))
+    b = param.shape[0]
+    # Extract shape parameters and color parameters.
+    param_list = torch.split(param, 1, dim=1)
+    x0, y0, w, h, theta = [item.squeeze(-1) for item in param_list[:5]]
+    R, G, B = param_list[5:]
+    # Pre-compute sin theta and cos theta.
+    sin_theta = torch.sin(torch.acos(torch.tensor(-1., device=param.device)) * theta)
+    cos_theta = torch.cos(torch.acos(torch.tensor(-1., device=param.device)) * theta)
+    # index decides which meta stroke each stroke should use: the vertical one or the horizontal one.
+    # When h > w, the vertical stroke is used. When h <= w, the horizontal stroke is used.
+    index = torch.full((b,), -1, device=param.device, dtype=torch.long)
+    index[h > w] = 0
+    index[h <= w] = 1
+    brush = meta_brushes_resize[index.long()]
+
+    # Calculate the warp matrix according to the rules defined by PyTorch, in order to warp.
+    warp_00 = cos_theta / w
+    warp_01 = sin_theta * H / (W * w)
+    warp_02 = (1 - 2 * x0) * cos_theta / w + (1 - 2 * y0) * sin_theta * H / (W * w)
+    warp_10 = -sin_theta * W / (H * h)
+    warp_11 = cos_theta / h
+    warp_12 = (1 - 2 * y0) * cos_theta / h - (1 - 2 * x0) * sin_theta * W / (H * h)
+    warp_0 = torch.stack([warp_00, warp_01, warp_02], dim=1)
+    warp_1 = torch.stack([warp_10, warp_11, warp_12], dim=1)
+    warp = torch.stack([warp_0, warp_1], dim=1)
+    # Conduct warping.
+    grid = F.affine_grid(warp, [b, 3, H, W], align_corners=False)
+    brush = F.grid_sample(brush, grid, align_corners=False)
+    # alphas is the binary map indicating whether a pixel belongs to the stroke.
+    alphas = (brush > 0).float()
+    brush = brush.repeat(1, 3, 1, 1)
+    alphas = alphas.repeat(1, 3, 1, 1)
+    # Give color to foreground strokes.
+    color_map = torch.cat([R, G, B], dim=1)
+    color_map = color_map.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, H, W)
+    foreground = brush * color_map
+    # Dilation and erosion are used for foregrounds and alphas respectively to prevent artifacts on stroke borders.
+    foreground = morphology.dilation(foreground)
+    alphas = morphology.erosion(alphas)
+    return foreground, alphas
+
+
+def param2img_serial(
+        param, decision, meta_brushes, cur_canvas, frame_dir, has_border=False, original_h=None, original_w=None):
+    """
+    Input stroke parameters and decisions for each patch, meta brushes, the current canvas, a frame directory,
+    and whether there is a border (if intermediate painting results are required).
+    Output the painting results of adding the corresponding strokes on the current canvas.
+    Args:
+        param: a tensor with shape batch size x patches along height x patches along width
+            x n_stroke_per_patch x n_param_per_stroke
+        decision: a 0-1 tensor with shape batch size x patches along height x patches along width
+            x n_stroke_per_patch
+        meta_brushes: a tensor with shape 2 x 3 x meta_brush_height x meta_brush_width.
+            The first slice on the batch dimension denotes the vertical brush and the second one denotes the horizontal brush.
+        cur_canvas: a tensor with shape batch size x 3 x H x W,
+            where H and W denote the height and width of the padded results of the original images.
+        frame_dir: directory to save intermediate painting results. None means intermediate results are not required.
+        has_border: on the last painting layer, in order to make sure that the painting results do not miss
+            any important detail, we paint again on this layer but shift by patch_size // 2 pixels when
+            cutting patches. In this case, if intermediate results are required, we need to cut the shifted length
+            off the border before saving, or there would be a black border.
+        original_h: the original height, used for cropping when saving intermediate results.
+        original_w: the original width, used for cropping when saving intermediate results.
+
+    Returns:
+        cur_canvas: a tensor with shape batch size x 3 x H x W, denoting the painting results.
+    """
+    # param: b, h, w, stroke_per_patch, param_per_stroke
+    # decision: b, h, w, stroke_per_patch
+    b, h, w, s, p = param.shape
+    H, W = cur_canvas.shape[-2:]
+    is_odd_y = h % 2 == 1
+    is_odd_x = w % 2 == 1
+    patch_size_y = 2 * H // h
+    patch_size_x = 2 * W // w
+    even_idx_y = torch.arange(0, h, 2, device=cur_canvas.device)
+    even_idx_x = torch.arange(0, w, 2, device=cur_canvas.device)
+    odd_idx_y = torch.arange(1, h, 2, device=cur_canvas.device)
+    odd_idx_x = torch.arange(1, w, 2, device=cur_canvas.device)
+    even_y_even_x_coord_y, even_y_even_x_coord_x = torch.meshgrid([even_idx_y, even_idx_x])
+    odd_y_odd_x_coord_y, odd_y_odd_x_coord_x = torch.meshgrid([odd_idx_y, odd_idx_x])
+    even_y_odd_x_coord_y, even_y_odd_x_coord_x = torch.meshgrid([even_idx_y, odd_idx_x])
+    odd_y_even_x_coord_y, odd_y_even_x_coord_x = torch.meshgrid([odd_idx_y, even_idx_x])
+    cur_canvas = F.pad(cur_canvas, [patch_size_x // 4, patch_size_x // 4,
+                                    patch_size_y // 4, patch_size_y // 4, 0, 0, 0, 0])
+
+    def partial_render(this_canvas, patch_coord_y, patch_coord_x, stroke_id):
+        canvas_patch = F.unfold(this_canvas, (patch_size_y, patch_size_x),
+                                stride=(patch_size_y // 2, patch_size_x // 2))
+        # canvas_patch: b, 3 * py * px, h * w
+        canvas_patch = canvas_patch.view(b, 3, patch_size_y, patch_size_x, h, w).contiguous()
+        canvas_patch = canvas_patch.permute(0, 4, 5, 1, 2, 3).contiguous()
+        # canvas_patch: b, h, w, 3, py, px
+        selected_canvas_patch = canvas_patch[:, patch_coord_y, patch_coord_x, :, :, :]
+        selected_h, selected_w = selected_canvas_patch.shape[1:3]
+        selected_param = param[:, patch_coord_y, patch_coord_x, stroke_id, :].view(-1, p).contiguous()
+        selected_decision = decision[:, patch_coord_y, patch_coord_x, stroke_id].view(-1).contiguous()
+        selected_foregrounds = torch.zeros(selected_param.shape[0], 3, patch_size_y, patch_size_x,
+                                           device=this_canvas.device)
+        selected_alphas = torch.zeros(selected_param.shape[0], 3, patch_size_y, patch_size_x, device=this_canvas.device)
+        if selected_param[selected_decision, :].shape[0] > 0:
+            selected_foregrounds[selected_decision, :, :, :], selected_alphas[selected_decision, :, :, :] = \
+                param2stroke(selected_param[selected_decision, :], patch_size_y, patch_size_x, meta_brushes)
+        selected_foregrounds = selected_foregrounds.view(
+            b, selected_h, selected_w, 3, patch_size_y, patch_size_x).contiguous()
+        selected_alphas = selected_alphas.view(b, selected_h, selected_w, 3, patch_size_y, patch_size_x).contiguous()
+        selected_decision = selected_decision.view(b, selected_h, selected_w, 1, 1, 1).contiguous()
+        selected_canvas_patch = selected_foregrounds * selected_alphas * selected_decision + selected_canvas_patch * (
+            1 - selected_alphas * selected_decision)
+        this_canvas = selected_canvas_patch.permute(0, 3, 1, 4, 2, 5).contiguous()
+        # this_canvas: b, 3, selected_h, py, selected_w, px
+        this_canvas = this_canvas.view(b, 3, selected_h * patch_size_y, selected_w * patch_size_x).contiguous()
+        # this_canvas: b, 3, selected_h * py, selected_w * px
+        return this_canvas
+
+    global idx
+    if has_border:
+        factor = 2
+    else:
+        factor = 4
+    if even_idx_y.shape[0] > 0 and even_idx_x.shape[0] > 0:
+        for i in range(s):
+            canvas = partial_render(cur_canvas, even_y_even_x_coord_y, even_y_even_x_coord_x, i)
+            if not is_odd_y:
+                canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+            if not is_odd_x:
+                canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+            cur_canvas = canvas
+            idx += 1
+            if frame_dir is not None:
+                frame = crop(cur_canvas[:, :, patch_size_y // factor:-patch_size_y // factor,
+                             patch_size_x // factor:-patch_size_x // factor], original_h, original_w)
+                save_img(frame[0], os.path.join(frame_dir, '%03d.jpg' % idx))
+
+    if odd_idx_y.shape[0] > 0 and odd_idx_x.shape[0] > 0:
+        for i in range(s):
+            canvas = partial_render(cur_canvas, odd_y_odd_x_coord_y, odd_y_odd_x_coord_x, i)
+            canvas = torch.cat([cur_canvas[:, :, :patch_size_y // 2, -canvas.shape[3]:], canvas], dim=2)
+            canvas = torch.cat([cur_canvas[:, :, -canvas.shape[2]:, :patch_size_x // 2], canvas], dim=3)
+            if is_odd_y:
+                canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+            if is_odd_x:
+                canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+            cur_canvas = canvas
+            idx += 1
+            if frame_dir is not None:
+                frame = crop(cur_canvas[:, :, patch_size_y // factor:-patch_size_y // factor,
+                             patch_size_x // factor:-patch_size_x // factor], original_h, original_w)
+                save_img(frame[0], os.path.join(frame_dir, '%03d.jpg' % idx))
+
+    if odd_idx_y.shape[0] > 0 and even_idx_x.shape[0] > 0:
+        for i in range(s):
+            canvas = partial_render(cur_canvas, odd_y_even_x_coord_y, odd_y_even_x_coord_x, i)
+            canvas = torch.cat([cur_canvas[:, :, :patch_size_y // 2, :canvas.shape[3]], canvas], dim=2)
+            if is_odd_y:
+                canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+            if not is_odd_x:
+                canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+            cur_canvas = canvas
+            idx += 1
+            if frame_dir is not None:
+                frame = crop(cur_canvas[:, :, patch_size_y // factor:-patch_size_y // factor,
+                             patch_size_x // factor:-patch_size_x // factor], original_h, original_w)
+                save_img(frame[0], os.path.join(frame_dir, '%03d.jpg' % idx))
+
+    if even_idx_y.shape[0] > 0 and odd_idx_x.shape[0] > 0:
+        for i in range(s):
+            canvas = partial_render(cur_canvas, even_y_odd_x_coord_y, even_y_odd_x_coord_x, i)
+            canvas = torch.cat([cur_canvas[:, :, :canvas.shape[2], :patch_size_x // 2], canvas], dim=3)
+            if not is_odd_y:
+                canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, -canvas.shape[3]:]], dim=2)
+            if is_odd_x:
+                canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+            cur_canvas = canvas
+            idx += 1
+            if frame_dir is not None:
+                frame = crop(cur_canvas[:, :, patch_size_y // factor:-patch_size_y // factor,
+                             patch_size_x // factor:-patch_size_x // factor], original_h, original_w)
+                save_img(frame[0], os.path.join(frame_dir, '%03d.jpg' % idx))
+
+    cur_canvas = cur_canvas[:, :, patch_size_y // 4:-patch_size_y // 4, patch_size_x // 4:-patch_size_x // 4]
+
+    return cur_canvas
+
+
+def param2img_parallel(param, decision, meta_brushes, cur_canvas):
+    """
+    Input stroke parameters and decisions for each patch, meta brushes, and the current canvas.
+    Output the painting results of adding the corresponding strokes on the current canvas.
+    Args:
+        param: a tensor with shape batch size x patches along height x patches along width
+            x n_stroke_per_patch x n_param_per_stroke
+        decision: a 0-1 tensor with shape batch size x patches along height x patches along width
+            x n_stroke_per_patch
+        meta_brushes: a tensor with shape 2 x 3 x meta_brush_height x meta_brush_width.
+            The first slice on the batch dimension denotes the vertical brush and the second one denotes the horizontal brush.
+        cur_canvas: a tensor with shape batch size x 3 x H x W,
+            where H and W denote the height and width of the padded results of the original images.
+
+    Returns:
+        cur_canvas: a tensor with shape batch size x 3 x H x W, denoting the painting results.
+    """
+    # param: b, h, w, stroke_per_patch, param_per_stroke
+    # decision: b, h, w, stroke_per_patch
+    b, h, w, s, p = param.shape
+    param = param.view(-1, 8).contiguous()
+    decision = decision.view(-1).contiguous().bool()
+    H, W = cur_canvas.shape[-2:]
+    is_odd_y = h % 2 == 1
+    is_odd_x = w % 2 == 1
+    patch_size_y = 2 * H // h
+    patch_size_x = 2 * W // w
+    even_idx_y = torch.arange(0, h, 2, device=cur_canvas.device)
+    even_idx_x = torch.arange(0, w, 2, device=cur_canvas.device)
+    odd_idx_y = torch.arange(1, h, 2, device=cur_canvas.device)
+    odd_idx_x = torch.arange(1, w, 2, device=cur_canvas.device)
+    even_y_even_x_coord_y, even_y_even_x_coord_x = torch.meshgrid([even_idx_y, even_idx_x])
+    odd_y_odd_x_coord_y, odd_y_odd_x_coord_x = torch.meshgrid([odd_idx_y, odd_idx_x])
+    even_y_odd_x_coord_y, even_y_odd_x_coord_x = torch.meshgrid([even_idx_y, odd_idx_x])
+    odd_y_even_x_coord_y, odd_y_even_x_coord_x = torch.meshgrid([odd_idx_y, even_idx_x])
+    cur_canvas = F.pad(cur_canvas, [patch_size_x // 4, patch_size_x // 4,
+                                    patch_size_y // 4, patch_size_y // 4, 0, 0, 0, 0])
+    foregrounds = torch.zeros(param.shape[0], 3, patch_size_y, patch_size_x, device=cur_canvas.device)
+    alphas = torch.zeros(param.shape[0], 3, patch_size_y, patch_size_x, device=cur_canvas.device)
+    valid_foregrounds, valid_alphas = param2stroke(param[decision, :], patch_size_y, patch_size_x, meta_brushes)
+    foregrounds[decision, :, :, :] = valid_foregrounds
+    alphas[decision, :, :, :] = valid_alphas
+    # foreground, alpha: b * h * w * stroke_per_patch, 3, patch_size_y, patch_size_x
+    foregrounds = foregrounds.view(-1, h, w, s, 3, patch_size_y, patch_size_x).contiguous()
+    alphas = alphas.view(-1, h, w, s, 3, patch_size_y, patch_size_x).contiguous()
+    # foreground, alpha: b, h, w, stroke_per_patch, 3, render_size_y, render_size_x
+    decision = decision.view(-1, h, w, s, 1, 1, 1).contiguous()
+
+    # decision: b, h, w, stroke_per_patch, 1, 1, 1
+
+    def partial_render(this_canvas, patch_coord_y, patch_coord_x):
+
+        canvas_patch = F.unfold(this_canvas, (patch_size_y, patch_size_x),
+                                stride=(patch_size_y // 2, patch_size_x // 2))
+        # canvas_patch: b, 3 * py * px, h * w
+        canvas_patch = canvas_patch.view(b, 3, patch_size_y, patch_size_x, h, w).contiguous()
+        canvas_patch = canvas_patch.permute(0, 4, 5, 1, 2, 3).contiguous()
+        # canvas_patch: b, h, w, 3, py, px
+        selected_canvas_patch = canvas_patch[:, patch_coord_y, patch_coord_x, :, :, :]
+        selected_foregrounds = foregrounds[:, patch_coord_y, patch_coord_x, :, :, :, :]
+        selected_alphas = alphas[:, patch_coord_y, patch_coord_x, :, :, :, :]
+        selected_decisions = decision[:, patch_coord_y, patch_coord_x, :, :, :, :]
+        for i in range(s):
+            cur_foreground = selected_foregrounds[:, :, :, i, :, :, :]
+            cur_alpha = selected_alphas[:, :, :, i, :, :, :]
+            cur_decision = selected_decisions[:, :, :, i, :, :, :]
+            selected_canvas_patch = cur_foreground * cur_alpha * cur_decision + selected_canvas_patch * (
+                1 - cur_alpha * cur_decision)
+        this_canvas = selected_canvas_patch.permute(0, 3, 1, 4, 2, 5).contiguous()
+        # this_canvas: b, 3, h_half, py, w_half, px
+        h_half = this_canvas.shape[2]
+        w_half = this_canvas.shape[4]
+        this_canvas = this_canvas.view(b, 3, h_half * patch_size_y, w_half * patch_size_x).contiguous()
+        # this_canvas: b, 3, h_half * py, w_half * px
+        return this_canvas
+
+    if even_idx_y.shape[0] > 0 and even_idx_x.shape[0] > 0:
+        canvas = partial_render(cur_canvas, even_y_even_x_coord_y, even_y_even_x_coord_x)
+        if not is_odd_y:
+            canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+        if not is_odd_x:
+            canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+        cur_canvas = canvas
+
+    if odd_idx_y.shape[0] > 0 and odd_idx_x.shape[0] > 0:
+        canvas = partial_render(cur_canvas, odd_y_odd_x_coord_y, odd_y_odd_x_coord_x)
+        canvas = torch.cat([cur_canvas[:, :, :patch_size_y // 2, -canvas.shape[3]:], canvas], dim=2)
+        canvas = torch.cat([cur_canvas[:, :, -canvas.shape[2]:, :patch_size_x // 2], canvas], dim=3)
+        if is_odd_y:
+            canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+        if is_odd_x:
+            canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+        cur_canvas = canvas
+
+    if odd_idx_y.shape[0] > 0 and even_idx_x.shape[0] > 0:
+        canvas = partial_render(cur_canvas, odd_y_even_x_coord_y, odd_y_even_x_coord_x)
+        canvas = torch.cat([cur_canvas[:, :, :patch_size_y // 2, :canvas.shape[3]], canvas], dim=2)
+        if is_odd_y:
+            canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, :canvas.shape[3]]], dim=2)
+        if not is_odd_x:
+            canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+        cur_canvas = canvas
+
+    if even_idx_y.shape[0] > 0 and odd_idx_x.shape[0] > 0:
+        canvas = partial_render(cur_canvas, even_y_odd_x_coord_y, even_y_odd_x_coord_x)
+        canvas = torch.cat([cur_canvas[:, :, :canvas.shape[2], :patch_size_x // 2], canvas], dim=3)
+        if not is_odd_y:
+            canvas = torch.cat([canvas, cur_canvas[:, :, -patch_size_y // 2:, -canvas.shape[3]:]], dim=2)
+        if is_odd_x:
+            canvas = torch.cat([canvas, cur_canvas[:, :, :canvas.shape[2], -patch_size_x // 2:]], dim=3)
+        cur_canvas = canvas
+
+    cur_canvas = cur_canvas[:, :, patch_size_y // 4:-patch_size_y // 4, patch_size_x // 4:-patch_size_x // 4]
+
+    return cur_canvas
+
+
+def read_img(img_path, img_type='RGB', h=None, w=None):
+    img = Image.open(img_path).convert(img_type)
+    if h is not None and w is not None:
+        img = img.resize((w, h), resample=Image.NEAREST)
+    img = np.array(img)
+    if img.ndim == 2:
+        img = np.expand_dims(img, axis=-1)
+    img = img.transpose((2, 0, 1))
+    img = torch.from_numpy(img).unsqueeze(0).float() / 255.
+    return img
+
+
+def pad(img, H, W):
+    b, c, h, w = img.shape
+    pad_h = (H - h) // 2
+    pad_w = (W - w) // 2
+    remainder_h = (H - h) % 2
+    remainder_w = (W - w) % 2
+    img = torch.cat([torch.zeros((b, c, pad_h, w), device=img.device), img,
+                     torch.zeros((b, c, pad_h + remainder_h, w), device=img.device)], dim=-2)
+    img = torch.cat([torch.zeros((b, c, H, pad_w), device=img.device), img,
+                     torch.zeros((b, c, H, pad_w + remainder_w), device=img.device)], dim=-1)
+    return img
+
+
+def crop(img, h, w):
+    H, W = img.shape[-2:]
+    pad_h = (H - h) // 2
+    pad_w = (W - w) // 2
+    remainder_h = (H - h) % 2
+    remainder_w = (W - w) % 2
+    img = img[:, :, pad_h:H - pad_h - remainder_h, pad_w:W - pad_w - remainder_w]
+    return img
+
+
+def main(input_path, model_path, output_dir, need_animation=False, resize_h=None, resize_w=None, serial=False):
+    if not os.path.exists(output_dir):
+        os.mkdir(output_dir)
+    input_name = os.path.basename(input_path)
+    output_path = os.path.join(output_dir, input_name)
+    frame_dir = None
+    if need_animation:
+        if not serial:
+            print('Serial mode is required when animation results are needed, so the serial flag is set to True!')
+            serial = True
+        frame_dir = os.path.join(output_dir, input_name[:input_name.find('.')])
+        if not os.path.exists(frame_dir):
+            os.mkdir(frame_dir)
+    patch_size = 32
+    stroke_num = 8
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    net_g = network.Painter(5, stroke_num, 256, 8, 3, 3).to(device)
+    net_g.load_state_dict(torch.load(model_path))
+    net_g.eval()
+    for param in net_g.parameters():
+        param.requires_grad = False
+
+    brush_large_vertical = read_img('brush/brush_large_vertical.png', 'L').to(device)
+    brush_large_horizontal = read_img('brush/brush_large_horizontal.png', 'L').to(device)
+    meta_brushes = torch.cat(
+        [brush_large_vertical, brush_large_horizontal], dim=0)
+
+    with torch.no_grad():
+        original_img = read_img(input_path, 'RGB', resize_h, resize_w).to(device)
+        original_h, original_w = original_img.shape[-2:]
+        K = max(math.ceil(math.log2(max(original_h, original_w) / patch_size)), 0)
+        original_img_pad_size = patch_size * (2 ** K)
+        original_img_pad = pad(original_img, original_img_pad_size, original_img_pad_size)
+        final_result = torch.zeros_like(original_img_pad).to(device)
+        for layer in range(0, K + 1):
+            layer_size = patch_size * (2 ** layer)
+            img = F.interpolate(original_img_pad, (layer_size, layer_size))
+            result = F.interpolate(final_result, (patch_size * (2 ** layer), patch_size * (2 ** layer)))
+            img_patch = F.unfold(img, (patch_size, patch_size), stride=(patch_size, patch_size))
+            result_patch = F.unfold(result, (patch_size, patch_size),
+                                    stride=(patch_size, patch_size))
+            # There are patch_num * patch_num patches in total
+            patch_num = (layer_size - patch_size) // patch_size + 1
+
+            # img_patch, result_patch: b, 3 * output_size * output_size, h * w
+            img_patch = img_patch.permute(0, 2, 1).contiguous().view(-1, 3, patch_size, patch_size).contiguous()
+            result_patch = result_patch.permute(0, 2, 1).contiguous().view(
+                -1, 3, patch_size, patch_size).contiguous()
+            shape_param, stroke_decision = net_g(img_patch, result_patch)
+            stroke_decision = network.SignWithSigmoidGrad.apply(stroke_decision)
+
+            grid = shape_param[:, :, :2].view(img_patch.shape[0] * stroke_num, 1, 1, 2).contiguous()
+            img_temp = img_patch.unsqueeze(1).contiguous().repeat(1, stroke_num, 1, 1, 1).view(
+                img_patch.shape[0] * stroke_num, 3, patch_size, patch_size).contiguous()
+            color = F.grid_sample(img_temp, 2 * grid - 1, align_corners=False).view(
+                img_patch.shape[0], stroke_num, 3).contiguous()
+            stroke_param = torch.cat([shape_param, color], dim=-1)
+            # stroke_param: b * h * w, stroke_per_patch, param_per_stroke
+            # stroke_decision: b * h * w, stroke_per_patch, 1
+            param = stroke_param.view(1, patch_num, patch_num, stroke_num, 8).contiguous()
+            decision = stroke_decision.view(1, patch_num, patch_num, stroke_num).contiguous().bool()
+            # param: b, h, w, stroke_per_patch, 8
+            # decision: b, h, w, stroke_per_patch
+            param[..., :2] = param[..., :2] / 2 + 0.25
+            param[..., 2:4] = param[..., 2:4] / 2
+            if serial:
+                final_result = param2img_serial(param, decision, meta_brushes, final_result,
+                                                frame_dir, False, original_h, original_w)
+            else:
+                final_result = param2img_parallel(param, decision, meta_brushes, final_result)
+
+        border_size = original_img_pad_size // (2 * patch_num)
+        img = F.interpolate(original_img_pad, (patch_size * (2 ** layer), patch_size * (2 ** layer)))
+        result = F.interpolate(final_result, (patch_size * (2 ** layer), patch_size * (2 ** layer)))
+        img = F.pad(img, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2,
+                          0, 0, 0, 0])
+        result = F.pad(result, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2,
+                                0, 0, 0, 0])
+        img_patch = F.unfold(img, (patch_size, patch_size), stride=(patch_size, patch_size))
+        result_patch = F.unfold(result, (patch_size, patch_size), stride=(patch_size, patch_size))
+        final_result = F.pad(final_result, [border_size, border_size, border_size, border_size, 0, 0, 0, 0])
+        h = (img.shape[2] - patch_size) // patch_size + 1
+        w = (img.shape[3] - patch_size) // patch_size + 1
+        # img_patch, result_patch: b, 3 * output_size * output_size, h * w
+        img_patch = img_patch.permute(0, 2, 1).contiguous().view(-1, 3, patch_size, patch_size).contiguous()
+        result_patch = result_patch.permute(0, 2, 1).contiguous().view(-1, 3, patch_size, patch_size).contiguous()
+        shape_param, stroke_decision = net_g(img_patch, result_patch)
+
+        grid = shape_param[:, :, :2].view(img_patch.shape[0] * stroke_num, 1, 1, 2).contiguous()
+        img_temp = img_patch.unsqueeze(1).contiguous().repeat(1, stroke_num, 1, 1, 1).view(
+            img_patch.shape[0] * stroke_num, 3, patch_size, patch_size).contiguous()
+        color = F.grid_sample(img_temp, 2 * grid - 1, align_corners=False).view(
+            img_patch.shape[0], stroke_num, 3).contiguous()
+        stroke_param = torch.cat([shape_param, color], dim=-1)
+        # stroke_param: b * h * w, stroke_per_patch, param_per_stroke
+        # stroke_decision: b * h * w, stroke_per_patch, 1
+        param = stroke_param.view(1, h, w, stroke_num, 8).contiguous()
+        decision = stroke_decision.view(1, h, w, stroke_num).contiguous().bool()
+        # param: b, h, w, stroke_per_patch, 8
+        # decision: b, h, w, stroke_per_patch
+        param[..., :2] = param[..., :2] / 2 + 0.25
+        param[..., 2:4] = param[..., 2:4] / 2
+        if serial:
+            final_result = param2img_serial(param, decision, meta_brushes, final_result,
+                                            frame_dir, True, original_h, original_w)
+        else:
+            final_result = param2img_parallel(param, decision, meta_brushes, final_result)
+        final_result = final_result[:, :, border_size:-border_size, border_size:-border_size]
+
+    final_result = crop(final_result, original_h, original_w)
+    save_img(final_result[0], output_path)
+
+
+if __name__ == '__main__':
+    main(input_path='input/chicago.jpg',
+         model_path='model.pth',
+         output_dir='output/',
+         need_animation=False,  # whether intermediate results are needed for animation.
+         resize_h=None,  # resize the original input to this size. None means do not resize.
+         resize_w=None,  # resize the original input to this size. None means do not resize.
+         serial=False)  # if animation is needed, serial must be True.
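
For reference, `param2stroke` can also be exercised on its own, outside `main`. The sketch below is illustrative only: it assumes it is run from the `inference/` directory (so the brush PNGs and the `inference` module resolve), and the stroke parameter values are made up.

import torch
from inference import param2stroke, read_img

# Load the two meta brushes (vertical, horizontal) as 1-channel images,
# the same way main() builds meta_brushes.
brush_v = read_img('brush/brush_large_vertical.png', 'L')
brush_h = read_img('brush/brush_large_horizontal.png', 'L')
meta_brushes = torch.cat([brush_v, brush_h], dim=0)  # 2 x 1 x H_b x W_b

# Two strokes: (x_center, y_center, width, height, theta, R, G, B), all in [0, 1].
# theta is a fraction of pi, so 0.25 means a 45-degree rotation.
param = torch.tensor([
    [0.5, 0.5, 0.4, 0.1, 0.00, 1.0, 0.0, 0.0],   # wide red horizontal stroke
    [0.3, 0.6, 0.1, 0.5, 0.25, 0.0, 0.0, 1.0],   # tall blue stroke, rotated
])
foregrounds, alphas = param2stroke(param, 128, 128, meta_brushes)
print(foregrounds.shape, alphas.shape)  # torch.Size([2, 3, 128, 128]) twice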
inference/input/.DS_Store ADDED
Binary file (6.15 kB)
inference/input/temp.txt ADDED
File without changes
inference/morphology.py ADDED
@@ -0,0 +1,51 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Erosion2d(nn.Module):
+
+    def __init__(self, m=1):
+        super(Erosion2d, self).__init__()
+        self.m = m
+        self.pad = [m, m, m, m]
+        self.unfold = nn.Unfold(2 * m + 1, padding=0, stride=1)
+
+    def forward(self, x):
+        batch_size, c, h, w = x.shape
+        x_pad = F.pad(x, pad=self.pad, mode='constant', value=1e9)
+        channel = self.unfold(x_pad).view(batch_size, c, -1, h, w)
+        result = torch.min(channel, dim=2)[0]
+        return result
+
+
+def erosion(x, m=1):
+    b, c, h, w = x.shape
+    x_pad = F.pad(x, pad=[m, m, m, m], mode='constant', value=1e9)
+    channel = nn.functional.unfold(x_pad, 2 * m + 1, padding=0, stride=1).view(b, c, -1, h, w)
+    result = torch.min(channel, dim=2)[0]
+    return result
+
+
+class Dilation2d(nn.Module):
+
+    def __init__(self, m=1):
+        super(Dilation2d, self).__init__()
+        self.m = m
+        self.pad = [m, m, m, m]
+        self.unfold = nn.Unfold(2 * m + 1, padding=0, stride=1)
+
+    def forward(self, x):
+        batch_size, c, h, w = x.shape
+        x_pad = F.pad(x, pad=self.pad, mode='constant', value=-1e9)
+        channel = self.unfold(x_pad).view(batch_size, c, -1, h, w)
+        result = torch.max(channel, dim=2)[0]
+        return result
+
+
+def dilation(x, m=1):
+    b, c, h, w = x.shape
+    x_pad = F.pad(x, pad=[m, m, m, m], mode='constant', value=-1e9)
+    channel = nn.functional.unfold(x_pad, 2 * m + 1, padding=0, stride=1).view(b, c, -1, h, w)
+    result = torch.max(channel, dim=2)[0]
+    return result
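
These helpers implement grayscale morphology as a per-pixel min/max over each (2m+1) x (2m+1) window, gathered with `unfold`; padding with +/-1e9 keeps border windows from picking up the pad values. A quick illustrative check on a single "on" pixel (assuming the script sits next to morphology.py):

import torch
from morphology import dilation, erosion

# A 7x7 map with a single "on" pixel in the middle.
x = torch.zeros(1, 1, 7, 7)
x[0, 0, 3, 3] = 1.

# Dilation grows the pixel into a 3x3 block (max over each 3x3 window);
# erosion of the same input wipes it out (min over each 3x3 window).
print(dilation(x)[0, 0].int())
print(erosion(x).sum())  # tensor(0.)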
inference/network.py ADDED
@@ -0,0 +1,84 @@
+import torch
+import torch.nn as nn
+
+
+class SignWithSigmoidGrad(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, x):
+        result = (x > 0).float()
+        sigmoid_result = torch.sigmoid(x)
+        ctx.save_for_backward(sigmoid_result)
+        return result
+
+    @staticmethod
+    def backward(ctx, grad_result):
+        (sigmoid_result,) = ctx.saved_tensors
+        if ctx.needs_input_grad[0]:
+            grad_input = grad_result * sigmoid_result * (1 - sigmoid_result)
+        else:
+            grad_input = None
+        return grad_input
+
+
+class Painter(nn.Module):
+
+    def __init__(self, param_per_stroke, total_strokes, hidden_dim, n_heads=8, n_enc_layers=3, n_dec_layers=3):
+        super().__init__()
+        self.enc_img = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(3, 32, 3, 1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(32, 64, 3, 2),
+            nn.BatchNorm2d(64),
+            nn.ReLU(True),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(64, 128, 3, 2),
+            nn.BatchNorm2d(128),
+            nn.ReLU(True))
+        self.enc_canvas = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(3, 32, 3, 1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(32, 64, 3, 2),
+            nn.BatchNorm2d(64),
+            nn.ReLU(True),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(64, 128, 3, 2),
+            nn.BatchNorm2d(128),
+            nn.ReLU(True))
+        self.conv = nn.Conv2d(128 * 2, hidden_dim, 1)
+        self.transformer = nn.Transformer(hidden_dim, n_heads, n_enc_layers, n_dec_layers)
+        self.linear_param = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.ReLU(True),
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.ReLU(True),
+            nn.Linear(hidden_dim, param_per_stroke))
+        self.linear_decider = nn.Linear(hidden_dim, 1)
+        self.query_pos = nn.Parameter(torch.rand(total_strokes, hidden_dim))
+        self.row_embed = nn.Parameter(torch.rand(8, hidden_dim // 2))
+        self.col_embed = nn.Parameter(torch.rand(8, hidden_dim // 2))
+
+    def forward(self, img, canvas):
+        b, _, H, W = img.shape
+        img_feat = self.enc_img(img)
+        canvas_feat = self.enc_canvas(canvas)
+        h, w = img_feat.shape[-2:]
+        feat = torch.cat([img_feat, canvas_feat], dim=1)
+        feat_conv = self.conv(feat)
+
+        pos_embed = torch.cat([
+            self.col_embed[:w].unsqueeze(0).contiguous().repeat(h, 1, 1),
+            self.row_embed[:h].unsqueeze(1).contiguous().repeat(1, w, 1),
+        ], dim=-1).flatten(0, 1).unsqueeze(1)
+        hidden_state = self.transformer(pos_embed + feat_conv.flatten(2).permute(2, 0, 1).contiguous(),
+                                        self.query_pos.unsqueeze(1).contiguous().repeat(1, b, 1))
+        hidden_state = hidden_state.permute(1, 0, 2).contiguous()
+        param = self.linear_param(hidden_state)
+        decision = self.linear_decider(hidden_state)
+        return param, decision
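
`Painter` follows a DETR-style design: CNN features of the image patch and the canvas patch are fused, positional embeddings are added, and the transformer's `total_strokes` learned queries each yield one stroke's shape parameters plus an existence logit. An illustrative forward pass with the same hyper-parameters inference.py uses (random inputs, shapes only; assumes the script sits next to network.py):

import torch
from network import Painter, SignWithSigmoidGrad

# 5 shape params per stroke, 8 strokes per patch, hidden width 256.
net = Painter(5, 8, 256, 8, 3, 3)
net.eval()  # use BatchNorm running stats instead of batch stats

img = torch.rand(4, 3, 32, 32)     # 4 image patches
canvas = torch.rand(4, 3, 32, 32)  # corresponding canvas patches
shape_param, decision = net(img, canvas)
print(shape_param.shape)  # torch.Size([4, 8, 5])
print(decision.shape)     # torch.Size([4, 8, 1])

# Hard 0/1 decisions in the forward pass, sigmoid gradient in the backward.
hard = SignWithSigmoidGrad.apply(decision)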
train/brush/brush_large_horizontal.png ADDED
train/brush/brush_large_vertical.png ADDED
train/brush/brush_small_horizontal.png ADDED
train/brush/brush_small_vertical.png ADDED
train/data/__init__.py ADDED
@@ -0,0 +1,94 @@
+"""This package includes all the modules related to data loading and preprocessing.
+
+To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
+You need to implement four functions:
+    -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
+    -- <__len__>: return the size of dataset.
+    -- <__getitem__>: get a data point from data loader.
+    -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
+
+Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
+See our template dataset class 'template_dataset.py' for more details.
+"""
+import importlib
+import torch.utils.data
+from data.base_dataset import BaseDataset
+
+
+def find_dataset_using_name(dataset_name):
+    """Import the module "data/[dataset_name]_dataset.py".
+
+    In the file, the class called DatasetNameDataset() will
+    be instantiated. It has to be a subclass of BaseDataset,
+    and it is case-insensitive.
+    """
+    dataset_filename = "data." + dataset_name + "_dataset"
+    datasetlib = importlib.import_module(dataset_filename)
+
+    dataset = None
+    target_dataset_name = dataset_name.replace('_', '') + 'dataset'
+    for name, cls in datasetlib.__dict__.items():
+        if name.lower() == target_dataset_name.lower() \
+                and issubclass(cls, BaseDataset):
+            dataset = cls
+
+    if dataset is None:
+        raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
+
+    return dataset
+
+
+def get_option_setter(dataset_name):
+    """Return the static method <modify_commandline_options> of the dataset class."""
+    dataset_class = find_dataset_using_name(dataset_name)
+    return dataset_class.modify_commandline_options
+
+
+def create_dataset(opt):
+    """Create a dataset given the option.
+
+    This function wraps the class CustomDatasetDataLoader.
+    This is the main interface between this package and 'train.py'/'test.py'
+
+    Example:
+        >>> from data import create_dataset
+        >>> dataset = create_dataset(opt)
+    """
+    data_loader = CustomDatasetDataLoader(opt)
+    dataset = data_loader.load_data()
+    return dataset
+
+
+class CustomDatasetDataLoader():
+    """Wrapper class of Dataset class that performs multi-threaded data loading"""
+
+    def __init__(self, opt):
+        """Initialize this class
+
+        Step 1: create a dataset instance given the name [dataset_mode]
+        Step 2: create a multi-threaded data loader.
+        """
+        self.opt = opt
+        dataset_class = find_dataset_using_name(opt.dataset_mode)
+        self.dataset = dataset_class(opt)
+        print("dataset [%s] was created" % type(self.dataset).__name__)
+        self.dataloader = torch.utils.data.DataLoader(
+            self.dataset,
+            batch_size=opt.batch_size,
+            shuffle=not opt.serial_batches,
+            num_workers=int(opt.num_threads),
+            drop_last=True)
+
+    def load_data(self):
+        return self
+
+    def __len__(self):
+        """Return the number of data in the dataset"""
+        return min(len(self.dataset), self.opt.max_dataset_size)
+
+    def __iter__(self):
+        """Return a batch of data"""
+        for i, data in enumerate(self.dataloader):
+            if i * self.opt.batch_size >= self.opt.max_dataset_size:
+                break
+            yield data
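
Per the package docstring, adding a dataset is one file plus one subclass, resolved by name from `--dataset_mode`. A hypothetical `data/dummy_dataset.py` could look like the sketch below (all names illustrative; it assumes `opt.dataroot` is a directory of image files):

import os
from PIL import Image
from data.base_dataset import BaseDataset, get_transform


class DummyDataset(BaseDataset):
    """Hypothetical example: serve every file found under opt.dataroot."""

    def __init__(self, opt):
        BaseDataset.__init__(self, opt)
        self.paths = sorted(os.path.join(opt.dataroot, f)
                            for f in os.listdir(opt.dataroot))
        self.transform = get_transform(opt)

    def __getitem__(self, index):
        path = self.paths[index]
        img = self.transform(Image.open(path).convert('RGB'))
        return {'A': img, 'A_paths': path}

    def __len__(self):
        return len(self.paths)

With this file in place, `--dataset_mode dummy` makes `find_dataset_using_name` pick up `DummyDataset` automatically.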
train/data/base_dataset.py ADDED
@@ -0,0 +1,153 @@
+"""This module implements an abstract base class (ABC) 'BaseDataset' for datasets.
+
+It also includes common transformation functions (e.g., get_transform, __scale_width), which can later be used in subclasses.
+"""
+import random
+import numpy as np
+import torch.utils.data as data
+from PIL import Image
+import torchvision.transforms as transforms
+from abc import ABC, abstractmethod
+
+
+class BaseDataset(data.Dataset, ABC):
+    """This class is an abstract base class (ABC) for datasets.
+
+    To create a subclass, you need to implement the following four functions:
+    -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
+    -- <__len__>: return the size of dataset.
+    -- <__getitem__>: get a data point.
+    -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
+    """
+
+    def __init__(self, opt):
+        """Initialize the class; save the options in the class.
+
+        Parameters:
+            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
+        """
+        self.opt = opt
+        self.root = opt.dataroot
+
+    @staticmethod
+    def modify_commandline_options(parser, is_train):
+        """Add new dataset-specific options, and rewrite default values for existing options.
+
+        Parameters:
+            parser -- original option parser
+            is_train (bool) -- whether it is the training phase or the test phase. You can use this flag to add training-specific or test-specific options.
+
+        Returns:
+            the modified parser.
+        """
+        return parser
+
+    @abstractmethod
+    def __len__(self):
+        """Return the total number of images in the dataset."""
+        return 0
+
+    @abstractmethod
+    def __getitem__(self, index):
+        """Return a data point and its metadata information.
+
+        Parameters:
+            index -- a random integer for data indexing
+
+        Returns:
+            a dictionary of data with their names. It usually contains the data itself and its metadata information.
+        """
+        pass
+
+
+def get_params(opt, size):
+    w, h = size
+    new_h = h
+    new_w = w
+    if opt.preprocess == 'resize_and_crop':
+        new_h = new_w = opt.load_size
+    elif opt.preprocess == 'scale_width_and_crop':
+        new_w = opt.load_size
+        new_h = opt.load_size * h // w
+
+    x = random.randint(0, np.maximum(0, new_w - opt.crop_size))
+    y = random.randint(0, np.maximum(0, new_h - opt.crop_size))
+
+    flip = random.random() > 0.5
+
+    return {'crop_pos': (x, y), 'flip': flip}
+
+
+def get_transform(opt, params=None, grayscale=False, method=Image.BICUBIC, convert=True):
+    transform_list = []
+    if grayscale:
+        transform_list.append(transforms.Grayscale(1))
+    if 'resize' in opt.preprocess:
+        osize = [opt.load_size, opt.load_size]
+        transform_list.append(transforms.Resize(osize, method))
+    elif 'scale_width' in opt.preprocess:
+        transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size, opt.crop_size, method)))
+
+    if 'crop' in opt.preprocess:
+        if params is None:
+            transform_list.append(transforms.RandomCrop(opt.crop_size))
+        else:
+            transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.crop_size)))
+
+    if opt.preprocess == 'none':
+        transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base=4, method=method)))
+
+    if not opt.no_flip:
+        if params is None:
+            transform_list.append(transforms.RandomHorizontalFlip())
+        elif params['flip']:
+            transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip'])))
+
+    if convert:
+        transform_list += [transforms.ToTensor()]
+    return transforms.Compose(transform_list)
+
+
+def __make_power_2(img, base, method=Image.BICUBIC):
+    ow, oh = img.size
+    h = int(round(oh / base) * base)
+    w = int(round(ow / base) * base)
+    if h == oh and w == ow:
+        return img
+
+    __print_size_warning(ow, oh, w, h)
+    return img.resize((w, h), method)
+
+
+def __scale_width(img, target_size, crop_size, method=Image.BICUBIC):
+    ow, oh = img.size
+    if ow == target_size and oh >= crop_size:
+        return img
+    w = target_size
+    h = int(max(target_size * oh / ow, crop_size))
+    return img.resize((w, h), method)
+
+
+def __crop(img, pos, size):
+    ow, oh = img.size
+    x1, y1 = pos
+    tw = th = size
+    if (ow > tw or oh > th):
+        return img.crop((x1, y1, x1 + tw, y1 + th))
+    return img
+
+
+def __flip(img, flip):
+    if flip:
+        return img.transpose(Image.FLIP_LEFT_RIGHT)
+    return img
+
+
+def __print_size_warning(ow, oh, w, h):
+    """Print warning information about the image size (only printed once)."""
+    if not hasattr(__print_size_warning, 'has_printed'):
+        print("The image size needs to be a multiple of 4. "
+              "The loaded image size was (%d, %d), so it was adjusted to "
+              "(%d, %d). This adjustment will be done to all images "
+              "whose sizes are not multiples of 4" % (ow, oh, w, h))
+        __print_size_warning.has_printed = True
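
`get_params` draws one crop position and one flip decision, and `get_transform` builds a deterministic pipeline from them, so paired images can share the exact same augmentation. An illustrative run (the `opt` object below is a stand-in carrying only the fields these two functions read):

from types import SimpleNamespace
from PIL import Image
from data.base_dataset import get_params, get_transform

opt = SimpleNamespace(preprocess='resize_and_crop', load_size=288,
                      crop_size=256, no_flip=False)
img = Image.new('RGB', (512, 384))
params = get_params(opt, img.size)      # shared crop/flip decision
transform = get_transform(opt, params)  # deterministic given params
tensor = transform(img)                 # 3 x 256 x 256 float tensor
print(tensor.shape)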
train/data/null_dataset.py ADDED
@@ -0,0 +1,15 @@
+from data.base_dataset import BaseDataset
+import os
+
+
+class NullDataset(BaseDataset):
+
+    def __init__(self, opt):
+        BaseDataset.__init__(self, opt)
+
+    def __getitem__(self, index):
+        return {'A_paths': os.path.join(self.opt.dataroot, '%d.jpg' % index)}
+
+    def __len__(self):
+        """Return the total number of images in the dataset."""
+        return self.opt.max_dataset_size
train/models/__init__.py ADDED
@@ -0,0 +1,67 @@
+"""This package contains modules related to objective functions, optimizations, and network architectures.
+
+To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
+You need to implement the following five functions:
+    -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
+    -- <set_input>: unpack data from dataset and apply preprocessing.
+    -- <forward>: produce intermediate results.
+    -- <optimize_parameters>: calculate loss, gradients, and update network weights.
+    -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
+
+In the function <__init__>, you need to define four lists:
+    -- self.loss_names (str list): specify the training losses that you want to plot and save.
+    -- self.model_names (str list): define networks used in our training.
+    -- self.visual_names (str list): specify the images that you want to display and save.
+    -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example.
+
+Now you can use the model class by specifying flag '--model dummy'.
+See our template model class 'template_model.py' for more details.
+"""
+
+import importlib
+from models.base_model import BaseModel
+
+
+def find_model_using_name(model_name):
+    """Import the module "models/[model_name]_model.py".
+
+    In the file, the class called DatasetNameModel() will
+    be instantiated. It has to be a subclass of BaseModel,
+    and it is case-insensitive.
+    """
+    model_filename = "models." + model_name + "_model"
+    modellib = importlib.import_module(model_filename)
+    model = None
+    target_model_name = model_name.replace('_', '') + 'model'
+    for name, cls in modellib.__dict__.items():
+        if name.lower() == target_model_name.lower() \
+                and issubclass(cls, BaseModel):
+            model = cls
+
+    if model is None:
+        print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
+        exit(0)
+
+    return model
+
+
+def get_option_setter(model_name):
+    """Return the static method <modify_commandline_options> of the model class."""
+    model_class = find_model_using_name(model_name)
+    return model_class.modify_commandline_options
+
+
+def create_model(opt):
+    """Create a model given the option.
+
+    This function wraps the model class selected by opt.model.
+    This is the main interface between this package and 'train.py'/'test.py'
+
+    Example:
+        >>> from models import create_model
+        >>> model = create_model(opt)
+    """
+    model = find_model_using_name(opt.model)
+    instance = model(opt)
+    print("model [%s] was created" % type(instance).__name__)
+    return instance
train/models/base_model.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from collections import OrderedDict
4
+ from abc import ABC, abstractmethod
5
+ from . import networks
6
+
7
+
8
+ class BaseModel(ABC):
9
+ """This class is an abstract base class (ABC) for models.
10
+ To create a subclass, you need to implement the following five functions:
11
+ -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
12
+ -- <set_input>: unpack data from dataset and apply preprocessing.
13
+ -- <forward>: produce intermediate results.
14
+ -- <optimize_parameters>: calculate losses, gradients, and update network weights.
15
+ -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
16
+ """
17
+
18
+ def __init__(self, opt):
19
+ """Initialize the BaseModel class.
20
+
21
+ Parameters:
22
+ opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
23
+
24
+ When creating your custom class, you need to implement your own initialization.
25
+ In this function, you should first call <BaseModel.__init__(self, opt)>
26
+ Then, you need to define four lists:
27
+ -- self.loss_names (str list): specify the training losses that you want to plot and save.
28
+ -- self.model_names (str list): define networks used in our training.
29
+ -- self.visual_names (str list): specify the images that you want to display and save.
30
+ -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.
31
+ """
32
+ self.opt = opt
33
+ self.gpu_ids = opt.gpu_ids
34
+ self.isTrain = opt.isTrain
35
+ self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU
36
+ self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir
37
+ if opt.preprocess != 'scale_width': # with [scale_width], input images might have different sizes, which hurts the performance of cudnn.benchmark.
38
+ torch.backends.cudnn.benchmark = True
39
+ self.loss_names = []
40
+ self.model_names = []
41
+ self.visual_names = []
42
+ self.optimizers = []
43
+ self.image_paths = []
44
+ self.metric = 0 # used for learning rate policy 'plateau'
45
+
46
+ @staticmethod
47
+ def modify_commandline_options(parser, is_train):
48
+ """Add new model-specific options, and rewrite default values for existing options.
49
+
50
+ Parameters:
51
+ parser -- original option parser
52
+ is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
53
+
54
+ Returns:
55
+ the modified parser.
56
+ """
57
+ return parser
58
+
59
+ @abstractmethod
60
+ def set_input(self, input):
61
+ """Unpack input data from the dataloader and perform necessary pre-processing steps.
62
+
63
+ Parameters:
64
+ input (dict): includes the data itself and its metadata information.
65
+ """
66
+ pass
67
+
68
+ @abstractmethod
69
+ def forward(self):
70
+ """Run forward pass; called by both functions <optimize_parameters> and <test>."""
71
+ pass
72
+
73
+ @abstractmethod
74
+ def optimize_parameters(self):
75
+ """Calculate losses, gradients, and update network weights; called in every training iteration"""
76
+ pass
77
+
78
+ def setup(self, opt):
79
+ """Load and print networks; create schedulers
80
+
81
+ Parameters:
82
+ opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
83
+ """
84
+ if self.isTrain:
85
+ self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers]
86
+ if not self.isTrain or opt.continue_train:
87
+ load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch
88
+ self.load_networks(load_suffix)
89
+ self.print_networks(opt.verbose)
90
+
91
+ def eval(self):
92
+ """Make models eval mode during test time"""
93
+ for name in self.model_names:
94
+ if isinstance(name, str):
95
+ net = getattr(self, 'net_' + name)
96
+ net.eval()
97
+
98
+ def test(self):
99
+ """Forward function used in test time.
100
+
101
+ This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
102
+ It also calls <compute_visuals> to produce additional visualization results
103
+ """
104
+ with torch.no_grad():
105
+ self.forward()
106
+ self.compute_visuals()
107
+
108
+     def compute_visuals(self):
+         """Calculate additional output images for visdom and HTML visualization"""
+         pass
+
+     def get_image_paths(self):
+         """Return image paths that are used to load current data"""
+         return self.image_paths
+
+     def update_learning_rate(self):
+         """Update learning rates for all the networks; called at the end of every epoch"""
+         old_lr = self.optimizers[0].param_groups[0]['lr']
+         for scheduler in self.schedulers:
+             if self.opt.lr_policy == 'plateau':
+                 scheduler.step(self.metric)
+             else:
+                 scheduler.step()
+
+         lr = self.optimizers[0].param_groups[0]['lr']
+         print('learning rate %.7f -> %.7f' % (old_lr, lr))
+
+     def get_current_visuals(self):
+         """Return visualization images. train.py will display these images with visdom and save them to an HTML file"""
+         visual_ret = OrderedDict()
+         for name in self.visual_names:
+             if isinstance(name, str):
+                 visual_ret[name] = getattr(self, name)
+         return visual_ret
+
+     def get_current_losses(self):
+         """Return training losses / errors. train.py will print these errors on the console and save them to a file"""
+         errors_ret = OrderedDict()
+         for name in self.loss_names:
+             if isinstance(name, str):
+                 errors_ret[name] = float(getattr(self, 'loss_' + name))  # float(...) works for both scalar tensors and plain floats
+         return errors_ret
+
+     def save_networks(self, epoch):
+         """Save all the networks to the disk.
+
+         Parameters:
+             epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name)
+         """
+         for name in self.model_names:
+             if isinstance(name, str):
+                 save_filename = '%s_net_%s.pth' % (epoch, name)
+                 save_path = os.path.join(self.save_dir, save_filename)
+                 net = getattr(self, 'net_' + name)
+
+                 if len(self.gpu_ids) > 0 and torch.cuda.is_available():
+                     torch.save(net.module.cpu().state_dict(), save_path)
+                     net.cuda(self.gpu_ids[0])
+                 else:
+                     torch.save(net.cpu().state_dict(), save_path)
+
+     def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0):
+         """Fix InstanceNorm checkpoint incompatibility (prior to PyTorch 0.4)"""
+         key = keys[i]
+         if i + 1 == len(keys):  # at the end, pointing to a parameter/buffer
+             if module.__class__.__name__.startswith('InstanceNorm') and \
+                     (key == 'running_mean' or key == 'running_var'):
+                 if getattr(module, key) is None:
+                     state_dict.pop('.'.join(keys))
+             if module.__class__.__name__.startswith('InstanceNorm') and \
+                     (key == 'num_batches_tracked'):
+                 state_dict.pop('.'.join(keys))
+         else:
+             self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1)
+
+     def load_networks(self, epoch):
+         """Load all the networks from the disk.
+
+         Parameters:
+             epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name)
+         """
+         for name in self.model_names:
+             if isinstance(name, str):
+                 load_filename = '%s_net_%s.pth' % (epoch, name)
+                 load_path = os.path.join(self.save_dir, load_filename)
+                 net = getattr(self, 'net_' + name)
+                 if isinstance(net, torch.nn.DataParallel):
+                     net = net.module
+                 print('loading the model from %s' % load_path)
+                 # if you are using PyTorch newer than 0.4 (e.g., built from
+                 # GitHub source), you can remove str() on self.device
+                 state_dict = torch.load(load_path, map_location=str(self.device))
+                 if hasattr(state_dict, '_metadata'):
+                     del state_dict._metadata
+
+                 # patch InstanceNorm checkpoints prior to 0.4
+                 for key in list(state_dict.keys()):  # need to copy keys here because we mutate in the loop
+                     self.__patch_instance_norm_state_dict(state_dict, net, key.split('.'))
+                 net.load_state_dict(state_dict)
+
+     def print_networks(self, verbose):
+         """Print the total number of parameters in the network and (if verbose) the network architecture
+
+         Parameters:
+             verbose (bool) -- if verbose: print the network architecture
+         """
+         print('---------- Networks initialized -------------')
+         for name in self.model_names:
+             if isinstance(name, str):
+                 net = getattr(self, 'net_' + name)
+                 num_params = 0
+                 for param in net.parameters():
+                     num_params += param.numel()
+                 if verbose:
+                     print(net)
+                 print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6))
+         print('-----------------------------------------------')
+
+     def set_requires_grad(self, nets, requires_grad=False):
+         """Set requires_grad=False for all the networks to avoid unnecessary computations
+
+         Parameters:
+             nets (network list)  -- a list of networks
+             requires_grad (bool) -- whether the networks require gradients or not
+         """
+         if not isinstance(nets, list):
+             nets = [nets]
+         for net in nets:
+             if net is not None:
+                 for param in net.parameters():
+                     param.requires_grad = requires_grad
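
A quick sketch of how the checkpoint methods above compose; the epoch values are illustrative, and `model` stands for any BaseModel subclass (such as the PainterModel below):

    model.save_networks(5)         # writes <save_dir>/5_net_g.pth for every entry in model_names
    model.save_networks('latest')  # string "epochs" work too; train.py uses 'latest'
    model.load_networks('latest')  # reloads the weights, patching old InstanceNorm buffers if needed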
train/models/networks.py ADDED
@@ -0,0 +1,143 @@
+ import torch
+ import torch.nn as nn
+ from torch.nn import init
+ from torch.optim import lr_scheduler
+
+
+ def get_scheduler(optimizer, opt):
+     if opt.lr_policy == 'linear':
+         def lambda_rule(epoch):
+             # lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs_decay + 1)
+             # decay by a factor of 0.3 every 5 epochs once past opt.n_epochs
+             lr_l = 0.3 ** max(0, (epoch + opt.epoch_count - opt.n_epochs) // 5)
+             return lr_l
+
+         scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
+     elif opt.lr_policy == 'step':
+         scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
+     elif opt.lr_policy == 'plateau':
+         scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
+     elif opt.lr_policy == 'cosine':
+         scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0)
+     else:
+         raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
+     return scheduler
+
+
+ def init_weights(net, init_type='normal', init_gain=0.02):
+     def init_func(m):  # the initialization function applied to each module
+         classname = m.__class__.__name__
+         if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
+             if init_type == 'normal':
+                 init.normal_(m.weight.data, 0.0, init_gain)
+             elif init_type == 'xavier':
+                 init.xavier_normal_(m.weight.data, gain=init_gain)
+             elif init_type == 'kaiming':
+                 init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
+             elif init_type == 'orthogonal':
+                 init.orthogonal_(m.weight.data, gain=init_gain)
+             else:
+                 raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
+             if hasattr(m, 'bias') and m.bias is not None:
+                 init.constant_(m.bias.data, 0.0)
+         elif classname.find('BatchNorm2d') != -1:
+             init.normal_(m.weight.data, 1.0, init_gain)
+             init.constant_(m.bias.data, 0.0)
+
+     print('initialize network with %s' % init_type)
+     net.apply(init_func)
+
+
+ def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=()):
+     if len(gpu_ids) > 0:
+         assert torch.cuda.is_available()
+         net.to(gpu_ids[0])
+         net = torch.nn.DataParallel(net, gpu_ids)  # multi-GPU
+     init_weights(net, init_type, init_gain=init_gain)
+     return net
+
+
+ class SignWithSigmoidGrad(torch.autograd.Function):
+     """Binarize with a straight-through estimator: the forward pass is a hard sign (x > 0);
+     the backward pass substitutes the gradient of a sigmoid."""
+
+     @staticmethod
+     def forward(ctx, x):
+         result = (x > 0).float()
+         sigmoid_result = torch.sigmoid(x)
+         ctx.save_for_backward(sigmoid_result)
+         return result
+
+     @staticmethod
+     def backward(ctx, grad_result):
+         (sigmoid_result,) = ctx.saved_tensors
+         if ctx.needs_input_grad[0]:
+             grad_input = grad_result * sigmoid_result * (1 - sigmoid_result)
+         else:
+             grad_input = None
+         return grad_input
+
+
+ class Painter(nn.Module):
+
+     def __init__(self, param_per_stroke, total_strokes, hidden_dim, n_heads=8, n_enc_layers=3, n_dec_layers=3):
+         super().__init__()
+         self.enc_img = nn.Sequential(
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(3, 32, 3, 1),
+             nn.BatchNorm2d(32),
+             nn.ReLU(True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(32, 64, 3, 2),
+             nn.BatchNorm2d(64),
+             nn.ReLU(True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(64, 128, 3, 2),
+             nn.BatchNorm2d(128),
+             nn.ReLU(True))
+         self.enc_canvas = nn.Sequential(
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(3, 32, 3, 1),
+             nn.BatchNorm2d(32),
+             nn.ReLU(True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(32, 64, 3, 2),
+             nn.BatchNorm2d(64),
+             nn.ReLU(True),
+             nn.ReflectionPad2d(1),
+             nn.Conv2d(64, 128, 3, 2),
+             nn.BatchNorm2d(128),
+             nn.ReLU(True))
+         self.conv = nn.Conv2d(128 * 2, hidden_dim, 1)
+         self.transformer = nn.Transformer(hidden_dim, n_heads, n_enc_layers, n_dec_layers)
+         self.linear_param = nn.Sequential(
+             nn.Linear(hidden_dim, hidden_dim),
+             nn.ReLU(True),
+             nn.Linear(hidden_dim, hidden_dim),
+             nn.ReLU(True),
+             nn.Linear(hidden_dim, param_per_stroke))
+         self.linear_decider = nn.Linear(hidden_dim, 1)
+         self.query_pos = nn.Parameter(torch.rand(total_strokes, hidden_dim))
+         self.row_embed = nn.Parameter(torch.rand(8, hidden_dim // 2))
+         self.col_embed = nn.Parameter(torch.rand(8, hidden_dim // 2))
+
+     def forward(self, img, canvas):
+         b, _, H, W = img.shape
+         img_feat = self.enc_img(img)
+         canvas_feat = self.enc_canvas(canvas)
+         h, w = img_feat.shape[-2:]
+         feat = torch.cat([img_feat, canvas_feat], dim=1)
+         feat_conv = self.conv(feat)
+
+         # learned 2D positional embedding built from row/col embeddings (DETR-style)
+         pos_embed = torch.cat([
+             self.col_embed[:w].unsqueeze(0).contiguous().repeat(h, 1, 1),
+             self.row_embed[:h].unsqueeze(1).contiguous().repeat(1, w, 1),
+         ], dim=-1).flatten(0, 1).unsqueeze(1)
+         hidden_state = self.transformer(pos_embed + feat_conv.flatten(2).permute(2, 0, 1).contiguous(),
+                                         self.query_pos.unsqueeze(1).contiguous().repeat(1, b, 1))
+         hidden_state = hidden_state.permute(1, 0, 2).contiguous()
+         param = self.linear_param(hidden_state)
+         s = hidden_state.shape[1]
+         # sample the input image color at each predicted stroke center; used for both endpoint colors
+         grid = param[:, :, :2].view(b * s, 1, 1, 2).contiguous()
+         img_temp = img.unsqueeze(1).contiguous().repeat(1, s, 1, 1, 1).view(b * s, 3, H, W).contiguous()
+         color = nn.functional.grid_sample(img_temp, 2 * grid - 1, align_corners=False).view(b, s, 3).contiguous()
+         decision = self.linear_decider(hidden_state)
+         return torch.cat([param, color, color, torch.rand(b, s, 1, device=img.device)], dim=-1), decision
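
SignWithSigmoidGrad is the piece that keeps stroke decisions hard (0/1) in the forward pass while still passing a useful gradient backwards. A standalone sanity check (the tensor values are ours):

    import torch

    x = torch.tensor([-2.0, 0.5, 3.0], requires_grad=True)
    y = SignWithSigmoidGrad.apply(x)
    print(y)        # tensor([0., 1., 1.]) -- hard decisions
    y.sum().backward()
    print(x.grad)   # sigmoid(x) * (1 - sigmoid(x)): approximately [0.105, 0.235, 0.045]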
train/models/painter_model.py ADDED
@@ -0,0 +1,247 @@
+ import torch
+ import numpy as np
+ from .base_model import BaseModel
+ from . import networks
+ from util import morphology
+ from scipy.optimize import linear_sum_assignment
+ from PIL import Image
+
+
+ class PainterModel(BaseModel):
+
+     @staticmethod
+     def modify_commandline_options(parser, is_train=True):
+         parser.set_defaults(dataset_mode='null')
+         parser.add_argument('--used_strokes', type=int, default=8,
+                             help='number of strokes actually generated')
+         parser.add_argument('--num_blocks', type=int, default=3,
+                             help='number of transformer blocks for the stroke generator')
+         parser.add_argument('--lambda_w', type=float, default=10.0, help='weight for the W-distance loss of stroke shape')
+         parser.add_argument('--lambda_pixel', type=float, default=10.0, help='weight for pixel-level L1 loss')
+         parser.add_argument('--lambda_gt', type=float, default=1.0, help='weight for ground-truth loss')
+         parser.add_argument('--lambda_decision', type=float, default=10.0, help='weight for stroke decision loss')
+         parser.add_argument('--lambda_recall', type=float, default=10.0, help='weight of recall for stroke decision loss')
+         return parser
+
+     def __init__(self, opt):
+         BaseModel.__init__(self, opt)
+         self.loss_names = ['pixel', 'gt', 'w', 'decision']
+         self.visual_names = ['old', 'render', 'rec']
+         self.model_names = ['g']
+         self.d = 12  # xc, yc, w, h, theta, R0, G0, B0, R2, G2, B2, A
+         self.d_shape = 5
+
+         def read_img(img_path, img_type='RGB'):
+             img = Image.open(img_path).convert(img_type)
+             img = np.array(img)
+             if img.ndim == 2:
+                 img = np.expand_dims(img, axis=-1)
+             img = img.transpose((2, 0, 1))
+             img = torch.from_numpy(img).unsqueeze(0).float() / 255.
+             return img
+
+         brush_large_vertical = read_img('brush/brush_large_vertical.png', 'L').to(self.device)
+         brush_large_horizontal = read_img('brush/brush_large_horizontal.png', 'L').to(self.device)
+         self.meta_brushes = torch.cat(
+             [brush_large_vertical, brush_large_horizontal], dim=0)
+         net_g = networks.Painter(self.d_shape, opt.used_strokes, opt.ngf,
+                                  n_enc_layers=opt.num_blocks, n_dec_layers=opt.num_blocks)
+         self.net_g = networks.init_net(net_g, opt.init_type, opt.init_gain, self.gpu_ids)
+         self.old = None
+         self.render = None
+         self.rec = None
+         self.gt_param = None
+         self.pred_param = None
+         self.gt_decision = None
+         self.pred_decision = None
+         self.patch_size = 32
+         self.loss_pixel = torch.tensor(0., device=self.device)
+         self.loss_gt = torch.tensor(0., device=self.device)
+         self.loss_w = torch.tensor(0., device=self.device)
+         self.loss_decision = torch.tensor(0., device=self.device)
+         self.criterion_pixel = torch.nn.L1Loss().to(self.device)
+         self.criterion_decision = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor(opt.lambda_recall)).to(self.device)
+         if self.isTrain:
+             self.optimizer = torch.optim.Adam(self.net_g.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
+             self.optimizers.append(self.optimizer)
+
+     def param2stroke(self, param, H, W):
+         # param: b, 12
+         b = param.shape[0]
+         param_list = torch.split(param, 1, dim=1)
+         x0, y0, w, h, theta = [item.squeeze(-1) for item in param_list[:5]]
+         R0, G0, B0, R2, G2, B2, _ = param_list[5:]
+         sin_theta = torch.sin(torch.acos(torch.tensor(-1., device=param.device)) * theta)  # acos(-1) = pi
+         cos_theta = torch.cos(torch.acos(torch.tensor(-1., device=param.device)) * theta)
+         # pick the vertical meta brush for tall strokes and the horizontal one otherwise
+         index = torch.full((b,), -1, device=param.device)
+         index[h > w] = 0
+         index[h <= w] = 1
+         brush = self.meta_brushes[index.long()]
+         alphas = torch.cat([brush, brush, brush], dim=1)
+         alphas = (alphas > 0).float()
+         # linear color gradient from (R0, G0, B0) to (R2, G2, B2) along the brush
+         t = torch.arange(0, brush.shape[2], device=param.device).unsqueeze(0) / brush.shape[2]
+         color_map = torch.stack([R0 * (1 - t) + R2 * t, G0 * (1 - t) + G2 * t, B0 * (1 - t) + B2 * t], dim=1)
+         color_map = color_map.unsqueeze(-1).repeat(1, 1, 1, brush.shape[3])
+         brush = brush * color_map
+
+         # 2x3 inverse affine matrix for affine_grid: it maps each canvas pixel back into the
+         # brush frame (shift to the stroke center, rotate by theta, scale by 1/w and 1/h,
+         # with H/W factors correcting for the non-square aspect ratio)
+         warp_00 = cos_theta / w
+         warp_01 = sin_theta * H / (W * w)
+         warp_02 = (1 - 2 * x0) * cos_theta / w + (1 - 2 * y0) * sin_theta * H / (W * w)
+         warp_10 = -sin_theta * W / (H * h)
+         warp_11 = cos_theta / h
+         warp_12 = (1 - 2 * y0) * cos_theta / h - (1 - 2 * x0) * sin_theta * W / (H * h)
+         warp_0 = torch.stack([warp_00, warp_01, warp_02], dim=1)
+         warp_1 = torch.stack([warp_10, warp_11, warp_12], dim=1)
+         warp = torch.stack([warp_0, warp_1], dim=1)
+         grid = torch.nn.functional.affine_grid(warp, torch.Size((b, 3, H, W)), align_corners=False)
+         brush = torch.nn.functional.grid_sample(brush, grid, align_corners=False)
+         alphas = torch.nn.functional.grid_sample(alphas, grid, align_corners=False)
+
+         return brush, alphas
+
+     def set_input(self, input_dict):
+         self.image_paths = input_dict['A_paths']
+         with torch.no_grad():
+             # synthesize a random "old" canvas from random strokes
+             old_param = torch.rand(self.opt.batch_size // 4, self.opt.used_strokes, self.d, device=self.device)
+             old_param[:, :, :4] = old_param[:, :, :4] * 0.5 + 0.2
+             old_param[:, :, -4:-1] = old_param[:, :, -7:-4]  # end color = start color for synthetic strokes
+             old_param = old_param.view(-1, self.d).contiguous()
+             foregrounds, alphas = self.param2stroke(old_param, self.patch_size * 2, self.patch_size * 2)
+             foregrounds = morphology.Dilation2d(m=1)(foregrounds)
+             alphas = morphology.Erosion2d(m=1)(alphas)
+             foregrounds = foregrounds.view(self.opt.batch_size // 4, self.opt.used_strokes, 3, self.patch_size * 2,
+                                            self.patch_size * 2).contiguous()
+             alphas = alphas.view(self.opt.batch_size // 4, self.opt.used_strokes, 3, self.patch_size * 2,
+                                  self.patch_size * 2).contiguous()
+             old = torch.zeros(self.opt.batch_size // 4, 3, self.patch_size * 2, self.patch_size * 2, device=self.device)
+             for i in range(self.opt.used_strokes):
+                 foreground = foregrounds[:, i, :, :, :]
+                 alpha = alphas[:, i, :, :, :]
+                 old = foreground * alpha + old * (1 - alpha)
+             # split each double-size canvas into four patch_size x patch_size patches
+             old = old.view(self.opt.batch_size // 4, 3, 2, self.patch_size, 2, self.patch_size).contiguous()
+             old = old.permute(0, 2, 4, 1, 3, 5).contiguous()
+             self.old = old.view(self.opt.batch_size, 3, self.patch_size, self.patch_size).contiguous()
+
+             # synthesize ground-truth strokes on top of the old canvas
+             gt_param = torch.rand(self.opt.batch_size, self.opt.used_strokes, self.d, device=self.device)
+             gt_param[:, :, :4] = gt_param[:, :, :4] * 0.5 + 0.2
+             gt_param[:, :, -4:-1] = gt_param[:, :, -7:-4]
+             self.gt_param = gt_param[:, :, :self.d_shape]
+             gt_param = gt_param.view(-1, self.d).contiguous()
+             foregrounds, alphas = self.param2stroke(gt_param, self.patch_size, self.patch_size)
+             foregrounds = morphology.Dilation2d(m=1)(foregrounds)
+             alphas = morphology.Erosion2d(m=1)(alphas)
+             foregrounds = foregrounds.view(self.opt.batch_size, self.opt.used_strokes, 3, self.patch_size,
+                                            self.patch_size).contiguous()
+             alphas = alphas.view(self.opt.batch_size, self.opt.used_strokes, 3, self.patch_size,
+                                  self.patch_size).contiguous()
+             self.render = self.old.clone()
+             gt_decision = torch.ones(self.opt.batch_size, self.opt.used_strokes, device=self.device)
+             for i in range(self.opt.used_strokes):
+                 foreground = foregrounds[:, i, :, :, :]
+                 alpha = alphas[:, i, :, :, :]
+                 for j in range(i):
+                     # overlap of stroke i with an earlier stroke j, relative to stroke j's area;
+                     # a stroke that covers most of a kept earlier stroke is dropped
+                     iou = (torch.sum(alpha * alphas[:, j, :, :, :], dim=(-3, -2, -1)) + 1e-5) / (
+                             torch.sum(alphas[:, j, :, :, :], dim=(-3, -2, -1)) + 1e-5)
+                     gt_decision[:, i] = ((iou < 0.75) | (~gt_decision[:, j].bool())).float() * gt_decision[:, i]
+                 decision = gt_decision[:, i].view(self.opt.batch_size, 1, 1, 1).contiguous()
+                 self.render = foreground * alpha * decision + self.render * (1 - alpha * decision)
+             self.gt_decision = gt_decision
+
+     def forward(self):
+         param, decisions = self.net_g(self.render, self.old)
+         # param: b, stroke_per_patch, param_per_stroke
+         # decisions: b, stroke_per_patch, 1
+         self.pred_decision = decisions.view(-1, self.opt.used_strokes).contiguous()
+         self.pred_param = param[:, :, :self.d_shape]
+         param = param.view(-1, self.d).contiguous()
+         foregrounds, alphas = self.param2stroke(param, self.patch_size, self.patch_size)
+         foregrounds = morphology.Dilation2d(m=1)(foregrounds)
+         alphas = morphology.Erosion2d(m=1)(alphas)
+         # foregrounds, alphas: b * stroke_per_patch, 3, output_size, output_size
+         foregrounds = foregrounds.view(-1, self.opt.used_strokes, 3, self.patch_size, self.patch_size)
+         alphas = alphas.view(-1, self.opt.used_strokes, 3, self.patch_size, self.patch_size)
+         # foregrounds, alphas: b, stroke_per_patch, 3, output_size, output_size
+         decisions = networks.SignWithSigmoidGrad.apply(decisions.view(-1, self.opt.used_strokes, 1, 1, 1).contiguous())
+         self.rec = self.old.clone()
+         for j in range(foregrounds.shape[1]):
+             foreground = foregrounds[:, j, :, :, :]
+             alpha = alphas[:, j, :, :, :]
+             decision = decisions[:, j, :, :, :]
+             self.rec = foreground * alpha * decision + self.rec * (1 - alpha * decision)
+
+     @staticmethod
+     def get_sigma_sqrt(w, h, theta):
+         # square root of the covariance of the oriented 2-D Gaussian with semi-axes w/2, h/2
+         sigma_00 = w * (torch.cos(theta) ** 2) / 2 + h * (torch.sin(theta) ** 2) / 2
+         sigma_01 = (w - h) * torch.cos(theta) * torch.sin(theta) / 2
+         sigma_11 = h * (torch.cos(theta) ** 2) / 2 + w * (torch.sin(theta) ** 2) / 2
+         sigma_0 = torch.stack([sigma_00, sigma_01], dim=-1)
+         sigma_1 = torch.stack([sigma_01, sigma_11], dim=-1)
+         sigma = torch.stack([sigma_0, sigma_1], dim=-2)
+         return sigma
+
+     @staticmethod
+     def get_sigma(w, h, theta):
+         # the covariance matrix itself
+         sigma_00 = w * w * (torch.cos(theta) ** 2) / 4 + h * h * (torch.sin(theta) ** 2) / 4
+         sigma_01 = (w * w - h * h) * torch.cos(theta) * torch.sin(theta) / 4
+         sigma_11 = h * h * (torch.cos(theta) ** 2) / 4 + w * w * (torch.sin(theta) ** 2) / 4
+         sigma_0 = torch.stack([sigma_00, sigma_01], dim=-1)
+         sigma_1 = torch.stack([sigma_01, sigma_11], dim=-1)
+         sigma = torch.stack([sigma_0, sigma_1], dim=-2)
+         return sigma
+
+     def gaussian_w_distance(self, param_1, param_2):
+         # squared 2-Wasserstein distance between the Gaussians induced by two strokes:
+         # W2^2 = |mu_1 - mu_2|^2 + tr(S_1) + tr(S_2) - 2 * tr((S_1^0.5 S_2 S_1^0.5)^0.5),
+         # with theta in [0, 1] scaled by pi (acos(-1))
+         mu_1, w_1, h_1, theta_1 = torch.split(param_1, (2, 1, 1, 1), dim=-1)
+         w_1 = w_1.squeeze(-1)
+         h_1 = h_1.squeeze(-1)
+         theta_1 = torch.acos(torch.tensor(-1., device=param_1.device)) * theta_1.squeeze(-1)
+         trace_1 = (w_1 ** 2 + h_1 ** 2) / 4
+         mu_2, w_2, h_2, theta_2 = torch.split(param_2, (2, 1, 1, 1), dim=-1)
+         w_2 = w_2.squeeze(-1)
+         h_2 = h_2.squeeze(-1)
+         theta_2 = torch.acos(torch.tensor(-1., device=param_2.device)) * theta_2.squeeze(-1)
+         trace_2 = (w_2 ** 2 + h_2 ** 2) / 4
+         sigma_1_sqrt = self.get_sigma_sqrt(w_1, h_1, theta_1)
+         sigma_2 = self.get_sigma(w_2, h_2, theta_2)
+         trace_12 = torch.matmul(torch.matmul(sigma_1_sqrt, sigma_2), sigma_1_sqrt)
+         # closed form for tr(M^0.5) of a 2x2 PSD matrix M: sqrt(tr(M) + 2 * sqrt(det(M)))
+         trace_12 = torch.sqrt(trace_12[..., 0, 0] + trace_12[..., 1, 1] + 2 * torch.sqrt(
+             trace_12[..., 0, 0] * trace_12[..., 1, 1] - trace_12[..., 0, 1] * trace_12[..., 1, 0]))
+         return torch.sum((mu_1 - mu_2) ** 2, dim=-1) + trace_1 + trace_2 - 2 * trace_12
+
+     def optimize_parameters(self):
+         self.forward()
+         self.loss_pixel = self.criterion_pixel(self.rec, self.render) * self.opt.lambda_pixel
+         cur_valid_gt_size = 0
+         with torch.no_grad():
+             # Hungarian matching between predicted strokes and valid ground-truth strokes
+             r_idx = []
+             c_idx = []
+             for i in range(self.gt_param.shape[0]):
+                 is_valid_gt = self.gt_decision[i].bool()
+                 valid_gt_param = self.gt_param[i, is_valid_gt]
+                 cost_matrix_l1 = torch.cdist(self.pred_param[i], valid_gt_param, p=1)
+                 pred_param_broad = self.pred_param[i].unsqueeze(1).contiguous().repeat(
+                     1, valid_gt_param.shape[0], 1)
+                 valid_gt_param_broad = valid_gt_param.unsqueeze(0).contiguous().repeat(
+                     self.pred_param.shape[1], 1, 1)
+                 cost_matrix_w = self.gaussian_w_distance(pred_param_broad, valid_gt_param_broad)
+                 decision = self.pred_decision[i]
+                 cost_matrix_decision = (1 - decision).unsqueeze(-1).repeat(1, valid_gt_param.shape[0])
+                 r, c = linear_sum_assignment((cost_matrix_l1 + cost_matrix_w + cost_matrix_decision).cpu())
+                 r_idx.append(torch.tensor(r + self.pred_param.shape[1] * i, device=self.device))
+                 c_idx.append(torch.tensor(c + cur_valid_gt_size, device=self.device))
+                 cur_valid_gt_size += valid_gt_param.shape[0]
+             r_idx = torch.cat(r_idx, dim=0)
+             c_idx = torch.cat(c_idx, dim=0)
+             paired_gt_decision = torch.zeros(self.gt_decision.shape[0] * self.gt_decision.shape[1], device=self.device)
+             paired_gt_decision[r_idx] = 1.
+         all_valid_gt_param = self.gt_param[self.gt_decision.bool(), :]
+         all_pred_param = self.pred_param.view(-1, self.pred_param.shape[2]).contiguous()
+         all_pred_decision = self.pred_decision.view(-1).contiguous()
+         paired_gt_param = all_valid_gt_param[c_idx, :]
+         paired_pred_param = all_pred_param[r_idx, :]
+         self.loss_gt = self.criterion_pixel(paired_pred_param, paired_gt_param) * self.opt.lambda_gt
+         self.loss_w = self.gaussian_w_distance(paired_pred_param, paired_gt_param).mean() * self.opt.lambda_w
+         self.loss_decision = self.criterion_decision(all_pred_decision, paired_gt_decision) * self.opt.lambda_decision
+         loss = self.loss_pixel + self.loss_gt + self.loss_w + self.loss_decision
+         loss.backward()
+         self.optimizer.step()
+         self.optimizer.zero_grad()
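
optimize_parameters pairs predicted strokes with valid ground-truth strokes via the Hungarian algorithm before computing the parameter losses. A toy version of that matching step (the cost values are invented):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    # 3 predictions vs. 2 valid ground-truth strokes; lower cost = better match
    cost = np.array([[0.9, 0.1],
                     [0.2, 0.8],
                     [0.5, 0.6]])
    r, c = linear_sum_assignment(cost)
    print(r, c)  # [0 1] [1 0]: pred 0 -> gt 1, pred 1 -> gt 0; pred 2 stays unmatched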
train/options/__init__.py ADDED
@@ -0,0 +1 @@
+ """This package options includes option modules: training options, test options, and basic options (used in both training and test)."""
train/options/base_options.py ADDED
@@ -0,0 +1,151 @@
+ import argparse
+ import os
+ from util import util
+ import torch
+ import models
+ import data
+
+
+ class BaseOptions:
+     """This class defines options used during both training and test time.
+
+     It also implements several helper functions such as parsing, printing, and saving the options.
+     It also gathers additional options defined in <modify_commandline_options> functions
+     in both dataset class and model class.
+     """
+
+     def __init__(self):
+         """Reset the class; indicates the class hasn't been initialized"""
+         self.initialized = False
+
+     def initialize(self, parser):
+         """Define the common options that are used in both training and test."""
+         # basic parameters
+         parser.add_argument('--dataroot', default='.',
+                             help='path to images (should have sub-folders trainA, trainB, valA, valB, etc.)')
+         parser.add_argument('--name', type=str, default='experiment_name',
+                             help='name of the experiment. It decides where to store samples and models')
+         parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0, or 0,1,2, or 0,2. use -1 for CPU')
+         parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
+         # model parameters
+         parser.add_argument('--model', type=str, default='painter',
+                             help='chooses which model to use.')
+         parser.add_argument('--input_nc', type=int, default=3,
+                             help='# of input image channels: 3 for RGB and 1 for grayscale')
+         parser.add_argument('--output_nc', type=int, default=3,
+                             help='# of output image channels: 3 for RGB and 1 for grayscale')
+         parser.add_argument('--ngf', type=int, default=256, help='# of gen filters in the first conv layer')
+         parser.add_argument('--layer_num', type=int, default=2, help='# of resnet blocks for the generator')
+         parser.add_argument('--init_type', type=str, default='normal',
+                             help='network initialization [normal | xavier | kaiming | orthogonal]')
+         parser.add_argument('--init_gain', type=float, default=0.02,
+                             help='scaling factor for normal, xavier and orthogonal.')
+         # dataset parameters
+         parser.add_argument('--dataset_mode', type=str, default='single',
+                             help='chooses how datasets are loaded.')
+         parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA')
+         parser.add_argument('--serial_batches', action='store_true',
+                             help='if true, takes images in order to make batches, otherwise takes them randomly')
+         parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data')
+         parser.add_argument('--batch_size', type=int, default=1, help='input batch size')
+         parser.add_argument('--load_size', type=int, default=286, help='scale images to this size')
+         parser.add_argument('--crop_size', type=int, default=256, help='then crop to this size')
+         parser.add_argument('--max_dataset_size', type=int, default=float("inf"),
+                             help='Maximum number of samples allowed per dataset. If the dataset directory contains '
+                                  'more than max_dataset_size, only a subset is loaded.')
+         parser.add_argument('--preprocess', type=str, default='resize_and_crop',
+                             help='scaling and cropping of images at load time [resize_and_crop | crop | scale_width | '
+                                  'scale_width_and_crop | none]')
+         parser.add_argument('--no_flip', action='store_true',
+                             help='if specified, do not flip the images for data augmentation')
+         parser.add_argument('--display_winsize', type=int, default=256,
+                             help='display window size for both visdom and HTML')
+         # additional parameters
+         parser.add_argument('--epoch', type=str, default='latest',
+                             help='which epoch to load? set to latest to use the latest cached model')
+         parser.add_argument('--load_iter', type=int, default=0,
+                             help='which iteration to load? if load_iter > 0, the code will load models by iter_['
+                                  'load_iter]; otherwise, the code will load models by [epoch]')
+         parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information')
+         parser.add_argument('--suffix', default='', type=str,
+                             help='customized suffix: opt.name = opt.name + suffix')
+         self.initialized = True
+         return parser
+
+     def gather_options(self):
+         """Initialize our parser with basic options (only once).
+         Add additional model-specific and dataset-specific options.
+         These options are defined in the <modify_commandline_options> function
+         in model and dataset classes.
+         """
+         if not self.initialized:  # check if it has been initialized
+             parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+             parser = self.initialize(parser)
+
+         # get the basic options
+         opt, _ = parser.parse_known_args()
+
+         # modify model-related parser options
+         model_name = opt.model
+         model_option_setter = models.get_option_setter(model_name)
+         parser = model_option_setter(parser, self.isTrain)
+         opt, _ = parser.parse_known_args()  # parse again with new defaults
+
+         # modify dataset-related parser options
+         dataset_name = opt.dataset_mode
+         dataset_option_setter = data.get_option_setter(dataset_name)
+         parser = dataset_option_setter(parser, self.isTrain)
+
+         # save and return the parser
+         self.parser = parser
+         return parser.parse_args()
+
+     def print_options(self, opt):
+         """Print and save options
+
+         It prints both the current options and their default values (if different).
+         It saves the options to a text file, [checkpoints_dir]/[name]/[phase]_opt.txt
+         """
+         message = ''
+         message += '----------------- Options ---------------\n'
+         for k, v in sorted(vars(opt).items()):
+             comment = ''
+             default = self.parser.get_default(k)
+             if v != default:
+                 comment = '\t[default: %s]' % str(default)
+             message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
+         message += '----------------- End -------------------'
+         print(message)
+
+         # save to the disk
+         expr_dir = os.path.join(opt.checkpoints_dir, opt.name)
+         util.mkdirs(expr_dir)
+         file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase))
+         with open(file_name, 'wt') as opt_file:
+             opt_file.write(message)
+             opt_file.write('\n')
+
+     def parse(self):
+         """Parse our options, create the checkpoints directory suffix, and set up the gpu device."""
+         opt = self.gather_options()
+         opt.isTrain = self.isTrain  # train or test
+
+         # process opt.suffix
+         if opt.suffix:
+             suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else ''
+             opt.name = opt.name + suffix
+
+         self.print_options(opt)
+
+         # set gpu ids
+         str_ids = opt.gpu_ids.split(',')
+         opt.gpu_ids = []
+         for str_id in str_ids:
+             gpu_id = int(str_id)
+             if gpu_id >= 0:
+                 opt.gpu_ids.append(gpu_id)
+         if len(opt.gpu_ids) > 0:
+             torch.cuda.set_device(opt.gpu_ids[0])
+
+         self.opt = opt
+         return self.opt
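
The --suffix flag is formatted against all parsed options (opt.suffix.format(**vars(opt))), which makes it easy to auto-name experiments. For example, with the defaults above (output shown for illustration):

    python train.py --suffix lr{lr}
    # opt.name becomes 'experiment_name_lr0.0002'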
train/options/test_options.py ADDED
@@ -0,0 +1,23 @@
+ from .base_options import BaseOptions
+
+
+ class TestOptions(BaseOptions):
+     """This class includes test options.
+
+     It also includes shared options defined in BaseOptions.
+     """
+
+     def initialize(self, parser):
+         parser = BaseOptions.initialize(self, parser)  # define shared options
+         parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
+         parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
+         parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
+         # Dropout and BatchNorm have different behaviors during training and test.
+         parser.add_argument('--eval', action='store_true', help='use eval mode during test time.')
+         parser.add_argument('--num_test', type=int, default=50, help='how many test images to run')
+         # overwrite default values
+         parser.set_defaults(model='test')
+         # To avoid cropping, load_size should be the same as crop_size
+         parser.set_defaults(load_size=parser.get_default('crop_size'))
+         self.isTrain = False
+         return parser
train/options/train_options.py ADDED
@@ -0,0 +1,52 @@
+ from .base_options import BaseOptions
+
+
+ class TrainOptions(BaseOptions):
+     """This class includes training options.
+
+     It also includes shared options defined in BaseOptions.
+     """
+
+     def initialize(self, parser):
+         parser = BaseOptions.initialize(self, parser)
+         # visdom and HTML visualization parameters
+         parser.add_argument('--display_freq', type=int, default=40,
+                             help='frequency of showing training results on screen')
+         parser.add_argument('--display_ncols', type=int, default=4,
+                             help='if positive, display all images in a single visdom web panel '
+                                  'with a certain number of images per row.')
+         parser.add_argument('--display_id', type=int, default=1, help='window id of the web display')
+         parser.add_argument('--display_server', type=str, default="http://localhost",
+                             help='visdom server of the web display')
+         parser.add_argument('--display_env', type=str, default='main',
+                             help='visdom display environment name (default is "main")')
+         parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display')
+         parser.add_argument('--update_html_freq', type=int, default=1000,
+                             help='frequency of saving training results to html')
+         parser.add_argument('--print_freq', type=int, default=10,
+                             help='frequency of showing training results on console')
+         parser.add_argument('--no_html', action='store_true',
+                             help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
+         # network saving and loading parameters
+         parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results')
+         parser.add_argument('--save_epoch_freq', type=int, default=5,
+                             help='frequency of saving checkpoints at the end of epochs')
+         parser.add_argument('--save_by_iter', action='store_true', help='whether to save the model by iteration')
+         parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
+         parser.add_argument('--epoch_count', type=int, default=1,
+                             help='the starting epoch count; we save the model '
+                                  'by <epoch_count>, <epoch_count>+<save_latest_freq>, ...')
+         parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
+         # training parameters
+         parser.add_argument('--n_epochs', type=int, default=100, help='number of epochs with the initial learning rate')
+         parser.add_argument('--n_epochs_decay', type=int, default=100,
+                             help='number of epochs to linearly decay the learning rate to zero')
+         parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
+         parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam')
+         parser.add_argument('--lr_policy', type=str, default='linear',
+                             help='learning rate policy. [linear | step | plateau | cosine]')
+         parser.add_argument('--lr_decay_iters', type=int, default=50,
+                             help='multiply by a gamma every lr_decay_iters iterations')
+
+         self.isTrain = True
+         return parser
train/train.py ADDED
@@ -0,0 +1,58 @@
+ import time
+ from options.train_options import TrainOptions
+ from data import create_dataset
+ from models import create_model
+ from util.visualizer import Visualizer
+
+ if __name__ == '__main__':
+     opt = TrainOptions().parse()   # get training options
+     dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
+     dataset_size = len(dataset)    # get the number of images in the dataset
+     print('The number of training images = %d' % dataset_size)
+
+     model = create_model(opt)      # create a model given opt.model and other options
+     model.setup(opt)               # regular setup: load and print networks; create schedulers
+     visualizer = Visualizer(opt)   # create a visualizer that displays/saves images and plots
+     total_iters = 0                # the total number of training iterations
+
+     for epoch in range(opt.epoch_count, opt.n_epochs + opt.n_epochs_decay + 1):
+         epoch_start_time = time.time()  # timer for the entire epoch
+         iter_data_time = time.time()    # timer for data loading per iteration
+         epoch_iter = 0                  # the number of training iterations in the current epoch, reset to 0 every epoch
+         visualizer.reset()              # make sure it saves results to HTML at least once every epoch
+         for i, data in enumerate(dataset):  # inner loop within one epoch
+             iter_start_time = time.time()   # timer for computation per iteration
+             if total_iters % opt.print_freq == 0:
+                 t_data = iter_start_time - iter_data_time
+
+             total_iters += opt.batch_size
+             epoch_iter += opt.batch_size
+             model.set_input(data)        # unpack data from the dataset and apply preprocessing
+             model.optimize_parameters()  # calculate loss functions, get gradients, update network weights
+
+             if total_iters % opt.display_freq == 0:  # display images on visdom and save images to an HTML file
+                 save_result = total_iters % opt.update_html_freq == 0
+                 model.compute_visuals()
+                 visualizer.display_current_results(model.get_current_visuals(), epoch, save_result)
+
+             if total_iters % opt.print_freq == 0:  # print training losses and save logging information to the disk
+                 losses = model.get_current_losses()
+                 t_comp = (time.time() - iter_start_time) / opt.batch_size
+                 visualizer.print_current_losses(epoch, epoch_iter, losses, t_comp, t_data)
+                 if opt.display_id > 0:
+                     visualizer.plot_current_losses(epoch, float(epoch_iter) / dataset_size, losses)
+
+             if total_iters % opt.save_latest_freq == 0:  # cache the latest model every <save_latest_freq> iterations
+                 print('saving the latest model (epoch %d, total_iters %d)' % (epoch, total_iters))
+                 save_suffix = 'iter_%d' % total_iters if opt.save_by_iter else 'latest'
+                 model.save_networks(save_suffix)
+
+             iter_data_time = time.time()
+         if epoch % opt.save_epoch_freq == 0:  # cache the model every <save_epoch_freq> epochs
+             print('saving the model at the end of epoch %d, iters %d' % (epoch, total_iters))
+             model.save_networks('latest')
+             model.save_networks(epoch)
+
+         print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.n_epochs + opt.n_epochs_decay,
+                                                               time.time() - epoch_start_time))
+         model.update_learning_rate()  # update learning rates at the end of every epoch
train/train.sh ADDED
@@ -0,0 +1,14 @@
+ python train.py \
+     --name painter \
+     --gpu_ids 0 \
+     --model painter \
+     --dataset_mode null \
+     --batch_size 64 \
+     --display_freq 25 \
+     --print_freq 25 \
+     --lr 1e-4 \
+     --init_type normal \
+     --n_epochs 200 \
+     --n_epochs_decay 20 \
+     --max_dataset_size 16384 \
+     --save_epoch_freq 20
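
With these flags the loop in train.py runs for n_epochs + n_epochs_decay = 220 epochs. Under the default 'linear' policy in networks.py, the learning rate holds at 1e-4 for roughly the first 200 epochs and is then multiplied by 0.3 every 5 epochs, so the final 20 epochs step through about 3e-5, 9e-6, 2.7e-6, and 8.1e-7.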
train/util/__init__.py ADDED
@@ -0,0 +1 @@
+ """This package includes a miscellaneous collection of useful helper functions."""
train/util/html.py ADDED
@@ -0,0 +1,86 @@
+ import dominate
+ from dominate.tags import meta, h3, table, tr, td, p, a, img, br
+ import os
+
+
+ class HTML:
+     """This HTML class allows us to save images and write texts into a single HTML file.
+
+     It consists of functions such as <add_header> (add a text header to the HTML file),
+     <add_images> (add a row of images to the HTML file), and <save> (save the HTML to the disk).
+     It is based on 'dominate', a Python library for creating and manipulating HTML documents using a DOM API.
+     """
+
+     def __init__(self, web_dir, title, refresh=0):
+         """Initialize the HTML class
+
+         Parameters:
+             web_dir (str) -- a directory that stores the webpage. The HTML file will be created at <web_dir>/index.html; images will be saved at <web_dir>/images/
+             title (str)   -- the webpage name
+             refresh (int) -- how often the website refreshes itself; if 0, no refreshing
+         """
+         self.title = title
+         self.web_dir = web_dir
+         self.img_dir = os.path.join(self.web_dir, 'images')
+         if not os.path.exists(self.web_dir):
+             os.makedirs(self.web_dir)
+         if not os.path.exists(self.img_dir):
+             os.makedirs(self.img_dir)
+
+         self.doc = dominate.document(title=title)
+         if refresh > 0:
+             with self.doc.head:
+                 meta(http_equiv="refresh", content=str(refresh))
+
+     def get_image_dir(self):
+         """Return the directory that stores images"""
+         return self.img_dir
+
+     def add_header(self, text):
+         """Insert a header into the HTML file
+
+         Parameters:
+             text (str) -- the header text
+         """
+         with self.doc:
+             h3(text)
+
+     def add_images(self, ims, txts, links, width=400):
+         """Add images to the HTML file
+
+         Parameters:
+             ims (str list)   -- a list of image paths
+             txts (str list)  -- a list of image names shown on the website
+             links (str list) -- a list of hyperlinks; when you click an image, it will redirect you to a new page
+         """
+         self.t = table(border=1, style="table-layout: fixed;")  # insert a table
+         self.doc.add(self.t)
+         with self.t:
+             with tr():
+                 for im, txt, link in zip(ims, txts, links):
+                     with td(style="word-wrap: break-word;", halign="center", valign="top"):
+                         with p():
+                             with a(href=os.path.join('images', link)):
+                                 img(style="width:%dpx" % width, src=os.path.join('images', im))
+                             br()
+                             p(txt)
+
+     def save(self):
+         """Save the current content to the HTML file"""
+         html_file = '%s/index.html' % self.web_dir
+         with open(html_file, 'wt') as f:
+             f.write(self.doc.render())
+
+
+ if __name__ == '__main__':  # we show an example usage here
+     html = HTML('web/', 'test_html')
+     html.add_header('hello world')
+
+     ims, txts, links = [], [], []
+     for n in range(4):
+         ims.append('image_%d.png' % n)
+         txts.append('text_%d' % n)
+         links.append('image_%d.png' % n)
+     html.add_images(ims, txts, links)
+     html.save()
train/util/morphology.py ADDED
@@ -0,0 +1,43 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ class Erosion2d(nn.Module):
+     """Grayscale erosion: a (2m+1) x (2m+1) sliding-window minimum, implemented with unfold.
+     Note: the result is written back into the input tensor in place."""
+
+     def __init__(self, m=1):
+         super(Erosion2d, self).__init__()
+         self.m = m
+         self.pad = [m, m, m, m]
+         self.unfold = nn.Unfold(2 * m + 1, padding=0, stride=1)
+
+     def forward(self, x):
+         batch_size, c, h, w = x.shape
+         x_pad = F.pad(x, pad=self.pad, mode='constant', value=1e9)  # pad with a large value so borders never win the min
+         for i in range(c):
+             channel = self.unfold(x_pad[:, [i], :, :])
+             channel = torch.min(channel, dim=1, keepdim=True)[0]
+             channel = channel.view([batch_size, 1, h, w])
+             x[:, [i], :, :] = channel
+
+         return x
+
+
+ class Dilation2d(nn.Module):
+     """Grayscale dilation: a (2m+1) x (2m+1) sliding-window maximum, implemented with unfold.
+     Note: the result is written back into the input tensor in place."""
+
+     def __init__(self, m=1):
+         super(Dilation2d, self).__init__()
+         self.m = m
+         self.pad = [m, m, m, m]
+         self.unfold = nn.Unfold(2 * m + 1, padding=0, stride=1)
+
+     def forward(self, x):
+         batch_size, c, h, w = x.shape
+         x_pad = F.pad(x, pad=self.pad, mode='constant', value=-1e9)  # pad with a small value so borders never win the max
+         for i in range(c):
+             channel = self.unfold(x_pad[:, [i], :, :])
+             channel = torch.max(channel, dim=1, keepdim=True)[0]
+             channel = channel.view([batch_size, 1, h, w])
+             x[:, [i], :, :] = channel
+
+         return x
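
A quick sanity check of the two operators (standalone; the 5x5 test image is ours). Since both modules write their result back into the input tensor, the clone() calls keep the originals intact:

    import torch

    img = torch.zeros(1, 1, 5, 5)
    img[0, 0, 2, 2] = 1.0                     # single bright pixel
    dilated = Dilation2d(m=1)(img.clone())    # grows into a 3x3 block
    eroded = Erosion2d(m=1)(dilated.clone())  # shrinks back to the single pixel
    print(dilated[0, 0].int())
    print(eroded[0, 0].int())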
train/util/util.py ADDED
@@ -0,0 +1,103 @@
+ """This module contains simple helper functions."""
+ from __future__ import print_function
+ import torch
+ import numpy as np
+ from PIL import Image
+ import os
+
+
+ def tensor2im(input_image, imtype=np.uint8):
+     """Converts a Tensor array into a numpy image array.
+
+     Parameters:
+         input_image (tensor) -- the input image tensor array
+         imtype (type)        -- the desired type of the converted numpy array
+     """
+     if not isinstance(input_image, np.ndarray):
+         if isinstance(input_image, torch.Tensor):  # get the data from a variable
+             image_tensor = input_image.data
+         else:
+             return input_image
+         image_numpy = image_tensor[0].cpu().float().numpy()  # convert it into a numpy array
+         if image_numpy.shape[0] == 1:  # grayscale to RGB
+             image_numpy = np.tile(image_numpy, (3, 1, 1))
+         image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0  # post-processing: transpose and scaling
+     else:  # if it is a numpy array
+         image_numpy = input_image * 255.
+     return image_numpy.astype(imtype)
+
+
+ def diagnose_network(net, name='network'):
+     """Calculate and print the mean of the average absolute gradients
+
+     Parameters:
+         net (torch network) -- Torch network
+         name (str)          -- the name of the network
+     """
+     mean = 0.0
+     count = 0
+     for param in net.parameters():
+         if param.grad is not None:
+             mean += torch.mean(torch.abs(param.grad.data))
+             count += 1
+     if count > 0:
+         mean = mean / count
+     print(name)
+     print(mean)
+
+
+ def save_image(image_numpy, image_path, aspect_ratio=1.0):
+     """Save a numpy image to the disk
+
+     Parameters:
+         image_numpy (numpy array) -- input numpy array
+         image_path (str)          -- the path of the image
+         aspect_ratio (float)      -- rescale one side of the image by this ratio before saving
+     """
+
+     image_pil = Image.fromarray(image_numpy)
+     h, w, _ = image_numpy.shape
+
+     if aspect_ratio > 1.0:
+         image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC)
+     if aspect_ratio < 1.0:
+         image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC)
+     image_pil.save(image_path)
+
+
+ def print_numpy(x, val=True, shp=False):
+     """Print the mean, min, max, median, std, and size of a numpy array
+
+     Parameters:
+         val (bool) -- whether to print the values of the numpy array
+         shp (bool) -- whether to print the shape of the numpy array
+     """
+     x = x.astype(np.float64)
+     if shp:
+         print('shape,', x.shape)
+     if val:
+         x = x.flatten()
+         print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
+             np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
+
+
+ def mkdirs(paths):
+     """Create empty directories if they don't exist
+
+     Parameters:
+         paths (str list) -- a list of directory paths
+     """
+     if isinstance(paths, list) and not isinstance(paths, str):
+         for path in paths:
+             mkdir(path)
+     else:
+         mkdir(paths)
+
+
+ def mkdir(path):
+     """Create a single empty directory if it doesn't exist
+
+     Parameters:
+         path (str) -- a single directory path
+     """
+     if not os.path.exists(path):
+         os.makedirs(path)
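
For reference, tensor2im expects a batched CHW float tensor in [0, 1] and returns an HWC uint8 array, which save_image then writes via PIL. A tiny round-trip check (the file name is ours):

    import torch

    t = torch.rand(1, 3, 4, 4)   # a batch with one RGB image in [0, 1]
    im = tensor2im(t)
    print(im.shape, im.dtype)    # (4, 4, 3) uint8
    save_image(im, 'check.png')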
train/util/visualizer.py ADDED
@@ -0,0 +1,224 @@
+ import numpy as np
+ import os
+ import sys
+ import ntpath
+ import time
+ from . import util, html
+ from subprocess import Popen, PIPE
+
+
+ if sys.version_info[0] == 2:
+     VisdomExceptionBase = Exception
+ else:
+     VisdomExceptionBase = ConnectionError
+
+
+ def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256):
+     """Save images to the disk.
+
+     Parameters:
+         webpage (the HTML class) -- the HTML webpage class that stores these images (see html.py for more details)
+         visuals (OrderedDict)    -- an ordered dictionary that stores (name, images (either tensor or numpy)) pairs
+         image_path (str)         -- the string used to create image paths
+         aspect_ratio (float)     -- the aspect ratio of saved images
+         width (int)              -- the images will be resized to width x width
+
+     This function will save images stored in 'visuals' to the HTML file specified by 'webpage'.
+     """
+     image_dir = webpage.get_image_dir()
+     short_path = ntpath.basename(image_path[0])
+     name = os.path.splitext(short_path)[0]
+
+     webpage.add_header(name)
+     ims, txts, links = [], [], []
+
+     for label, im_data in visuals.items():
+         im = util.tensor2im(im_data)
+         image_name = '%s_%s.png' % (name, label)
+         save_path = os.path.join(image_dir, image_name)
+         util.save_image(im, save_path, aspect_ratio=aspect_ratio)
+         ims.append(image_name)
+         txts.append(label)
+         links.append(image_name)
+     webpage.add_images(ims, txts, links, width=width)
+
+
+ class Visualizer:
+     """This class includes several functions that can display/save images and print/save logging information.
+
+     It uses the Python library 'visdom' for display, and the Python library 'dominate' (wrapped in 'HTML') for creating
+     HTML files with images.
+     """
+
+     def __init__(self, opt):
+         """Initialize the Visualizer class
+
+         Parameters:
+             opt -- stores all the experiment flags; needs to be a subclass of BaseOptions
+         Step 1: Cache the training/test options
+         Step 2: connect to a visdom server
+         Step 3: create an HTML object for saving HTML files
+         Step 4: create a logging file to store training losses
+         """
+         self.opt = opt  # cache the option
+         self.display_id = opt.display_id
+         self.use_html = opt.isTrain and not opt.no_html
+         self.win_size = opt.display_winsize
+         self.name = opt.name
+         self.port = opt.display_port
+         self.saved = False
+         if self.display_id > 0:  # connect to a visdom server given <display_port> and <display_server>
+             import visdom
+             self.ncols = opt.display_ncols
+             self.vis = visdom.Visdom(server=opt.display_server, port=opt.display_port, env=opt.display_env)
+             if not self.vis.check_connection():
+                 self.create_visdom_connections()
+
+         if self.use_html:  # create an HTML object at <checkpoints_dir>/web/; images will be saved under
+             # <checkpoints_dir>/web/images/
+             self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
+             self.img_dir = os.path.join(self.web_dir, 'images')
+             print('create web directory %s...' % self.web_dir)
+             util.mkdirs([self.web_dir, self.img_dir])
+         # create a logging file to store training losses
+         self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
+         with open(self.log_name, "a") as log_file:
+             now = time.strftime("%c")
+             log_file.write('================ Training Loss (%s) ================\n' % now)
+
+     def reset(self):
+         """Reset the self.saved status"""
+         self.saved = False
+
+     def create_visdom_connections(self):
+         """If the program could not connect to a Visdom server, this function will start a new server at port <self.port>"""
+         cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % self.port
+         print('\n\nCould not connect to Visdom server. \n Trying to start a server....')
+         print('Command: %s' % cmd)
+         Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
+
+     def display_current_results(self, visuals, epoch, save_result):
+         """Display current results on visdom; save current results to an HTML file.
+
+         Parameters:
+             visuals (OrderedDict) -- dictionary of images to display or save
+             epoch (int)           -- the current epoch
+             save_result (bool)    -- whether to save the current results to an HTML file
+         """
+         if self.display_id > 0:  # show images in the browser using visdom
+             ncols = self.ncols
+             if ncols > 0:  # show all the images in one visdom panel
+                 ncols = min(ncols, len(visuals))
+                 h, w = next(iter(visuals.values())).shape[:2]
+                 table_css = """<style>
+                     table {border-collapse: separate; border-spacing: 4px; white-space: nowrap; text-align: center}
+                     table td {width: %dpx; height: %dpx; padding: 4px; outline: 4px solid black}
+                     </style>""" % (w, h)  # create a table css
+                 # create a table of images.
+                 title = self.name
+                 label_html = ''
+                 label_html_row = ''
+                 images = []
+                 idx = 0
+                 for label, image in visuals.items():
+                     image_numpy = util.tensor2im(image)
+                     label_html_row += '<td>%s</td>' % label
+                     images.append(image_numpy.transpose([2, 0, 1]))
+                     idx += 1
+                     if idx % ncols == 0:
+                         label_html += '<tr>%s</tr>' % label_html_row
+                         label_html_row = ''
+                 white_image = np.ones_like(image_numpy.transpose([2, 0, 1])) * 255
+                 while idx % ncols != 0:
+                     images.append(white_image)
+                     label_html_row += '<td></td>'
+                     idx += 1
+                 if label_html_row != '':
+                     label_html += '<tr>%s</tr>' % label_html_row
+                 try:
+                     self.vis.images(images, nrow=ncols, win=self.display_id + 1,
+                                     padding=2, opts=dict(title=title + ' images'))
+                     label_html = '<table>%s</table>' % label_html
+                     self.vis.text(table_css + label_html, win=self.display_id + 2,
+                                   opts=dict(title=title + ' labels'))
+                 except VisdomExceptionBase:
+                     self.create_visdom_connections()
+
+             else:  # show each image in a separate visdom panel
+                 idx = 1
+                 try:
+                     for label, image in visuals.items():
+                         image_numpy = util.tensor2im(image)
+                         self.vis.image(image_numpy.transpose([2, 0, 1]), opts=dict(title=label),
+                                        win=self.display_id + idx)
+                         idx += 1
+                 except VisdomExceptionBase:
+                     self.create_visdom_connections()
+
+         if self.use_html and (save_result or not self.saved):  # save images to an HTML file if they haven't been saved
+             self.saved = True
+             # save images to the disk
+             for label, image in visuals.items():
+                 image_numpy = util.tensor2im(image)
+                 img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label))
+                 util.save_image(image_numpy, img_path)
+
+             # update the website
+             webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=1)
+             for n in range(epoch, 0, -1):
+                 webpage.add_header('epoch [%d]' % n)
+                 ims, txts, links = [], [], []
+
+                 for label, image_numpy in visuals.items():
+                     image_numpy = util.tensor2im(image_numpy)
+                     img_path = 'epoch%.3d_%s.png' % (n, label)
+                     ims.append(img_path)
+                     txts.append(label)
+                     links.append(img_path)
+                 webpage.add_images(ims, txts, links, width=self.win_size)
+             webpage.save()
+
+     def plot_current_losses(self, epoch, counter_ratio, losses):
+         """Display the current losses on the visdom display: a dictionary of error labels and values
+
+         Parameters:
+             epoch (int)           -- current epoch
+             counter_ratio (float) -- progress (percentage) in the current epoch, between 0 and 1
+             losses (OrderedDict)  -- training losses stored in the format of (name, float) pairs
+         """
+         if not hasattr(self, 'plot_data'):
+             self.plot_data = {'X': [], 'Y': [], 'legend': list(losses.keys())}
+         self.plot_data['X'].append(epoch + counter_ratio)
+         self.plot_data['Y'].append([losses[k] for k in self.plot_data['legend']])
+         try:
+             self.vis.line(
+                 X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1),
+                 Y=np.array(self.plot_data['Y']),
+                 opts={
+                     'title': self.name + ' loss over time',
+                     'legend': self.plot_data['legend'],
+                     'xlabel': 'epoch',
+                     'ylabel': 'loss'},
+                 win=self.display_id)
+         except VisdomExceptionBase:
+             self.create_visdom_connections()
+
+     # losses: same format as |losses| of plot_current_losses
+     def print_current_losses(self, epoch, iters, losses, t_comp, t_data):
+         """Print current losses on the console; also save the losses to the disk
+
+         Parameters:
+             epoch (int)          -- current epoch
+             iters (int)          -- current training iteration during this epoch (reset to 0 at the end of every epoch)
+             losses (OrderedDict) -- training losses stored in the format of (name, float) pairs
+             t_comp (float)       -- computational time per data point (normalized by batch_size)
+             t_data (float)       -- data loading time per data point (normalized by batch_size)
+         """
+         message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data)
+         for k, v in losses.items():
+             message += '%s: %.3f ' % (k, v)
+
+         print(message)  # print the message
+         with open(self.log_name, "a") as log_file:
+             log_file.write('%s\n' % message)  # save the message
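
For reference, print_current_losses emits one line every print_freq iterations with this model's four losses, so a typical entry in the console and in loss_log.txt looks like (the values are illustrative):

    (epoch: 3, iters: 1600, time: 0.045, data: 0.002) pixel: 0.412 gt: 0.098 w: 0.231 decision: 0.547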