zejunyang committed on
Commit
9667e74
1 Parent(s): 9464d6e
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ */ref_img.png filter=lfs diff=lfs merge=lfs -text
NTED/NTED_module.py ADDED
@@ -0,0 +1,101 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import random
5
+
6
+ import mediapipe as mp
7
+ from lite_openpose.body_bbox_detector import BodyPoseEstimator
8
+ from NTED.extraction_distribution_model import Generator
9
+ from NTED.demo_dataset import DemoDataset
10
+ from NTED.base_function import accumulate
11
+ from NTED.config import Config
12
+
13
+
14
+ def set_random_seed(seed):
15
+ r"""Set random seeds for everything.
16
+
17
+ Args:
18
+ seed (int): Random seed.
20
+ """
21
+ random.seed(seed)
22
+ np.random.seed(seed)
23
+ torch.manual_seed(seed)
24
+ torch.cuda.manual_seed(seed)
25
+ torch.cuda.manual_seed_all(seed)
26
+
27
+ class NTED():
28
+ def __init__(self):
29
+ super(NTED, self).__init__()
30
+
31
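+ # lightweight OpenPose body-keypoint detector, kept on CPU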
+ self.openpose_module = BodyPoseEstimator('cpu')
32
+ set_random_seed(0)
33
+ self.opt = Config('NTED/fashion_512.yaml', is_train=False)
34
+
35
+ net_G = Generator(**self.opt.gen.param).to('cpu')
36
+ net_G_ema = Generator(**self.opt.gen.param).to('cpu')
37
+ net_G_ema.eval()
38
+ accumulate(net_G_ema, net_G, 0)
39
+
40
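+ # restore the trained EMA generator weights; map_location keeps all tensors on CPU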
+ checkpoint = torch.load('NTED/nted_checkpoint.pt', map_location=lambda storage, loc: storage)
41
+ net_G_ema.load_state_dict(checkpoint['net_G_ema'])
42
+ self.net_G = net_G_ema.eval()
43
+
44
+ self.data_loader = DemoDataset()
45
+
46
+ mp_hands = mp.solutions.hands
47
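+ # MediaPipe Hands in static-image mode; the low confidence threshold keeps weak detections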
+ self.hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.1)
48
+
49
+ self.ref_img = cv2.imread('example/ref_img.png')
50
+ self.ref_img = cv2.resize(self.ref_img, (352, 512))
51
+
52
+ def hand_pose_est(self, img):
53
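+ # MediaPipe expects RGB; the image is mirrored here and the x coordinates are mirrored back below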
+ results = self.hands.process(cv2.cvtColor(cv2.flip(img, 1), cv2.COLOR_BGR2RGB))
54
+ image_height, image_width, _ = img.shape
55
+ pose_data = []
56
+
57
+ if results.multi_hand_landmarks is not None:
58
+ for hand_landmarks in results.multi_hand_landmarks:
59
+ for joint_idx in range(21):
60
+ pose_data.append([image_width - hand_landmarks.landmark[joint_idx].x * image_width, hand_landmarks.landmark[joint_idx].y * image_height])
61
+ if len(results.multi_hand_landmarks) == 2:
62
+ if results.multi_handedness[0].classification[0].label == 'Right':
63
+ # swap so the order is left hand first, then right hand
64
+ tmp = pose_data[:21].copy()
65
+ pose_data[:21] = pose_data[21:]
66
+ pose_data[21:] = tmp
67
+ elif len(results.multi_hand_landmarks) == 1:
68
+ miss_hand = [[-1, -1] for _ in range(21)]
69
+ if results.multi_handedness[0].classification[0].label == 'Left':
70
+ pose_data += miss_hand
71
+ else:
72
+ pose_data = miss_hand + pose_data
73
+ else:
74
+ for _ in range(42):
75
+ pose_data.append([-1, -1])
76
+ pose_data = np.array(pose_data, dtype=np.int32)
77
+
78
+ return pose_data
79
+
80
+
81
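+ # full demo pipeline: body pose -> hand pose -> skeleton label -> generator -> fake image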
+ def inference(self, img):
82
+
83
+ img = cv2.resize(img, (352, 512))
84
+
85
+ body_pose, bbox = self.openpose_module.detect_body_pose(img.copy())
86
+
87
+ hand_pose = self.hand_pose_est(img.copy())
88
+
89
+ data = self.data_loader.load_item(self.ref_img, body_pose[0], hand_pose)
90
+
91
+ output = self.net_G(
92
+ data['reference_image'],
93
+ data['target_skeleton'],
94
+ )
95
+ fake_image = output['fake_image'][0]
96
+
97
+ fake_image = self.data_loader.tensor2im(fake_image)
98
+
99
+ fake_image = cv2.resize(fake_image, (288, 480))
100
+
101
+ return data['skeleton_img'], fake_image
NTED/base_function.py ADDED
@@ -0,0 +1,434 @@
1
+ import sys
2
+ import math
3
+
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ from NTED.op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d, conv2d_gradfix
9
+
10
+ class ExtractionOperation(nn.Module):
11
+ def __init__(self, in_channel, num_label, match_kernel):
12
+ super(ExtractionOperation, self).__init__()
13
+ self.value_conv = EqualConv2d(in_channel, in_channel, match_kernel, 1, match_kernel//2, bias=True)
14
+ self.semantic_extraction_filter = EqualConv2d(in_channel, num_label, match_kernel, 1, match_kernel//2, bias=False)
15
+
16
+ self.softmax = nn.Softmax(dim=-1)
17
+ self.num_label = num_label
18
+
19
+ def forward(self, value, recoder):
20
+ key = value
21
+ b,c,h,w = value.shape
22
+ key = self.semantic_extraction_filter(self.feature_norm(key))
23
+ extraction_softmax = self.softmax(key.view(b, -1, h*w)) #bkm
24
+ values_flatten = self.value_conv(value).view(b, -1, h*w)
25
+ neural_textures = torch.einsum('bkm,bvm->bvk', extraction_softmax, values_flatten)
26
+ recoder['extraction_softmax'].insert(0, extraction_softmax)
27
+ recoder['neural_textures'].insert(0, neural_textures)
28
+ return neural_textures, extraction_softmax
29
+
30
+
31
+ def feature_norm(self, input_tensor):
32
+ input_tensor = input_tensor - input_tensor.mean(dim=1, keepdim=True)
33
+ norm = torch.norm(input_tensor, 2, 1, keepdim=True) + sys.float_info.epsilon
34
+ out = torch.div(input_tensor, norm)
35
+ return out
36
+
37
+ class DistributionOperation(nn.Module):
38
+ def __init__(self, num_label, input_dim, match_kernel=3):
39
+ super(DistributionOperation, self).__init__()
40
+ self.semantic_distribution_filter = EqualConv2d(input_dim, num_label,
41
+ kernel_size=match_kernel,
42
+ stride=1,
43
+ padding=match_kernel//2)
44
+ self.num_label = num_label
45
+
46
+ def forward(self, query, extracted_feature, recoder):
47
+ b,c,h,w = query.shape
48
+
49
+ query = self.semantic_distribution_filter(query)
50
+ query_flatten = query.view(b, self.num_label, -1)
51
+ query_softmax = F.softmax(query_flatten, 1)
52
+ values_q = torch.einsum('bkm,bkv->bvm', query_softmax, extracted_feature.permute(0,2,1))
53
+ attn_out = values_q.view(b,-1,h,w)
54
+ recoder['semantic_distribution'].append(query)
55
+ return attn_out
56
+
57
+ class EncoderLayer(nn.Sequential):
58
+ def __init__(
59
+ self,
60
+ in_channel,
61
+ out_channel,
62
+ kernel_size,
63
+ downsample=False,
64
+ blur_kernel=[1, 3, 3, 1],
65
+ bias=True,
66
+ activate=True,
67
+ use_extraction=False,
68
+ num_label=None,
69
+ match_kernel=None,
70
+ num_extractions=2
71
+ ):
72
+ super().__init__()
73
+
74
+ if downsample:
75
+ factor = 2
76
+ p = (len(blur_kernel) - factor) + (kernel_size - 1)
77
+ pad0 = (p + 1) // 2
78
+ pad1 = p // 2
79
+ self.blur = Blur(blur_kernel, pad=(pad0, pad1))
80
+
81
+ stride = 2
82
+ padding = 0
83
+
84
+ else:
85
+ self.blur = None
86
+ stride = 1
87
+ padding = kernel_size // 2
88
+
89
+
90
+ self.conv = EqualConv2d(
91
+ in_channel,
92
+ out_channel,
93
+ kernel_size,
94
+ padding=padding,
95
+ stride=stride,
96
+ bias=bias and not activate,
97
+ )
98
+
99
+ self.activate = FusedLeakyReLU(out_channel, bias=bias) if activate else None
100
+ self.use_extraction = use_extraction
101
+ if self.use_extraction:
102
+ self.extraction_operations = nn.ModuleList()
103
+ for _ in range(num_extractions):
104
+ self.extraction_operations.append(
105
+ ExtractionOperation(
106
+ out_channel,
107
+ num_label,
108
+ match_kernel
109
+ )
110
+ )
111
+
112
+ def forward(self, input, recoder=None):
113
+ out = self.blur(input) if self.blur is not None else input
114
+ out = self.conv(out)
115
+ out = self.activate(out) if self.activate is not None else out
116
+ if self.use_extraction:
117
+ for extraction_operation in self.extraction_operations:
118
+ extraction_operation(out, recoder)
119
+ return out
120
+
121
+
122
+ class DecoderLayer(nn.Module):
123
+ def __init__(
124
+ self,
125
+ in_channel,
126
+ out_channel,
127
+ kernel_size,
128
+ upsample=False,
129
+ blur_kernel=[1, 3, 3, 1],
130
+ bias=True,
131
+ activate=True,
132
+ use_distribution=True,
133
+ num_label=16,
134
+ match_kernel=3,
135
+ ):
136
+ super().__init__()
137
+ if upsample:
138
+ factor = 2
139
+ p = (len(blur_kernel) - factor) - (kernel_size - 1)
140
+ pad0 = (p + 1) // 2 + factor - 1
141
+ pad1 = p // 2 + 1
142
+
143
+ self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)
144
+ self.conv = EqualTransposeConv2d(
145
+ in_channel,
146
+ out_channel,
147
+ kernel_size,
148
+ stride=2,
149
+ padding=0,
150
+ bias=bias and not activate,
151
+ )
152
+ else:
153
+ self.conv = EqualConv2d(
154
+ in_channel,
155
+ out_channel,
156
+ kernel_size,
157
+ stride=1,
158
+ padding=kernel_size//2,
159
+ bias=bias and not activate,
160
+ )
161
+ self.blur = None
162
+
163
+ self.distribution_operation = DistributionOperation(
164
+ num_label,
165
+ out_channel,
166
+ match_kernel=match_kernel
167
+ ) if use_distribution else None
168
+ self.activate = FusedLeakyReLU(out_channel, bias=bias) if activate else None
169
+ self.use_distribution = use_distribution
170
+
171
+ def forward(self, input, neural_texture=None, recoder=None):
172
+ out = self.conv(input)
173
+ out = self.blur(out) if self.blur is not None else out
174
+ if self.use_distribution and neural_texture is not None:
175
+ out_attn = self.distribution_operation(out, neural_texture, recoder)
176
+ out = (out + out_attn) / math.sqrt(2)
177
+
178
+ out = self.activate(out.contiguous()) if self.activate is not None else out
179
+
180
+ return out
181
+
182
+ class EqualConv2d(nn.Module):
183
+ def __init__(
184
+ self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
185
+ ):
186
+ super().__init__()
187
+
188
+ self.weight = nn.Parameter(
189
+ torch.randn(out_channel, in_channel, kernel_size, kernel_size)
190
+ )
191
+ self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)
192
+
193
+ self.stride = stride
194
+ self.padding = padding
195
+
196
+ if bias:
197
+ self.bias = nn.Parameter(torch.zeros(out_channel))
198
+
199
+ else:
200
+ self.bias = None
201
+
202
+ def forward(self, input):
203
+ out = conv2d_gradfix.conv2d(
204
+ input,
205
+ self.weight * self.scale,
206
+ bias=self.bias,
207
+ stride=self.stride,
208
+ padding=self.padding,
209
+ )
210
+
211
+ return out
212
+
213
+ def __repr__(self):
214
+ return (
215
+ f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},"
216
+ f" {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})"
217
+ )
218
+
219
+
220
+ class EqualTransposeConv2d(nn.Module):
221
+ def __init__(
222
+ self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
223
+ ):
224
+ super().__init__()
225
+
226
+ self.weight = nn.Parameter(
227
+ torch.randn(out_channel, in_channel, kernel_size, kernel_size)
228
+ )
229
+ self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)
230
+
231
+ self.stride = stride
232
+ self.padding = padding
233
+
234
+ if bias:
235
+ self.bias = nn.Parameter(torch.zeros(out_channel))
236
+
237
+ else:
238
+ self.bias = None
239
+
240
+ def forward(self, input):
241
+ weight = self.weight.transpose(0,1)
242
+ out = conv2d_gradfix.conv_transpose2d(
243
+ input,
244
+ weight * self.scale,
245
+ bias=self.bias,
246
+ stride=self.stride,
247
+ padding=self.padding,
248
+ )
249
+
250
+ return out
251
+
252
+ def __repr__(self):
253
+ return (
254
+ f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},"
255
+ f" {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})"
256
+ )
257
+
258
+ class ToRGB(nn.Module):
259
+ def __init__(
260
+ self,
261
+ in_channel,
262
+ upsample=True,
263
+ blur_kernel=[1, 3, 3, 1]
264
+ ):
265
+ super().__init__()
266
+
267
+ if upsample:
268
+ self.upsample = Upsample(blur_kernel)
269
+ self.conv = EqualConv2d(in_channel, 3, 3, stride=1, padding=1)
270
+
271
+ def forward(self, input, skip=None):
272
+ out = self.conv(input)
273
+ if skip is not None:
274
+ skip = self.upsample(skip)
275
+ out = out + skip
276
+ return out
277
+
278
+
279
+ class EqualLinear(nn.Module):
280
+ def __init__(
281
+ self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
282
+ ):
283
+ super().__init__()
284
+
285
+ self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))
286
+
287
+ if bias:
288
+ self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))
289
+
290
+ else:
291
+ self.bias = None
292
+
293
+ self.activation = activation
294
+
295
+ self.scale = (1 / math.sqrt(in_dim)) * lr_mul
296
+ self.lr_mul = lr_mul
297
+
298
+ def forward(self, input):
299
+ if self.activation:
300
+ out = F.linear(input, self.weight * self.scale)
301
+ out = fused_leaky_relu(out, self.bias * self.lr_mul)
302
+
303
+ else:
304
+ out = F.linear(
305
+ input, self.weight * self.scale, bias=self.bias * self.lr_mul
306
+ )
307
+
308
+ return out
309
+
310
+ def __repr__(self):
311
+ return (
312
+ f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})"
313
+ )
314
+
315
+ class Upsample(nn.Module):
316
+ def __init__(self, kernel, factor=2):
317
+ super().__init__()
318
+
319
+ self.factor = factor
320
+ kernel = make_kernel(kernel) * (factor ** 2)
321
+ self.register_buffer("kernel", kernel)
322
+
323
+ p = kernel.shape[0] - factor
324
+
325
+ pad0 = (p + 1) // 2 + factor - 1
326
+ pad1 = p // 2
327
+
328
+ self.pad = (pad0, pad1)
329
+
330
+ def forward(self, input):
331
+ out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)
332
+
333
+ return out
334
+
335
+ class ResBlock(nn.Module):
336
+ def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1]):
337
+ super().__init__()
338
+
339
+ self.conv1 = ConvLayer(in_channel, in_channel, 3)
340
+ self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True)
341
+
342
+ self.skip = ConvLayer(
343
+ in_channel, out_channel, 1, downsample=True, activate=False, bias=False
344
+ )
345
+
346
+ def forward(self, input):
347
+ out = self.conv1(input)
348
+ out = self.conv2(out)
349
+
350
+ skip = self.skip(input)
351
+ out = (out + skip) / math.sqrt(2)
352
+
353
+ return out
354
+
355
+ class ConvLayer(nn.Sequential):
356
+ def __init__(
357
+ self,
358
+ in_channel,
359
+ out_channel,
360
+ kernel_size,
361
+ downsample=False,
362
+ blur_kernel=[1, 3, 3, 1],
363
+ bias=True,
364
+ activate=True,
365
+ ):
366
+ layers = []
367
+
368
+ if downsample:
369
+ factor = 2
370
+ p = (len(blur_kernel) - factor) + (kernel_size - 1)
371
+ pad0 = (p + 1) // 2
372
+ pad1 = p // 2
373
+
374
+ layers.append(Blur(blur_kernel, pad=(pad0, pad1)))
375
+
376
+ stride = 2
377
+ self.padding = 0
378
+
379
+ else:
380
+ stride = 1
381
+ self.padding = kernel_size // 2
382
+
383
+ layers.append(
384
+ EqualConv2d(
385
+ in_channel,
386
+ out_channel,
387
+ kernel_size,
388
+ padding=self.padding,
389
+ stride=stride,
390
+ bias=bias and not activate,
391
+ )
392
+ )
393
+
394
+ if activate:
395
+ layers.append(FusedLeakyReLU(out_channel, bias=bias))
396
+
397
+ super().__init__(*layers)
398
+
399
+
400
+ class Blur(nn.Module):
401
+ def __init__(self, kernel, pad, upsample_factor=1):
402
+ super().__init__()
403
+
404
+ kernel = make_kernel(kernel)
405
+
406
+ if upsample_factor > 1:
407
+ kernel = kernel * (upsample_factor ** 2)
408
+
409
+ self.register_buffer("kernel", kernel)
410
+
411
+ self.pad = pad
412
+
413
+ def forward(self, input):
414
+ out = upfirdn2d(input, self.kernel, pad=self.pad)
415
+
416
+ return out
417
+
418
+
419
+ def make_kernel(k):
420
+ k = torch.tensor(k, dtype=torch.float32)
421
+
422
+ if k.ndim == 1:
423
+ k = k[None, :] * k[:, None]
424
+
425
+ k /= k.sum()
426
+
427
+ return k
428
+
429
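+ # exponential moving average of model2's parameters into model1; decay=0 is a plain copy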
+ def accumulate(model1, model2, decay=0.999):
430
+ par1 = dict(model1.named_parameters())
431
+ par2 = dict(model2.named_parameters())
432
+
433
+ for k in par1.keys():
434
+ par1[k].data.mul_(decay).add_(par2[k].data, alpha=1 - decay)
NTED/base_module.py ADDED
@@ -0,0 +1,115 @@
1
+ import math
2
+ import functools
3
+ import sys
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+
8
+ from NTED.base_function import EncoderLayer, DecoderLayer, ToRGB
9
+ from NTED.edge_attention_layer import Edge_Attn
10
+
11
+ class Encoder(nn.Module):
12
+ def __init__(
13
+ self,
14
+ size,
15
+ input_dim,
16
+ channels,
17
+ num_labels=None,
18
+ match_kernels=None,
19
+ blur_kernel=[1, 3, 3, 1],
20
+ ):
21
+ super().__init__()
22
+ self.first = EncoderLayer(input_dim, channels[size], 1)
23
+ self.convs = nn.ModuleList()
24
+
25
+ log_size = int(math.log(size, 2))
26
+ self.log_size = log_size
27
+
28
+ in_channel = channels[size]
29
+ for i in range(log_size-1, 3, -1):
30
+ out_channel = channels[2 ** i]
31
+ num_label = num_labels[2 ** i] if num_labels is not None else None
32
+ match_kernel = match_kernels[2 ** i] if match_kernels is not None else None
33
+ use_extraction = num_label and match_kernel
34
+ conv = EncoderLayer(
35
+ in_channel,
36
+ out_channel,
37
+ kernel_size=3,
38
+ downsample=True,
39
+ blur_kernel=blur_kernel,
40
+ use_extraction=use_extraction,
41
+ num_label=num_label,
42
+ match_kernel=match_kernel
43
+ )
44
+
45
+ self.convs.append(conv)
46
+ in_channel = out_channel
47
+
48
+ def forward(self, input, recoder=None):
49
+ out = self.first(input)
50
+ for idx, layer in enumerate(self.convs):
51
+ out = layer(out, recoder)
52
+ return out
53
+
54
+ class Decoder(nn.Module):
55
+ def __init__(
56
+ self,
57
+ size,
58
+ channels,
59
+ num_labels,
60
+ match_kernels,
61
+ blur_kernel=[1, 3, 3, 1],
62
+ ):
63
+ super().__init__()
64
+
65
+
66
+ self.convs = nn.ModuleList()
67
+ # input at resolution 16*16
68
+ in_channel = channels[16]
69
+ self.log_size = int(math.log(size, 2))
70
+
71
+ for i in range(4, self.log_size + 1):
72
+ out_channel = channels[2 ** i]
73
+ num_label, match_kernel = num_labels[2 ** i], match_kernels[2 ** i]
74
+ use_distribution = num_label and match_kernel
75
+ upsample = (i != 4)
76
+
77
+ base_layer = functools.partial(
78
+ DecoderLayer,
79
+ out_channel=out_channel,
80
+ kernel_size=3,
81
+ blur_kernel=blur_kernel,
82
+ use_distribution=use_distribution,
83
+ num_label=num_label,
84
+ match_kernel=match_kernel
85
+ )
86
+
87
+ up = nn.Module()
88
+ up.conv0 = base_layer(in_channel=in_channel, upsample=upsample)
89
+ up.conv1 = base_layer(in_channel=out_channel, upsample=False)
90
+ up.to_rgb = ToRGB(out_channel, upsample=upsample)
91
+ self.convs.append(up)
92
+ in_channel = out_channel
93
+
94
+ self.num_labels, self.match_kernels = num_labels, match_kernels
95
+
96
+ self.edge_attn_block = Edge_Attn(in_channels=3)
97
+
98
+ def forward(self, input, neural_textures, recoder):
99
+ counter = 0
100
+ out, skip = input, None
101
+ for i, up in enumerate(self.convs):
102
+ if self.num_labels[2**(i+4)] and self.match_kernels[2**(i+4)]:
103
+ neural_texture_conv0 = neural_textures[counter]
104
+ neural_texture_conv1 = neural_textures[counter+1]
105
+ counter += 2
106
+ else:
107
+ neural_texture_conv0, neural_texture_conv1 = None, None
108
+ out = up.conv0(out, neural_texture=neural_texture_conv0, recoder=recoder)
109
+ out = up.conv1(out, neural_texture=neural_texture_conv1, recoder=recoder)
110
+
111
+ skip = up.to_rgb(out, skip)
112
+ image = self.edge_attn_block(skip)
113
+ # image = skip
114
+ return image
115
+
NTED/config.py ADDED
@@ -0,0 +1,202 @@
1
+ import collections
2
+ import functools
3
+ import os
4
+ import re
5
+
6
+ import yaml
7
+
8
+ class AttrDict(dict):
9
+ """Dict as attribute trick."""
10
+
11
+ def __init__(self, *args, **kwargs):
12
+ super(AttrDict, self).__init__(*args, **kwargs)
13
+ self.__dict__ = self
14
+ for key, value in self.__dict__.items():
15
+ if isinstance(value, dict):
16
+ self.__dict__[key] = AttrDict(value)
17
+ elif isinstance(value, (list, tuple)):
18
+ if isinstance(value[0], dict):
19
+ self.__dict__[key] = [AttrDict(item) for item in value]
20
+ else:
21
+ self.__dict__[key] = value
22
+
23
+ def yaml(self):
24
+ """Convert object to yaml dict and return."""
25
+ yaml_dict = {}
26
+ for key, value in self.__dict__.items():
27
+ if isinstance(value, AttrDict):
28
+ yaml_dict[key] = value.yaml()
29
+ elif isinstance(value, list):
30
+ if isinstance(value[0], AttrDict):
31
+ new_l = []
32
+ for item in value:
33
+ new_l.append(item.yaml())
34
+ yaml_dict[key] = new_l
35
+ else:
36
+ yaml_dict[key] = value
37
+ else:
38
+ yaml_dict[key] = value
39
+ return yaml_dict
40
+
41
+ def __repr__(self):
42
+ """Print all variables."""
43
+ ret_str = []
44
+ for key, value in self.__dict__.items():
45
+ if isinstance(value, AttrDict):
46
+ ret_str.append('{}:'.format(key))
47
+ child_ret_str = value.__repr__().split('\n')
48
+ for item in child_ret_str:
49
+ ret_str.append(' ' + item)
50
+ elif isinstance(value, list):
51
+ if isinstance(value[0], AttrDict):
52
+ ret_str.append('{}:'.format(key))
53
+ for item in value:
54
+ # Treat as AttrDict above.
55
+ child_ret_str = item.__repr__().split('\n')
56
+ for item in child_ret_str:
57
+ ret_str.append(' ' + item)
58
+ else:
59
+ ret_str.append('{}: {}'.format(key, value))
60
+ else:
61
+ ret_str.append('{}: {}'.format(key, value))
62
+ return '\n'.join(ret_str)
63
+
64
+
65
+ class Config(AttrDict):
66
+ r"""Configuration class. This should include every human specifiable
67
+ hyperparameter values for your training."""
68
+
69
+ def __init__(self, filename=None, verbose=False, is_train=True):
70
+ super(Config, self).__init__()
71
+ # Set default parameters.
72
+ # Logging.
73
+
74
+ large_number = 1000000000
75
+ self.snapshot_save_iter = large_number
76
+ self.snapshot_save_epoch = large_number
77
+ self.snapshot_save_start_iter = 0
78
+ self.snapshot_save_start_epoch = 0
79
+ self.image_save_iter = large_number
80
+ self.eval_epoch = large_number
81
+ self.start_eval_epoch = large_number
82
+ self.eval_epoch = large_number
83
+ self.max_epoch = large_number
84
+ self.max_iter = large_number
85
+ self.logging_iter = 100
86
+ self.image_to_tensorboard=False
87
+ self.which_iter = None
88
+ self.resume = True
89
+
90
+
91
+ self.checkpoints_dir = 'NTED'
92
+ self.name = 'nted_checkpoint.pt'
93
+ self.phase = 'train' if is_train else 'test'
94
+
95
+ # Networks.
96
+ self.gen = AttrDict(type='generators.dummy')
97
+ self.dis = AttrDict(type='discriminators.dummy')
98
+
99
+ # Optimizers.
100
+ self.gen_optimizer = AttrDict(type='adam',
101
+ lr=0.0001,
102
+ adam_beta1=0.0,
103
+ adam_beta2=0.999,
104
+ eps=1e-8,
105
+ lr_policy=AttrDict(iteration_mode=False,
106
+ type='step',
107
+ step_size=large_number,
108
+ gamma=1))
109
+ self.dis_optimizer = AttrDict(type='adam',
110
+ lr=0.0001,
111
+ adam_beta1=0.0,
112
+ adam_beta2=0.999,
113
+ eps=1e-8,
114
+ lr_policy=AttrDict(iteration_mode=False,
115
+ type='step',
116
+ step_size=large_number,
117
+ gamma=1))
118
+ # Data.
119
+ self.data = AttrDict(name='dummy',
120
+ type='datasets.images',
121
+ num_workers=0)
122
+ self.test_data = AttrDict(name='dummy',
123
+ type='datasets.images',
124
+ num_workers=0,
125
+ test=AttrDict(is_lmdb=False,
126
+ roots='',
127
+ batch_size=1))
128
+ self.trainer = AttrDict(
129
+ image_to_tensorboard=False,
130
+ hparam_to_tensorboard=False)
131
+
132
+ # Cudnn.
133
+ self.cudnn = AttrDict(deterministic=False,
134
+ benchmark=True)
135
+
136
+ # Others.
137
+ self.pretrained_weight = ''
138
+ self.inference_args = AttrDict()
139
+
140
+
141
+ # Update with given configurations.
142
+ assert os.path.exists(filename), 'File {} does not exist.'.format(filename)
143
+ loader = yaml.SafeLoader
144
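+ # let SafeLoader parse scientific-notation floats such as 1e-8, which stock YAML 1.1 reads as strings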
+ loader.add_implicit_resolver(
145
+ u'tag:yaml.org,2002:float',
146
+ re.compile(u'''^(?:
147
+ [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
148
+ |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
149
+ |\\.[0-9_]+(?:[eE][-+][0-9]+)?
150
+ |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
151
+ |[-+]?\\.(?:inf|Inf|INF)
152
+ |\\.(?:nan|NaN|NAN))$''', re.X),
153
+ list(u'-+0123456789.'))
154
+ try:
155
+ with open(filename, 'r') as f:
156
+ cfg_dict = yaml.load(f, Loader=loader)
157
+ except EnvironmentError:
158
+ print('Please check the file named "{}".'.format(filename))
159
+ recursive_update(self, cfg_dict)
160
+
161
+ # Put common opts in both gen and dis.
162
+ if 'common' in cfg_dict:
163
+ self.common = AttrDict(**cfg_dict['common'])
164
+ self.gen.common = self.common
165
+ self.dis.common = self.common
166
+
167
+
168
+ if verbose:
169
+ print(' config '.center(80, '-'))
170
+ print(self.__repr__())
171
+ print(''.center(80, '-'))
172
+
173
+
174
+ def rsetattr(obj, attr, val):
175
+ """Recursively find object and set value"""
176
+ pre, _, post = attr.rpartition('.')
177
+ return setattr(rgetattr(obj, pre) if pre else obj, post, val)
178
+
179
+
180
+ def rgetattr(obj, attr, *args):
181
+ """Recursively find object and return value"""
182
+
183
+ def _getattr(obj, attr):
184
+ r"""Get attribute."""
185
+ return getattr(obj, attr, *args)
186
+
187
+ return functools.reduce(_getattr, [obj] + attr.split('.'))
188
+
189
+
190
+ def recursive_update(d, u):
191
+ """Recursively update AttrDict d with AttrDict u"""
192
+ for key, value in u.items():
193
+ if isinstance(value, collections.abc.Mapping):
194
+ d.__dict__[key] = recursive_update(d.get(key, AttrDict({})), value)
195
+ elif isinstance(value, (list, tuple)):
196
+ if isinstance(value[0], dict):
197
+ d.__dict__[key] = [AttrDict(item) for item in value]
198
+ else:
199
+ d.__dict__[key] = value
200
+ else:
201
+ d.__dict__[key] = value
202
+ return d
NTED/demo_dataset.py ADDED
@@ -0,0 +1,182 @@
1
+
2
+ import os
3
+ import cv2
4
+ import math
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+ import torch
9
+ import torchvision.transforms.functional as F
10
+
11
+ class DemoDataset(object):
12
+ def __init__(self):
13
+ super().__init__()
14
+ self.LIMBSEQ = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
15
+ [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
16
+ [1, 16], [16, 18], [3, 17], [6, 18]]
17
+
18
+ self.COLORS = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
19
+ [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
20
+ [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
21
+
22
+ self.LIMBSEQ_hands = [[0, 1], [1, 2], [2, 3], [3, 4], \
23
+ [0, 5], [5, 6], [6, 7], [7, 8], \
24
+ [0, 9], [9, 10], [10, 11], [11, 12], \
25
+ [0, 13], [13, 14], [14, 15], [15, 16], \
26
+ [0, 17], [17, 18], [18, 19], [19, 20], \
27
+ [21, 22], [22, 23], [23, 24], [24, 25], \
28
+ [21, 26], [26, 27], [27, 28], [28, 29], \
29
+ [21, 30], [30, 31], [31, 32], [32, 33], \
30
+ [21, 34], [34, 35], [35, 36], [36, 37], \
31
+ [21, 38], [38, 39], [39, 40], [40, 41]]
32
+
33
+ self.COLORS_hands = [[85, 0, 0], [170, 0, 0], [85, 85, 0], [85, 170, 0], [170, 85, 0], [170, 170, 0], [85, 85, 85], \
34
+ [85, 85, 170], [85, 170, 85], [85, 170, 170], [0, 85, 0], [0, 170, 0], [0, 85, 85], [0, 85, 170], \
35
+ [0, 170, 85], [0, 170, 170], [50, 0, 0], [135, 0, 0], [50, 50, 0], [50, 135, 0], [135, 50, 0], \
36
+ [135, 135, 0], [50, 50, 50], [50, 50, 135], [50, 135, 50], [50, 135, 135], [0, 50, 0], [0, 135, 0], \
37
+ [0, 50, 50], [0, 50, 135], [0, 135, 50], [0, 135, 135], [100, 0, 0], [200, 0, 0], [100, 100, 0], \
38
+ [100, 200, 0], [200, 100, 0], [200, 200, 0], [100, 100, 100], [100, 100, 200], [100, 200, 100], [100, 200, 200]
39
+ ]
40
+
41
+ self.img_size = tuple([512, 352])
42
+
43
+ def load_item(self, img, pose, handpose=None):
44
+
45
+ reference_img = self.get_image_tensor(img)[None,:]
46
+ label, ske = self.get_label_tensor(pose, handpose)
47
+ label = label[None,:]
48
+
49
+ return {'reference_image':reference_img, 'target_skeleton':label, 'skeleton_img': ske}
50
+
51
+ def get_image_tensor(self, bgr_img):
52
+ img = Image.fromarray(cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB))
53
+ img = F.resize(img, self.img_size)
54
+ img = F.to_tensor(img)
55
+ img = F.normalize(img, (0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
56
+ return img
57
+
58
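+ # draw body and hand keypoints/limbs onto a canvas and build per-limb distance-transform channels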
+ def get_label_tensor(self, pose, hand_pose=None):
59
+ canvas = np.zeros((self.img_size[0], self.img_size[1], 3)).astype(np.uint8)
60
+ keypoint = np.array(pose)
61
+ if hand_pose is not None:
62
+ keypoint_hands = np.array(hand_pose)
63
+ else:
64
+ keypoint_hands = None
65
+
66
+ # keypoint = self.trans_keypoins(keypoint)
67
+
68
+ stickwidth = 4
69
+ for i in range(18):
70
+ x, y = keypoint[i, 0:2]
71
+ if x == -1 or y == -1:
72
+ continue
73
+ cv2.circle(canvas, (int(x), int(y)), 4, self.COLORS[i], thickness=-1)
74
+ if keypoint_hands is not None:
75
+ for i in range(42):
76
+ x, y = keypoint_hands[i, 0:2]
77
+ if x == -1 or y == -1:
78
+ continue
79
+ cv2.circle(canvas, (int(x), int(y)), 4, self.COLORS_hands[i], thickness=-1)
80
+
81
+ joints = []
82
+ for i in range(17):
83
+ Y = keypoint[np.array(self.LIMBSEQ[i])-1, 0]
84
+ X = keypoint[np.array(self.LIMBSEQ[i])-1, 1]
85
+ cur_canvas = canvas.copy()
86
+ if -1 in Y or -1 in X:
87
+ joints.append(np.zeros_like(cur_canvas[:, :, 0]))
88
+ continue
89
+ mX = np.mean(X)
90
+ mY = np.mean(Y)
91
+ length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
92
+ angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
93
+ polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
94
+ cv2.fillConvexPoly(cur_canvas, polygon, self.COLORS[i])
95
+ canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
96
+
97
+ joint = np.zeros_like(cur_canvas[:, :, 0])
98
+ cv2.fillConvexPoly(joint, polygon, 255)
99
+ joint = cv2.addWeighted(joint, 0.4, joint, 0.6, 0)
100
+ joints.append(joint)
101
+ if keypoint_hands is not None:
102
+ for i in range(40):
103
+ Y = keypoint_hands[np.array(self.LIMBSEQ_hands[i]), 0]
104
+ X = keypoint_hands[np.array(self.LIMBSEQ_hands[i]), 1]
105
+ cur_canvas = canvas.copy()
106
+ if -1 in Y or -1 in X:
107
+ if (i+1) % 4 == 0:
108
+ joints.append(np.zeros_like(cur_canvas[:, :, 0]))
109
+ continue
110
+ mX = np.mean(X)
111
+ mY = np.mean(Y)
112
+ length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
113
+ angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
114
+ polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), int(stickwidth/2)), int(angle), 0, 360, 1)
115
+ cv2.fillConvexPoly(cur_canvas, polygon, self.COLORS_hands[i])
116
+ canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
117
+
118
+ # one channel per finger
119
+ if i % 4 == 0:
120
+ joint = np.zeros_like(cur_canvas[:, :, 0])
121
+ cv2.fillConvexPoly(joint, polygon, 255)
122
+ joint = cv2.addWeighted(joint, 0.4, joint, 0.6, 0)
123
+ if (i+1) % 4 == 0:
124
+ joints.append(joint)
125
+
126
+ pose = F.to_tensor(Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)))
127
+
128
+ tensors_dist = 0
129
+ e = 1
130
+ for i in range(len(joints)):
131
+ im_dist = cv2.distanceTransform(255-joints[i], cv2.DIST_L1, 3)
132
+ im_dist = np.clip((im_dist / 3), 0, 255).astype(np.uint8)
133
+ tensor_dist = F.to_tensor(Image.fromarray(im_dist))
134
+ tensors_dist = tensor_dist if e == 1 else torch.cat([tensors_dist, tensor_dist])
135
+ e += 1
136
+
137
+ label_tensor = torch.cat((pose, tensors_dist), dim=0)
138
+
139
+ return label_tensor, canvas
140
+
141
+ def tensor2im(self, image_tensor, imtype=np.uint8, normalize=True,
142
+ three_channel_output=True):
143
+ r"""Convert tensor to image.
144
+
145
+ Args:
146
+ image_tensor (torch.tensor or list of torch.tensor): If tensor then
147
+ (NxCxHxW) or (NxTxCxHxW) or (CxHxW).
148
+ imtype (np.dtype): Type of output image.
149
+ normalize (bool): Is the input image normalized or not?
150
+ three_channel_output (bool): Should single channel images be made 3
151
+ channel in output?
152
+
153
+ Returns:
154
+ (numpy.ndarray or list): a list in cases 1 and 2 above.
155
+ """
156
+ if image_tensor is None:
157
+ return None
158
+ if isinstance(image_tensor, list):
159
+ return [self.tensor2im(x, imtype, normalize) for x in image_tensor]
160
+ if image_tensor.dim() == 5 or image_tensor.dim() == 4:
161
+ return [self.tensor2im(image_tensor[idx], imtype, normalize)
162
+ for idx in range(image_tensor.size(0))]
163
+
164
+ if image_tensor.dim() == 3:
165
+ image_numpy = image_tensor.detach().float().numpy()
166
+ if normalize:
167
+ image_numpy = (np.transpose(
168
+ image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
169
+ else:
170
+ image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0
171
+ image_numpy = np.clip(image_numpy, 0, 255)
172
+ if image_numpy.shape[2] == 1 and three_channel_output:
173
+ image_numpy = np.repeat(image_numpy, 3, axis=2)
174
+ elif image_numpy.shape[2] > 3:
175
+ image_numpy = image_numpy[:, :, :3]
176
+ return image_numpy.astype(imtype)
177
+
178
+ def trans_keypoins(self, keypoints):
179
+ missing_keypoint_index = keypoints == -1
180
+
181
+ keypoints[missing_keypoint_index] = -1
182
+ return keypoints
NTED/edge_attention_layer.py ADDED
@@ -0,0 +1,116 @@
1
+ # Date: 2023-03-14
2
+ # Creator: zejunyang
3
+ # Function: edge attention layer.
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+
9
+ from NTED.base_function import Blur
10
+
11
+
12
+ class ResBlock(nn.Module):
13
+ def __init__(self, in_nc, out_nc, scale='down'): # , norm_layer=nn.BatchNorm2d
14
+ super(ResBlock, self).__init__()
15
+ use_bias = True
16
+ assert scale in ['up', 'down', 'same'], "ResBlock scale must be in 'up' 'down' 'same'"
17
+
18
+ if scale == 'same':
19
+ # self.scale = nn.Conv2d(in_nc, out_nc, kernel_size=1, bias=True)
20
+ self.scale = nn.Conv2d(in_nc, out_nc, kernel_size=3, stride=1, padding=1, bias=True)
21
+ if scale == 'up':
22
+ self.scale = nn.Sequential(
23
+ nn.Upsample(scale_factor=2, mode='bilinear'),
24
+ nn.Conv2d(in_nc, out_nc, kernel_size=1,bias=True)
25
+ )
26
+ if scale == 'down':
27
+ self.scale = nn.Conv2d(in_nc, out_nc, kernel_size=3, stride=2, padding=1, bias=use_bias)
28
+
29
+ self.block = nn.Sequential(
30
+ nn.Conv2d(out_nc, out_nc, kernel_size=3, stride=1, padding=1, bias=use_bias),
31
+ # norm_layer(out_nc),
32
+ nn.ReLU(inplace=True),
33
+ nn.Conv2d(out_nc, out_nc, kernel_size=3, stride=1, padding=1, bias=use_bias),
34
+ # norm_layer(out_nc)
35
+ )
36
+ self.relu = nn.ReLU(inplace=True)
37
+ # self.padding = nn.ReplicationPad2d(padding=(0, 1, 0, 0))
38
+
39
+ def forward(self, x):
40
+ residual = self.scale(x)
41
+ return self.relu(residual + self.block(residual))
42
+
43
+
44
+ class Edge_Attn(nn.Module):
45
+ def __init__(self, in_channels=3):
46
+ super(Edge_Attn, self).__init__()
47
+ self.in_channels = in_channels
48
+
49
+ blur_kernel=[1, 3, 3, 3, 1]
50
+ self.blur = Blur(blur_kernel, pad=(2, 2), upsample_factor=1)
51
+
52
+ # self.conv = nn.Conv2d(self.in_channels, self.in_channels, 3, padding=1, bias=False)
53
+ self.res_block = ResBlock(self.in_channels, self.in_channels, scale='same')
54
+ self.sigmoid = nn.Sigmoid()
55
+
56
+ def gradient(self, x):
57
+ h_x = x.size()[2]
58
+ w_x = x.size()[3]
59
+ stride = 3
60
+ r = F.pad(x, (0, stride, 0, 0), mode='replicate')[:, :, :, stride:]
61
+ l = F.pad(x, (stride, 0, 0, 0), mode='replicate')[:, :, :, :w_x]
62
+ t = F.pad(x, (0, 0, stride, 0), mode='replicate')[:, :, :h_x, :]
63
+ b = F.pad(x, (0, 0, 0, stride), mode='replicate')[:, :, stride:, :]
64
+ xgrad = torch.pow(torch.pow((r - l) * 0.5, 2) + torch.pow((t - b) * 0.5, 2), 0.5)
65
+ xgrad = self.blur(xgrad)
66
+ return xgrad
67
+
68
+ def forward(self, x):
69
+ # feature_edge = self.gradient(x).detach()
70
+ # attn = self.conv(feature_edge)
71
+
72
+ for b in range(x.shape[0]):
73
+ for c in range(x.shape[1]):
74
+ if c == 0:
75
+ channel_edge = self.gradient(x[b:b+1, c:c+1])
76
+ else:
77
+ channel_edge = torch.concat([channel_edge, self.gradient(x[b:b+1, c:c+1])], dim=1)
78
+ if b == 0:
79
+ feature_edge = channel_edge
80
+ else:
81
+ feature_edge = torch.concat([feature_edge, channel_edge], dim=0)
82
+ feature_edge = feature_edge.detach()
83
+ feature_edge = x * feature_edge
84
+ attn = self.res_block(feature_edge)
85
+ attn = self.sigmoid(attn)
86
+
87
+ # out = x * attn
88
+
89
+ out = x * attn + x
90
+
91
+ return out
92
+
93
+
94
+
95
+ if __name__ == '__main__':
96
+ from PIL import Image
97
+ import numpy as np
98
+ import cv2
99
+
100
+ edg_atten = Edge_Attn()
101
+
102
+ im = Image.open('/apdcephfs/share_1474453/zejunzhang/dataset/pose_trans_dataset/fake_images/001400.png')
103
+ npim = np.array(im,dtype=np.float32)
104
+ npim = cv2.cvtColor(npim, cv2.COLOR_RGB2GRAY)
105
+
106
+ # npim = npim[:, :, 2]
107
+ tim = torch.from_numpy(npim).unsqueeze_(0).unsqueeze_(0)
108
+ edge = edg_atten.gradient(tim)
109
+ npgrad = edge.squeeze(0).squeeze(0).data.clamp(0,255).numpy()
110
+ Image.fromarray(npgrad.astype('uint8')).save('tmp.png')
111
+
112
+ # tim = torch.from_numpy(npim).unsqueeze_(0)
113
+ # edge = edg_atten.gradient_1order(tim)
114
+ # npgrad = edge.squeeze(0).data.clamp(0,255).numpy()[:, :, 0]
115
+ # Image.fromarray(npgrad.astype('uint8')).save('tmp.png')
116
+
NTED/extraction_distribution_model.py ADDED
@@ -0,0 +1,62 @@
1
+ import collections
2
+ from torch import nn
3
+ from NTED.base_module import Encoder, Decoder
4
+
5
+ from torch.cuda.amp import autocast as autocast
6
+
7
+ class Generator(nn.Module):
8
+ def __init__(
9
+ self,
10
+ size,
11
+ semantic_dim,
12
+ channels,
13
+ num_labels,
14
+ match_kernels,
15
+ blur_kernel=[1, 3, 3, 1],
16
+ ):
17
+ super().__init__()
18
+ self.size = size
19
+ self.reference_encoder = Encoder(
20
+ size, 3, channels, num_labels, match_kernels, blur_kernel
21
+ )
22
+
23
+ self.skeleton_encoder = Encoder(
24
+ size, semantic_dim, channels,
25
+ )
26
+
27
+ self.target_image_renderer = Decoder(
28
+ size, channels, num_labels, match_kernels, blur_kernel
29
+ )
30
+
31
+ def _cal_temp(self, module):
32
+ return sum(p.numel() for p in module.parameters() if p.requires_grad)
33
+
34
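+ # encode the target skeleton, extract neural textures from the reference image, then render the target image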
+ def forward(
35
+ self,
36
+ source_image,
37
+ skeleton,
38
+ amp_flag=False,
39
+ ):
40
+ if amp_flag:
41
+ with autocast():
42
+ output_dict={}
43
+ recoder = collections.defaultdict(list)
44
+ skeleton_feature = self.skeleton_encoder(skeleton)
45
+ _ = self.reference_encoder(source_image, recoder)
46
+ neural_textures = recoder["neural_textures"]
47
+ output_dict['fake_image'] = self.target_image_renderer(
48
+ skeleton_feature, neural_textures, recoder
49
+ )
50
+ output_dict['info'] = recoder
51
+ return output_dict
52
+ else:
53
+ output_dict={}
54
+ recoder = collections.defaultdict(list)
55
+ skeleton_feature = self.skeleton_encoder(skeleton)
56
+ _ = self.reference_encoder(source_image, recoder)
57
+ neural_textures = recoder["neural_textures"]
58
+ output_dict['fake_image'] = self.target_image_renderer(
59
+ skeleton_feature, neural_textures, recoder
60
+ )
61
+ output_dict['info'] = recoder
62
+ return output_dict
NTED/fashion_512.yaml ADDED
@@ -0,0 +1,129 @@
1
+ distributed: True
2
+ image_to_tensorboard: True
3
+ snapshot_save_iter: 50000
4
+ snapshot_save_epoch: 20
5
+ snapshot_save_start_iter: 20000
6
+ snapshot_save_start_epoch: 100
7
+ image_save_iter: 1000
8
+ max_epoch: 400
9
+ logging_iter: 100
10
+ amp: False
11
+
12
+ gen_optimizer:
13
+ type: adam
14
+ lr: 0.002
15
+ adam_beta1: 0.
16
+ adam_beta2: 0.99
17
+ lr_policy:
18
+ iteration_mode: False
19
+ type: step
20
+ step_size: 1000000
21
+ gamma: 1
22
+
23
+ dis_optimizer:
24
+ type: adam
25
+ lr: 0.001882
26
+ adam_beta1: 0.
27
+ adam_beta2: 0.9905
28
+ lr_policy:
29
+ iteration_mode: False
30
+ type: step
31
+ step_size: 1000000
32
+ gamma: 1
33
+
34
+
35
+ trainer:
36
+ type: NTED.extraction_distribution_trainer::Trainer
37
+ gan_mode: style_gan2
38
+ gan_start_iteration: 1000 # 0
39
+ face_crop_method: util.face_crop::crop_face_from_output
40
+ hand_crop_method: util.face_crop::crop_hands_from_output
41
+ d_reg_every: 16
42
+ r1: 10
43
+ loss_weight:
44
+ weight_perceptual: 1
45
+ weight_gan: 1.5
46
+ weight_attn_rec: 15
47
+ weight_face: 1
48
+ weight_hand: 1
49
+ weight_l1: 1
50
+ weight_l1_hand: 0.8
51
+ weight_edge: 100
52
+ attn_weights:
53
+ 8: 1
54
+ 16: 1
55
+ 32: 1
56
+ 64: 1
57
+ 128: 1
58
+ 256: 1
59
+ vgg_param:
60
+ network: vgg19
61
+ layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
62
+ num_scales: 3
63
+ use_style_loss: True
64
+ style_to_perceptual: 1000
65
+ vgg_hand_param:
66
+ network: vgg19
67
+ layers: ['relu_1_1', 'relu_2_1', 'relu_3_1','relu_3_3', 'relu_4_1', 'relu_4_3', 'relu_5_1']
68
+
69
+ gen:
70
+ type: NTED.extraction_distribution_model::Generator
71
+ param:
72
+ size: 512
73
+ semantic_dim: 30
74
+ channels:
75
+ 16: 512
76
+ 32: 512
77
+ 64: 512
78
+ 128: 256
79
+ 256: 128
80
+ 512: 64
81
+ 1024: 32
82
+ num_labels:
83
+ 16: 16
84
+ 32: 32
85
+ 64: 32
86
+ 128: 64
87
+ 256: 64
88
+ 512: False
89
+ match_kernels:
90
+ 16: 1
91
+ 32: 3
92
+ 64: 3
93
+ 128: 3
94
+ 256: 3
95
+ 512: False
96
+
97
+ dis:
98
+ type: generators.discriminator::Discriminator
99
+ param:
100
+ size: 512
101
+ channels:
102
+ 4: 512
103
+ 8: 512
104
+ 16: 512
105
+ 32: 512
106
+ 64: 512
107
+ 128: 256
108
+ 256: 128
109
+ 512: 64
110
+ is_square_image: False
111
+
112
+
113
+ data:
114
+ type: data.fashion_data::Dataset
115
+ preprocess_mode: resize_and_crop # resize_and_crop
116
+ path: /apdcephfs/share_1474453/zejunzhang/dataset/pose_trans_dataset_2d
117
+ num_workers: 16
118
+ sub_path: 512-352
119
+ resolution: 512
120
+ scale_param: 0.1
121
+ train:
122
+ batch_size: 4 # real_batch_size: 2 * 2 (source-->target & target --> source) * 4 (GPUs) = 16
123
+ distributed: True
124
+ val:
125
+ batch_size: 4
126
+ distributed: True
127
+ hand_keypoint: True
128
+
129
+
NTED/nted_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359d3d3bac365afe04aa8b906f1dc8891f0dd87ff1dfe5e60059b4fb9bb96af8
3
+ size 284375285
NTED/op/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .fused_act import FusedLeakyReLU, fused_leaky_relu
2
+ from .upfirdn2d import upfirdn2d
NTED/op/conv2d_gradfix.py ADDED
@@ -0,0 +1,227 @@
1
+ import contextlib
2
+ import warnings
3
+
4
+ import torch
5
+ from torch import autograd
6
+ from torch.nn import functional as F
7
+
8
+ enabled = True
9
+ weight_gradients_disabled = False
10
+
11
+
12
+ @contextlib.contextmanager
13
+ def no_weight_gradients():
14
+ global weight_gradients_disabled
15
+
16
+ old = weight_gradients_disabled
17
+ weight_gradients_disabled = True
18
+ yield
19
+ weight_gradients_disabled = old
20
+
21
+
22
+ def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
23
+ if could_use_op(input):
24
+ return conv2d_gradfix(
25
+ transpose=False,
26
+ weight_shape=weight.shape,
27
+ stride=stride,
28
+ padding=padding,
29
+ output_padding=0,
30
+ dilation=dilation,
31
+ groups=groups,
32
+ ).apply(input, weight, bias)
33
+
34
+ return F.conv2d(
35
+ input=input,
36
+ weight=weight,
37
+ bias=bias,
38
+ stride=stride,
39
+ padding=padding,
40
+ dilation=dilation,
41
+ groups=groups,
42
+ )
43
+
44
+
45
+ def conv_transpose2d(
46
+ input,
47
+ weight,
48
+ bias=None,
49
+ stride=1,
50
+ padding=0,
51
+ output_padding=0,
52
+ groups=1,
53
+ dilation=1,
54
+ ):
55
+ if could_use_op(input):
56
+ return conv2d_gradfix(
57
+ transpose=True,
58
+ weight_shape=weight.shape,
59
+ stride=stride,
60
+ padding=padding,
61
+ output_padding=output_padding,
62
+ groups=groups,
63
+ dilation=dilation,
64
+ ).apply(input, weight, bias)
65
+
66
+ return F.conv_transpose2d(
67
+ input=input,
68
+ weight=weight,
69
+ bias=bias,
70
+ stride=stride,
71
+ padding=padding,
72
+ output_padding=output_padding,
73
+ dilation=dilation,
74
+ groups=groups,
75
+ )
76
+
77
+
78
+ def could_use_op(input):
79
+ if (not enabled) or (not torch.backends.cudnn.enabled):
80
+ return False
81
+
82
+ if input.device.type != "cuda":
83
+ return False
84
+
85
+ if any(torch.__version__.startswith(x) for x in ["1.7.", "1.8."]):
86
+ return True
87
+
88
+ warnings.warn(
89
+ f"conv2d_gradfix not supported on PyTorch {torch.__version__}. Falling back to torch.nn.functional.conv2d()."
90
+ )
91
+
92
+ return False
93
+
94
+
95
+ def ensure_tuple(xs, ndim):
96
+ xs = tuple(xs) if isinstance(xs, (tuple, list)) else (xs,) * ndim
97
+
98
+ return xs
99
+
100
+
101
+ conv2d_gradfix_cache = dict()
102
+
103
+
104
+ def conv2d_gradfix(
105
+ transpose, weight_shape, stride, padding, output_padding, dilation, groups
106
+ ):
107
+ ndim = 2
108
+ weight_shape = tuple(weight_shape)
109
+ stride = ensure_tuple(stride, ndim)
110
+ padding = ensure_tuple(padding, ndim)
111
+ output_padding = ensure_tuple(output_padding, ndim)
112
+ dilation = ensure_tuple(dilation, ndim)
113
+
114
+ key = (transpose, weight_shape, stride, padding, output_padding, dilation, groups)
115
+ if key in conv2d_gradfix_cache:
116
+ return conv2d_gradfix_cache[key]
117
+
118
+ common_kwargs = dict(
119
+ stride=stride, padding=padding, dilation=dilation, groups=groups
120
+ )
121
+
122
+ def calc_output_padding(input_shape, output_shape):
123
+ if transpose:
124
+ return [0, 0]
125
+
126
+ return [
127
+ input_shape[i + 2]
128
+ - (output_shape[i + 2] - 1) * stride[i]
129
+ - (1 - 2 * padding[i])
130
+ - dilation[i] * (weight_shape[i + 2] - 1)
131
+ for i in range(ndim)
132
+ ]
133
+
134
+ class Conv2d(autograd.Function):
135
+ @staticmethod
136
+ def forward(ctx, input, weight, bias):
137
+ if not transpose:
138
+ out = F.conv2d(input=input, weight=weight, bias=bias, **common_kwargs)
139
+
140
+ else:
141
+ out = F.conv_transpose2d(
142
+ input=input,
143
+ weight=weight,
144
+ bias=bias,
145
+ output_padding=output_padding,
146
+ **common_kwargs,
147
+ )
148
+
149
+ ctx.save_for_backward(input, weight)
150
+
151
+ return out
152
+
153
+ @staticmethod
154
+ def backward(ctx, grad_output):
155
+ input, weight = ctx.saved_tensors
156
+ grad_input, grad_weight, grad_bias = None, None, None
157
+
158
+ if ctx.needs_input_grad[0]:
159
+ p = calc_output_padding(
160
+ input_shape=input.shape, output_shape=grad_output.shape
161
+ )
162
+ grad_input = conv2d_gradfix(
163
+ transpose=(not transpose),
164
+ weight_shape=weight_shape,
165
+ output_padding=p,
166
+ **common_kwargs,
167
+ ).apply(grad_output, weight, None)
168
+
169
+ if ctx.needs_input_grad[1] and not weight_gradients_disabled:
170
+ grad_weight = Conv2dGradWeight.apply(grad_output, input)
171
+
172
+ if ctx.needs_input_grad[2]:
173
+ grad_bias = grad_output.sum((0, 2, 3))
174
+
175
+ return grad_input, grad_weight, grad_bias
176
+
177
+ class Conv2dGradWeight(autograd.Function):
178
+ @staticmethod
179
+ def forward(ctx, grad_output, input):
180
+ op = torch._C._jit_get_operation(
181
+ "aten::cudnn_convolution_backward_weight"
182
+ if not transpose
183
+ else "aten::cudnn_convolution_transpose_backward_weight"
184
+ )
185
+ flags = [
186
+ torch.backends.cudnn.benchmark,
187
+ torch.backends.cudnn.deterministic,
188
+ torch.backends.cudnn.allow_tf32,
189
+ ]
190
+ grad_weight = op(
191
+ weight_shape,
192
+ grad_output,
193
+ input,
194
+ padding,
195
+ stride,
196
+ dilation,
197
+ groups,
198
+ *flags,
199
+ )
200
+ ctx.save_for_backward(grad_output, input)
201
+
202
+ return grad_weight
203
+
204
+ @staticmethod
205
+ def backward(ctx, grad_grad_weight):
206
+ grad_output, input = ctx.saved_tensors
207
+ grad_grad_output, grad_grad_input = None, None
208
+
209
+ if ctx.needs_input_grad[0]:
210
+ grad_grad_output = Conv2d.apply(input, grad_grad_weight, None)
211
+
212
+ if ctx.needs_input_grad[1]:
213
+ p = calc_output_padding(
214
+ input_shape=input.shape, output_shape=grad_output.shape
215
+ )
216
+ grad_grad_input = conv2d_gradfix(
217
+ transpose=(not transpose),
218
+ weight_shape=weight_shape,
219
+ output_padding=p,
220
+ **common_kwargs,
221
+ ).apply(grad_output, grad_grad_weight, None)
222
+
223
+ return grad_grad_output, grad_grad_input
224
+
225
+ conv2d_gradfix_cache[key] = Conv2d
226
+
227
+ return Conv2d
NTED/op/fused_act.py ADDED
@@ -0,0 +1,127 @@
1
+ import os
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+ from torch.autograd import Function
7
+ from torch.utils.cpp_extension import load
8
+
9
+
10
+ module_path = os.path.dirname(__file__)
11
+ fused = load(
12
+ "fused",
13
+ sources=[
14
+ os.path.join(module_path, "fused_bias_act.cpp"),
15
+ os.path.join(module_path, "fused_bias_act_kernel.cu"),
16
+ ],
17
+ )
18
+
19
+
20
+ class FusedLeakyReLUFunctionBackward(Function):
21
+ @staticmethod
22
+ def forward(ctx, grad_output, out, bias, negative_slope, scale):
23
+ ctx.save_for_backward(out)
24
+ ctx.negative_slope = negative_slope
25
+ ctx.scale = scale
26
+
27
+ empty = grad_output.new_empty(0)
28
+
29
+ grad_input = fused.fused_bias_act(
30
+ grad_output.contiguous(), empty, out, 3, 1, negative_slope, scale
31
+ )
32
+
33
+ dim = [0]
34
+
35
+ if grad_input.ndim > 2:
36
+ dim += list(range(2, grad_input.ndim))
37
+
38
+ if bias:
39
+ grad_bias = grad_input.sum(dim).detach()
40
+
41
+ else:
42
+ grad_bias = empty
43
+
44
+ return grad_input, grad_bias
45
+
46
+ @staticmethod
47
+ def backward(ctx, gradgrad_input, gradgrad_bias):
48
+ out, = ctx.saved_tensors
49
+ gradgrad_out = fused.fused_bias_act(
50
+ gradgrad_input.contiguous(),
51
+ gradgrad_bias.to(gradgrad_input.dtype),
52
+ out,
53
+ 3,
54
+ 1,
55
+ ctx.negative_slope,
56
+ ctx.scale,
57
+ )
58
+
59
+ return gradgrad_out, None, None, None, None
60
+
61
+
62
+ class FusedLeakyReLUFunction(Function):
63
+ @staticmethod
64
+ def forward(ctx, input, bias, negative_slope, scale):
65
+ empty = input.new_empty(0)
66
+
67
+ ctx.bias = bias is not None
68
+
69
+ if bias is None:
70
+ bias = empty
71
+
72
+ out = fused.fused_bias_act(input, bias.to(input.dtype), empty, 3, 0, negative_slope, scale)
73
+ ctx.save_for_backward(out)
74
+ ctx.negative_slope = negative_slope
75
+ ctx.scale = scale
76
+
77
+ return out
78
+
79
+ @staticmethod
80
+ def backward(ctx, grad_output):
81
+ out, = ctx.saved_tensors
82
+
83
+ grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply(
84
+ grad_output, out, ctx.bias, ctx.negative_slope, ctx.scale
85
+ )
86
+
87
+ if not ctx.bias:
88
+ grad_bias = None
89
+
90
+ return grad_input, grad_bias, None, None
91
+
92
+
93
+ class FusedLeakyReLU(nn.Module):
94
+ def __init__(self, channel, bias=True, negative_slope=0.2, scale=2 ** 0.5):
95
+ super().__init__()
96
+
97
+ if bias:
98
+ self.bias = nn.Parameter(torch.zeros(channel))
99
+
100
+ else:
101
+ self.bias = None
102
+
103
+ self.negative_slope = negative_slope
104
+ self.scale = scale
105
+
106
+ def forward(self, input):
107
+ return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
108
+
109
+
110
+ def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
111
+ if input.device.type == "cpu":
112
+ if bias is not None:
113
+ rest_dim = [1] * (input.ndim - bias.ndim - 1)
114
+ return (
115
+ F.leaky_relu(
116
+ input + bias.view(1, bias.shape[0], *rest_dim), negative_slope=0.2
117
+ )
118
+ * scale
119
+ )
120
+
121
+ else:
122
+ return F.leaky_relu(input, negative_slope=0.2) * scale
123
+
124
+ else:
125
+ return FusedLeakyReLUFunction.apply(
126
+ input.contiguous(), bias, negative_slope, scale
127
+ )
NTED/op/fused_bias_act.cpp ADDED
@@ -0,0 +1,32 @@
1
+
2
+ #include <ATen/ATen.h>
3
+ #include <torch/extension.h>
4
+
5
+ torch::Tensor fused_bias_act_op(const torch::Tensor &input,
6
+ const torch::Tensor &bias,
7
+ const torch::Tensor &refer, int act, int grad,
8
+ float alpha, float scale);
9
+
10
+ #define CHECK_CUDA(x) \
11
+ TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
12
+ #define CHECK_CONTIGUOUS(x) \
13
+ TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
14
+ #define CHECK_INPUT(x) \
15
+ CHECK_CUDA(x); \
16
+ CHECK_CONTIGUOUS(x)
17
+
18
+ torch::Tensor fused_bias_act(const torch::Tensor &input,
19
+ const torch::Tensor &bias,
20
+ const torch::Tensor &refer, int act, int grad,
21
+ float alpha, float scale) {
22
+ CHECK_INPUT(input);
23
+ CHECK_INPUT(bias);
24
+
25
+ at::DeviceGuard guard(input.device());
26
+
27
+ return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale);
28
+ }
29
+
30
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
31
+ m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)");
32
+ }
NTED/op/fused_bias_act_kernel.cu ADDED
@@ -0,0 +1,105 @@
1
+ // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
+ //
3
+ // This work is made available under the Nvidia Source Code License-NC.
4
+ // To view a copy of this license, visit
5
+ // https://nvlabs.github.io/stylegan2/license.html
6
+
7
+ #include <torch/types.h>
8
+
9
+ #include <ATen/ATen.h>
10
+ #include <ATen/AccumulateType.h>
11
+ #include <ATen/cuda/CUDAApplyUtils.cuh>
12
+ #include <ATen/cuda/CUDAContext.h>
13
+
14
+
15
+ #include <cuda.h>
16
+ #include <cuda_runtime.h>
17
+
18
+ template <typename scalar_t>
19
+ static __global__ void
20
+ fused_bias_act_kernel(scalar_t *out, const scalar_t *p_x, const scalar_t *p_b,
21
+ const scalar_t *p_ref, int act, int grad, scalar_t alpha,
22
+ scalar_t scale, int loop_x, int size_x, int step_b,
23
+ int size_b, int use_bias, int use_ref) {
24
+ int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x;
25
+
26
+ scalar_t zero = 0.0;
27
+
28
+ for (int loop_idx = 0; loop_idx < loop_x && xi < size_x;
29
+ loop_idx++, xi += blockDim.x) {
30
+ scalar_t x = p_x[xi];
31
+
32
+ if (use_bias) {
33
+ x += p_b[(xi / step_b) % size_b];
34
+ }
35
+
36
+ scalar_t ref = use_ref ? p_ref[xi] : zero;
37
+
38
+ scalar_t y;
39
+
40
+ switch (act * 10 + grad) {
41
+ default:
42
+ case 10:
43
+ y = x;
44
+ break;
45
+ case 11:
46
+ y = x;
47
+ break;
48
+ case 12:
49
+ y = 0.0;
50
+ break;
51
+
52
+ case 30:
53
+ y = (x > 0.0) ? x : x * alpha;
54
+ break;
55
+ case 31:
56
+ y = (ref > 0.0) ? x : x * alpha;
57
+ break;
58
+ case 32:
59
+ y = 0.0;
60
+ break;
61
+ }
62
+
63
+ out[xi] = y * scale;
64
+ }
65
+ }
66
+
67
+ torch::Tensor fused_bias_act_op(const torch::Tensor &input,
68
+ const torch::Tensor &bias,
69
+ const torch::Tensor &refer, int act, int grad,
70
+ float alpha, float scale) {
71
+ int curDevice = -1;
72
+ cudaGetDevice(&curDevice);
73
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
74
+
75
+ auto x = input.contiguous();
76
+ auto b = bias.contiguous();
77
+ auto ref = refer.contiguous();
78
+
79
+ int use_bias = b.numel() ? 1 : 0;
80
+ int use_ref = ref.numel() ? 1 : 0;
81
+
82
+ int size_x = x.numel();
83
+ int size_b = b.numel();
84
+ int step_b = 1;
85
+
86
+ for (int i = 1 + 1; i < x.dim(); i++) {
87
+ step_b *= x.size(i);
88
+ }
89
+
90
+ int loop_x = 4;
91
+ int block_size = 4 * 32;
92
+ int grid_size = (size_x - 1) / (loop_x * block_size) + 1;
93
+
94
+ auto y = torch::empty_like(x);
95
+
96
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
97
+ x.scalar_type(), "fused_bias_act_kernel", [&] {
98
+ fused_bias_act_kernel<scalar_t><<<grid_size, block_size, 0, stream>>>(
99
+ y.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(),
100
+ b.data_ptr<scalar_t>(), ref.data_ptr<scalar_t>(), act, grad, alpha,
101
+ scale, loop_x, size_x, step_b, size_b, use_bias, use_ref);
102
+ });
103
+
104
+ return y;
105
+ }
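
For reference, the act * 10 + grad switch in the kernel above encodes the activation (1 = linear, 3 = leaky ReLU) together with the derivative order (0 = forward, 1 = first backward pass, 2 = second backward pass, which is zero for both activations). A hedged pure-PyTorch sketch of the same table, useful as a cross-check against the kernel (bias addition omitted):

import torch

def bias_act_reference(x, ref, act, grad, alpha, scale):
    key = act * 10 + grad
    if key in (10, 11):                # linear: forward and first derivative pass through
        y = x
    elif key == 30:                    # leaky ReLU forward
        y = torch.where(x > 0, x, x * alpha)
    elif key == 31:                    # leaky ReLU backward, gated by the saved reference output
        y = torch.where(ref > 0, x, x * alpha)
    else:                              # keys 12 and 32: second derivative is zero
        y = torch.zeros_like(x)
    return y * scale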
NTED/op/upfirdn2d.cpp ADDED
@@ -0,0 +1,31 @@
1
+ #include <ATen/ATen.h>
2
+ #include <torch/extension.h>
3
+
4
+ torch::Tensor upfirdn2d_op(const torch::Tensor &input,
5
+ const torch::Tensor &kernel, int up_x, int up_y,
6
+ int down_x, int down_y, int pad_x0, int pad_x1,
7
+ int pad_y0, int pad_y1);
8
+
9
+ #define CHECK_CUDA(x) \
10
+ TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
11
+ #define CHECK_CONTIGUOUS(x) \
12
+ TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
13
+ #define CHECK_INPUT(x) \
14
+ CHECK_CUDA(x); \
15
+ CHECK_CONTIGUOUS(x)
16
+
17
+ torch::Tensor upfirdn2d(const torch::Tensor &input, const torch::Tensor &kernel,
18
+ int up_x, int up_y, int down_x, int down_y, int pad_x0,
19
+ int pad_x1, int pad_y0, int pad_y1) {
20
+ CHECK_INPUT(input);
21
+ CHECK_INPUT(kernel);
22
+
23
+ at::DeviceGuard guard(input.device());
24
+
25
+ return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
26
+ pad_y0, pad_y1);
27
+ }
28
+
29
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
30
+ m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)");
31
+ }
NTED/op/upfirdn2d.py ADDED
@@ -0,0 +1,209 @@
1
+ from collections import abc
2
+ import os
3
+
4
+ import torch
5
+ from torch.nn import functional as F
6
+ from torch.autograd import Function
7
+ from torch.utils.cpp_extension import load
8
+
9
+
10
+ module_path = os.path.dirname(__file__)
11
+ upfirdn2d_op = load(
12
+ "upfirdn2d",
13
+ sources=[
14
+ os.path.join(module_path, "upfirdn2d.cpp"),
15
+ os.path.join(module_path, "upfirdn2d_kernel.cu"),
16
+ ],
17
+ )
18
+
19
+
20
+ class UpFirDn2dBackward(Function):
21
+ @staticmethod
22
+ def forward(
23
+ ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size
24
+ ):
25
+
26
+ up_x, up_y = up
27
+ down_x, down_y = down
28
+ g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad
29
+
30
+ grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
31
+
32
+ grad_input = upfirdn2d_op.upfirdn2d(
33
+ grad_output,
34
+ grad_kernel.to(grad_output.dtype),
35
+ down_x,
36
+ down_y,
37
+ up_x,
38
+ up_y,
39
+ g_pad_x0,
40
+ g_pad_x1,
41
+ g_pad_y0,
42
+ g_pad_y1,
43
+ )
44
+ grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3])
45
+
46
+ ctx.save_for_backward(kernel)
47
+
48
+ pad_x0, pad_x1, pad_y0, pad_y1 = pad
49
+
50
+ ctx.up_x = up_x
51
+ ctx.up_y = up_y
52
+ ctx.down_x = down_x
53
+ ctx.down_y = down_y
54
+ ctx.pad_x0 = pad_x0
55
+ ctx.pad_x1 = pad_x1
56
+ ctx.pad_y0 = pad_y0
57
+ ctx.pad_y1 = pad_y1
58
+ ctx.in_size = in_size
59
+ ctx.out_size = out_size
60
+
61
+ return grad_input
62
+
63
+ @staticmethod
64
+ def backward(ctx, gradgrad_input):
65
+ kernel, = ctx.saved_tensors
66
+
67
+ gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1)
68
+
69
+ gradgrad_out = upfirdn2d_op.upfirdn2d(
70
+ gradgrad_input,
71
+ kernel.to(gradgrad_input.dtype),
72
+ ctx.up_x,
73
+ ctx.up_y,
74
+ ctx.down_x,
75
+ ctx.down_y,
76
+ ctx.pad_x0,
77
+ ctx.pad_x1,
78
+ ctx.pad_y0,
79
+ ctx.pad_y1,
80
+ )
81
+ # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], ctx.out_size[1], ctx.in_size[3])
82
+ gradgrad_out = gradgrad_out.view(
83
+ ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]
84
+ )
85
+
86
+ return gradgrad_out, None, None, None, None, None, None, None, None
87
+
88
+
89
+ class UpFirDn2d(Function):
90
+ @staticmethod
91
+ def forward(ctx, input, kernel, up, down, pad):
92
+ up_x, up_y = up
93
+ down_x, down_y = down
94
+ pad_x0, pad_x1, pad_y0, pad_y1 = pad
95
+
96
+ kernel_h, kernel_w = kernel.shape
97
+ batch, channel, in_h, in_w = input.shape
98
+ ctx.in_size = input.shape
99
+
100
+ input = input.reshape(-1, in_h, in_w, 1)
101
+
102
+ ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
103
+
104
+ out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
105
+ out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
106
+ ctx.out_size = (out_h, out_w)
107
+
108
+ ctx.up = (up_x, up_y)
109
+ ctx.down = (down_x, down_y)
110
+ ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)
111
+
112
+ g_pad_x0 = kernel_w - pad_x0 - 1
113
+ g_pad_y0 = kernel_h - pad_y0 - 1
114
+ g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
115
+ g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1
116
+
117
+ ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)
118
+
119
+ out = upfirdn2d_op.upfirdn2d(
120
+ input, kernel.to(input.dtype), up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
121
+ )
122
+ # out = out.view(major, out_h, out_w, minor)
123
+ out = out.view(-1, channel, out_h, out_w)
124
+
125
+ return out
126
+
127
+ @staticmethod
128
+ def backward(ctx, grad_output):
129
+ kernel, grad_kernel = ctx.saved_tensors
130
+
131
+ grad_input = None
132
+
133
+ if ctx.needs_input_grad[0]:
134
+ grad_input = UpFirDn2dBackward.apply(
135
+ grad_output,
136
+ kernel,
137
+ grad_kernel,
138
+ ctx.up,
139
+ ctx.down,
140
+ ctx.pad,
141
+ ctx.g_pad,
142
+ ctx.in_size,
143
+ ctx.out_size,
144
+ )
145
+
146
+ return grad_input, None, None, None, None
147
+
148
+
149
+ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
150
+ if not isinstance(up, abc.Iterable):
151
+ up = (up, up)
152
+
153
+ if not isinstance(down, abc.Iterable):
154
+ down = (down, down)
155
+
156
+ if len(pad) == 2:
157
+ pad = (pad[0], pad[1], pad[0], pad[1])
158
+
159
+ if input.device.type == "cpu":
160
+ out = upfirdn2d_native(input, kernel, *up, *down, *pad)
161
+
162
+ else:
163
+ out = UpFirDn2d.apply(input, kernel, up, down, pad)
164
+
165
+ return out
166
+
167
+
168
+ def upfirdn2d_native(
169
+ input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
170
+ ):
171
+ _, channel, in_h, in_w = input.shape
172
+ input = input.reshape(-1, in_h, in_w, 1)
173
+
174
+ _, in_h, in_w, minor = input.shape
175
+ kernel_h, kernel_w = kernel.shape
176
+
177
+ out = input.view(-1, in_h, 1, in_w, 1, minor)
178
+ out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
179
+ out = out.view(-1, in_h * up_y, in_w * up_x, minor)
180
+
181
+ out = F.pad(
182
+ out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
183
+ )
184
+ out = out[
185
+ :,
186
+ max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0),
187
+ max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0),
188
+ :,
189
+ ]
190
+
191
+ out = out.permute(0, 3, 1, 2)
192
+ out = out.reshape(
193
+ [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]
194
+ )
195
+ w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
196
+ out = F.conv2d(out, w)
197
+ out = out.reshape(
198
+ -1,
199
+ minor,
200
+ in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
201
+ in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
202
+ )
203
+ out = out.permute(0, 2, 3, 1)
204
+ out = out[:, ::down_y, ::down_x, :]
205
+
206
+ out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
207
+ out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
208
+
209
+ return out.view(-1, channel, out_h, out_w)
NTED/op/upfirdn2d_kernel.cu ADDED
@@ -0,0 +1,369 @@
1
+ // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
2
+ //
3
+ // This work is made available under the Nvidia Source Code License-NC.
4
+ // To view a copy of this license, visit
5
+ // https://nvlabs.github.io/stylegan2/license.html
6
+
7
+ #include <torch/types.h>
8
+
9
+ #include <ATen/ATen.h>
10
+ #include <ATen/AccumulateType.h>
11
+ #include <ATen/cuda/CUDAApplyUtils.cuh>
12
+ #include <ATen/cuda/CUDAContext.h>
13
+
14
+ #include <cuda.h>
15
+ #include <cuda_runtime.h>
16
+
17
+ static __host__ __device__ __forceinline__ int floor_div(int a, int b) {
18
+ int c = a / b;
19
+
20
+ if (c * b > a) {
21
+ c--;
22
+ }
23
+
24
+ return c;
25
+ }
26
+
27
+ struct UpFirDn2DKernelParams {
28
+ int up_x;
29
+ int up_y;
30
+ int down_x;
31
+ int down_y;
32
+ int pad_x0;
33
+ int pad_x1;
34
+ int pad_y0;
35
+ int pad_y1;
36
+
37
+ int major_dim;
38
+ int in_h;
39
+ int in_w;
40
+ int minor_dim;
41
+ int kernel_h;
42
+ int kernel_w;
43
+ int out_h;
44
+ int out_w;
45
+ int loop_major;
46
+ int loop_x;
47
+ };
48
+
49
+ template <typename scalar_t>
50
+ __global__ void upfirdn2d_kernel_large(scalar_t *out, const scalar_t *input,
51
+ const scalar_t *kernel,
52
+ const UpFirDn2DKernelParams p) {
53
+ int minor_idx = blockIdx.x * blockDim.x + threadIdx.x;
54
+ int out_y = minor_idx / p.minor_dim;
55
+ minor_idx -= out_y * p.minor_dim;
56
+ int out_x_base = blockIdx.y * p.loop_x * blockDim.y + threadIdx.y;
57
+ int major_idx_base = blockIdx.z * p.loop_major;
58
+
59
+ if (out_x_base >= p.out_w || out_y >= p.out_h ||
60
+ major_idx_base >= p.major_dim) {
61
+ return;
62
+ }
63
+
64
+ int mid_y = out_y * p.down_y + p.up_y - 1 - p.pad_y0;
65
+ int in_y = min(max(floor_div(mid_y, p.up_y), 0), p.in_h);
66
+ int h = min(max(floor_div(mid_y + p.kernel_h, p.up_y), 0), p.in_h) - in_y;
67
+ int kernel_y = mid_y + p.kernel_h - (in_y + 1) * p.up_y;
68
+
69
+ for (int loop_major = 0, major_idx = major_idx_base;
70
+ loop_major < p.loop_major && major_idx < p.major_dim;
71
+ loop_major++, major_idx++) {
72
+ for (int loop_x = 0, out_x = out_x_base;
73
+ loop_x < p.loop_x && out_x < p.out_w; loop_x++, out_x += blockDim.y) {
74
+ int mid_x = out_x * p.down_x + p.up_x - 1 - p.pad_x0;
75
+ int in_x = min(max(floor_div(mid_x, p.up_x), 0), p.in_w);
76
+ int w = min(max(floor_div(mid_x + p.kernel_w, p.up_x), 0), p.in_w) - in_x;
77
+ int kernel_x = mid_x + p.kernel_w - (in_x + 1) * p.up_x;
78
+
79
+ const scalar_t *x_p =
80
+ &input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim +
81
+ minor_idx];
82
+ const scalar_t *k_p = &kernel[kernel_y * p.kernel_w + kernel_x];
83
+ int x_px = p.minor_dim;
84
+ int k_px = -p.up_x;
85
+ int x_py = p.in_w * p.minor_dim;
86
+ int k_py = -p.up_y * p.kernel_w;
87
+
88
+ scalar_t v = 0.0f;
89
+
90
+ for (int y = 0; y < h; y++) {
91
+ for (int x = 0; x < w; x++) {
92
+ v += static_cast<scalar_t>(*x_p) * static_cast<scalar_t>(*k_p);
93
+ x_p += x_px;
94
+ k_p += k_px;
95
+ }
96
+
97
+ x_p += x_py - w * x_px;
98
+ k_p += k_py - w * k_px;
99
+ }
100
+
101
+ out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
102
+ minor_idx] = v;
103
+ }
104
+ }
105
+ }
106
+
107
+ template <typename scalar_t, int up_x, int up_y, int down_x, int down_y,
108
+ int kernel_h, int kernel_w, int tile_out_h, int tile_out_w>
109
+ __global__ void upfirdn2d_kernel(scalar_t *out, const scalar_t *input,
110
+ const scalar_t *kernel,
111
+ const UpFirDn2DKernelParams p) {
112
+ const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1;
113
+ const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1;
114
+
115
+ __shared__ volatile float sk[kernel_h][kernel_w];
116
+ __shared__ volatile float sx[tile_in_h][tile_in_w];
117
+
118
+ int minor_idx = blockIdx.x;
119
+ int tile_out_y = minor_idx / p.minor_dim;
120
+ minor_idx -= tile_out_y * p.minor_dim;
121
+ tile_out_y *= tile_out_h;
122
+ int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w;
123
+ int major_idx_base = blockIdx.z * p.loop_major;
124
+
125
+ if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h |
126
+ major_idx_base >= p.major_dim) {
127
+ return;
128
+ }
129
+
130
+ for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w;
131
+ tap_idx += blockDim.x) {
132
+ int ky = tap_idx / kernel_w;
133
+ int kx = tap_idx - ky * kernel_w;
134
+ scalar_t v = 0.0;
135
+
136
+ if (kx < p.kernel_w & ky < p.kernel_h) {
137
+ v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)];
138
+ }
139
+
140
+ sk[ky][kx] = v;
141
+ }
142
+
143
+ for (int loop_major = 0, major_idx = major_idx_base;
144
+ loop_major < p.loop_major & major_idx < p.major_dim;
145
+ loop_major++, major_idx++) {
146
+ for (int loop_x = 0, tile_out_x = tile_out_x_base;
147
+ loop_x < p.loop_x & tile_out_x < p.out_w;
148
+ loop_x++, tile_out_x += tile_out_w) {
149
+ int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0;
150
+ int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0;
151
+ int tile_in_x = floor_div(tile_mid_x, up_x);
152
+ int tile_in_y = floor_div(tile_mid_y, up_y);
153
+
154
+ __syncthreads();
155
+
156
+ for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w;
157
+ in_idx += blockDim.x) {
158
+ int rel_in_y = in_idx / tile_in_w;
159
+ int rel_in_x = in_idx - rel_in_y * tile_in_w;
160
+ int in_x = rel_in_x + tile_in_x;
161
+ int in_y = rel_in_y + tile_in_y;
162
+
163
+ scalar_t v = 0.0;
164
+
165
+ if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) {
166
+ v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) *
167
+ p.minor_dim +
168
+ minor_idx];
169
+ }
170
+
171
+ sx[rel_in_y][rel_in_x] = v;
172
+ }
173
+
174
+ __syncthreads();
175
+ for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w;
176
+ out_idx += blockDim.x) {
177
+ int rel_out_y = out_idx / tile_out_w;
178
+ int rel_out_x = out_idx - rel_out_y * tile_out_w;
179
+ int out_x = rel_out_x + tile_out_x;
180
+ int out_y = rel_out_y + tile_out_y;
181
+
182
+ int mid_x = tile_mid_x + rel_out_x * down_x;
183
+ int mid_y = tile_mid_y + rel_out_y * down_y;
184
+ int in_x = floor_div(mid_x, up_x);
185
+ int in_y = floor_div(mid_y, up_y);
186
+ int rel_in_x = in_x - tile_in_x;
187
+ int rel_in_y = in_y - tile_in_y;
188
+ int kernel_x = (in_x + 1) * up_x - mid_x - 1;
189
+ int kernel_y = (in_y + 1) * up_y - mid_y - 1;
190
+
191
+ scalar_t v = 0.0;
192
+
193
+ #pragma unroll
194
+ for (int y = 0; y < kernel_h / up_y; y++)
195
+ #pragma unroll
196
+ for (int x = 0; x < kernel_w / up_x; x++)
197
+ v += sx[rel_in_y + y][rel_in_x + x] *
198
+ sk[kernel_y + y * up_y][kernel_x + x * up_x];
199
+
200
+ if (out_x < p.out_w & out_y < p.out_h) {
201
+ out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
202
+ minor_idx] = v;
203
+ }
204
+ }
205
+ }
206
+ }
207
+ }
208
+
209
+ torch::Tensor upfirdn2d_op(const torch::Tensor &input,
210
+ const torch::Tensor &kernel, int up_x, int up_y,
211
+ int down_x, int down_y, int pad_x0, int pad_x1,
212
+ int pad_y0, int pad_y1) {
213
+ int curDevice = -1;
214
+ cudaGetDevice(&curDevice);
215
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
216
+
217
+ UpFirDn2DKernelParams p;
218
+
219
+ auto x = input.contiguous();
220
+ auto k = kernel.contiguous();
221
+
222
+ p.major_dim = x.size(0);
223
+ p.in_h = x.size(1);
224
+ p.in_w = x.size(2);
225
+ p.minor_dim = x.size(3);
226
+ p.kernel_h = k.size(0);
227
+ p.kernel_w = k.size(1);
228
+ p.up_x = up_x;
229
+ p.up_y = up_y;
230
+ p.down_x = down_x;
231
+ p.down_y = down_y;
232
+ p.pad_x0 = pad_x0;
233
+ p.pad_x1 = pad_x1;
234
+ p.pad_y0 = pad_y0;
235
+ p.pad_y1 = pad_y1;
236
+
237
+ p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) /
238
+ p.down_y;
239
+ p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) /
240
+ p.down_x;
241
+
242
+ auto out =
243
+ at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options());
244
+
245
+ int mode = -1;
246
+
247
+ int tile_out_h = -1;
248
+ int tile_out_w = -1;
249
+
250
+ if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
251
+ p.kernel_h <= 4 && p.kernel_w <= 4) {
252
+ mode = 1;
253
+ tile_out_h = 16;
254
+ tile_out_w = 64;
255
+ }
256
+
257
+ if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
258
+ p.kernel_h <= 3 && p.kernel_w <= 3) {
259
+ mode = 2;
260
+ tile_out_h = 16;
261
+ tile_out_w = 64;
262
+ }
263
+
264
+ if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
265
+ p.kernel_h <= 4 && p.kernel_w <= 4) {
266
+ mode = 3;
267
+ tile_out_h = 16;
268
+ tile_out_w = 64;
269
+ }
270
+
271
+ if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
272
+ p.kernel_h <= 2 && p.kernel_w <= 2) {
273
+ mode = 4;
274
+ tile_out_h = 16;
275
+ tile_out_w = 64;
276
+ }
277
+
278
+ if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
279
+ p.kernel_h <= 4 && p.kernel_w <= 4) {
280
+ mode = 5;
281
+ tile_out_h = 8;
282
+ tile_out_w = 32;
283
+ }
284
+
285
+ if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
286
+ p.kernel_h <= 2 && p.kernel_w <= 2) {
287
+ mode = 6;
288
+ tile_out_h = 8;
289
+ tile_out_w = 32;
290
+ }
291
+
292
+ dim3 block_size;
293
+ dim3 grid_size;
294
+
295
+ if (tile_out_h > 0 && tile_out_w > 0) {
296
+ p.loop_major = (p.major_dim - 1) / 16384 + 1;
297
+ p.loop_x = 1;
298
+ block_size = dim3(32 * 8, 1, 1);
299
+ grid_size = dim3(((p.out_h - 1) / tile_out_h + 1) * p.minor_dim,
300
+ (p.out_w - 1) / (p.loop_x * tile_out_w) + 1,
301
+ (p.major_dim - 1) / p.loop_major + 1);
302
+ } else {
303
+ p.loop_major = (p.major_dim - 1) / 16384 + 1;
304
+ p.loop_x = 4;
305
+ block_size = dim3(4, 32, 1);
306
+ grid_size = dim3((p.out_h * p.minor_dim - 1) / block_size.x + 1,
307
+ (p.out_w - 1) / (p.loop_x * block_size.y) + 1,
308
+ (p.major_dim - 1) / p.loop_major + 1);
309
+ }
310
+
311
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] {
312
+ switch (mode) {
313
+ case 1:
314
+ upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 4, 4, 16, 64>
315
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
316
+ x.data_ptr<scalar_t>(),
317
+ k.data_ptr<scalar_t>(), p);
318
+
319
+ break;
320
+
321
+ case 2:
322
+ upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 3, 3, 16, 64>
323
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
324
+ x.data_ptr<scalar_t>(),
325
+ k.data_ptr<scalar_t>(), p);
326
+
327
+ break;
328
+
329
+ case 3:
330
+ upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 4, 4, 16, 64>
331
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
332
+ x.data_ptr<scalar_t>(),
333
+ k.data_ptr<scalar_t>(), p);
334
+
335
+ break;
336
+
337
+ case 4:
338
+ upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 2, 2, 16, 64>
339
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
340
+ x.data_ptr<scalar_t>(),
341
+ k.data_ptr<scalar_t>(), p);
342
+
343
+ break;
344
+
345
+ case 5:
346
+ upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32>
347
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
348
+ x.data_ptr<scalar_t>(),
349
+ k.data_ptr<scalar_t>(), p);
350
+
351
+ break;
352
+
353
+ case 6:
354
+ upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32>
355
+ <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
356
+ x.data_ptr<scalar_t>(),
357
+ k.data_ptr<scalar_t>(), p);
358
+
359
+ break;
360
+
361
+ default:
362
+ upfirdn2d_kernel_large<scalar_t><<<grid_size, block_size, 0, stream>>>(
363
+ out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(),
364
+ k.data_ptr<scalar_t>(), p);
365
+ }
366
+ });
367
+
368
+ return out;
369
+ }
app.py CHANGED
@@ -1,17 +1,29 @@
1
  import gradio as gr
2
 
3
- def greet(年龄预测器_输入您的年龄):
4
- return "恭喜,您今年" + 年龄预测器_输入您的年龄 + "岁了!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
 
8
- demo.launch()
9
 
10
  '''
11
  TODO
12
- First integrate openpose light and test it
13
-
14
  Test the video display feature
15
-
16
-
17
  '''
 
1
  import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import torch
5
+ from NTED.NTED_module import NTED
6
 
7
+ NTED_Module = NTED()
 
8
 
9
+ def pose_transfer(上传人体姿态图):
10
+ img = 上传人体姿态图
11
+ fake_img = NTED_Module.inference(img)
12
+
13
+ return fake_img
14
 
15
+ with gr.Column():
16
+ result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
17
+
18
+ gr.Interface(fn=pose_transfer,
19
+ inputs=["image"],
20
+ outputs=[result_gallery],
21
+ title="谷小雨姿态驱动图像",
22
+ examples=[["example/exp1.png"], ["example/exp2.png"], ["example/exp3.png"],\
23
+ ["example/exp4.png"], ["example/exp5.png"], ["example/exp6.png"]],
24
+ ).launch(server_name='0.0.0.0')
25
 
26
  '''
27
  TODO
28
  Test the video display feature
29
  '''
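
A minimal local smoke test of the new inference path, independent of the Gradio UI (it assumes NTED/nted_checkpoint.pt, lite_openpose/checkpoint_iter_370000.pth, and the example images added in this commit are all in place):

import cv2
from NTED.NTED_module import NTED

nted = NTED()                               # loads both checkpoints and example/ref_img.png on CPU
pose_img = cv2.imread("example/exp1.png")   # one of the driving-pose examples above
fake_img = nted.inference(pose_img)         # the same call pose_transfer() makes
print(type(fake_img))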
example/exp1.png ADDED
example/exp2.png ADDED
example/exp3.png ADDED
example/exp4.png ADDED
example/exp5.png ADDED
example/exp6.png ADDED
example/ref_img.png ADDED

Git LFS Details

  • SHA256: b3396e7f8e0a18f0c8dc50d1f98cabf26c13d8629e5b454a680531ea6daf31ed
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
lite_openpose/body_bbox_detector.py ADDED
@@ -0,0 +1,179 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+
3
+ import os
4
+ import os.path as osp
5
+ import sys
6
+ import numpy as np
7
+ import cv2
8
+ import math
9
+
10
+ import torch
11
+ import torchvision.transforms as transforms
12
+ # from PIL import Image
13
+
14
+ # Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py
15
+
16
+ # 2D body pose estimator
17
+ sys.path.append('/apdcephfs/share_1474453/zejunzhang/workspace/HR-VITON/dataset_process_utils/lite_openpose')
18
+ from pose2d_models.with_mobilenet import PoseEstimationWithMobileNet
19
+ from modules.load_state import load_state
20
+ from modules.pose import Pose, track_poses
21
+ from modules.keypoints import extract_keypoints, group_keypoints
22
+
23
+
24
+ def normalize(img, img_mean, img_scale):
25
+ img = np.array(img, dtype=np.float32)
26
+ img = (img - img_mean) * img_scale
27
+ return img
28
+
29
+
30
+ def pad_width(img, stride, pad_value, min_dims):
31
+ h, w, _ = img.shape
32
+ h = min(min_dims[0], h)
33
+ min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride
34
+ min_dims[1] = max(min_dims[1], w)
35
+ min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride
36
+ pad = []
37
+ pad.append(int(math.floor((min_dims[0] - h) / 2.0)))
38
+ pad.append(int(math.floor((min_dims[1] - w) / 2.0)))
39
+ pad.append(int(min_dims[0] - h - pad[0]))
40
+ pad.append(int(min_dims[1] - w - pad[1]))
41
+ padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3],
42
+ cv2.BORDER_CONSTANT, value=pad_value)
43
+ return padded_img, pad
44
+
45
+
46
+ class BodyPoseEstimator(object):
47
+ """
48
+ Hand Detector for third-view input.
49
+ It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git)
50
+ """
51
+ def __init__(self, device='cpu'):
52
+ # print("Loading Body Pose Estimator")
53
+ self.device=device
54
+ self.__load_body_estimator()
55
+
56
+
57
+
58
+ def __load_body_estimator(self):
59
+ net = PoseEstimationWithMobileNet()
60
+ pose2d_checkpoint = "lite_openpose/checkpoint_iter_370000.pth"
61
+ checkpoint = torch.load(pose2d_checkpoint, map_location='cpu')
62
+ load_state(net, checkpoint)
63
+ net = net.eval()
64
+ net = net.to(self.device)
65
+ self.model = net
66
+
67
+
68
+ #Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/demo.py
69
+ def __infer_fast(self, img, input_height_size, stride, upsample_ratio,
70
+ cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256):
71
+ height, width, _ = img.shape
72
+ scale = input_height_size / height
73
+
74
+ scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
75
+ scaled_img = normalize(scaled_img, img_mean, img_scale)
76
+ min_dims = [input_height_size, max(scaled_img.shape[1], input_height_size)]
77
+ padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)
78
+
79
+ tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
80
+ if not cpu:
81
+ tensor_img = tensor_img.to(self.device)
82
+
83
+ with torch.no_grad():
84
+ stages_output = self.model(tensor_img)
85
+
86
+ stage2_heatmaps = stages_output[-2]
87
+ heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
88
+ heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)
89
+
90
+ stage2_pafs = stages_output[-1]
91
+ pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
92
+ pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)
93
+
94
+ return heatmaps, pafs, scale, pad
95
+
96
+ def detect_body_pose(self, img):
97
+ """
98
+ Output:
99
+ current_bbox: BBOX_XYWH
100
+ """
101
+ stride = 8
102
+ upsample_ratio = 4
103
+ orig_img = img.copy()
104
+
105
+ # forward
106
+ heatmaps, pafs, scale, pad = self.__infer_fast(img,
107
+ input_height_size=256, stride=stride, upsample_ratio=upsample_ratio)
108
+
109
+ total_keypoints_num = 0
110
+ all_keypoints_by_type = []
111
+ num_keypoints = Pose.num_kpts
112
+ for kpt_idx in range(num_keypoints): # 19th for bg
113
+ total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)
114
+
115
+ pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
116
+ for kpt_id in range(all_keypoints.shape[0]):
117
+ all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
118
+ all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
119
+
120
+ '''
121
+ # print(len(pose_entries))
122
+ if len(pose_entries)>1:
123
+ pose_entries = pose_entries[:1]
124
+ print("We only support one person currently")
125
+ # assert len(pose_entries) == 1, "We only support one person currently"
126
+ '''
127
+
128
+ current_poses, current_bbox = list(), list()
129
+ for n in range(len(pose_entries)):
130
+ if len(pose_entries[n]) == 0:
131
+ continue
132
+ pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
133
+ for kpt_id in range(num_keypoints):
134
+ if pose_entries[n][kpt_id] != -1.0: # keypoint was found
135
+ pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
136
+ pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
137
+ pose = Pose(pose_keypoints, pose_entries[n][18])
138
+ current_poses.append(pose.keypoints)
139
+ current_bbox.append(np.array(pose.bbox))
140
+
141
+ # enlarge the bbox
142
+ for i, bbox in enumerate(current_bbox):
143
+ x, y, w, h = bbox
144
+ margin = 0.2
145
+ x_margin = int(w * margin)
146
+ y_margin = int(h * margin)
147
+ x0 = max(x-x_margin, 0)
148
+ y0 = max(y-y_margin, 0)
149
+ x1 = min(x+w+x_margin, orig_img.shape[1])
150
+ y1 = min(y+h+y_margin, orig_img.shape[0])
151
+ current_bbox[i] = np.array((x0, y0, x1, y1)).astype(np.int32) # ltrb
152
+
153
+ # only keep one person
154
+ body_point_list = []
155
+ if len(current_poses) > 0:
156
+ for item in current_poses[0]:
157
+ if item[0] == item[1] == -1:
158
+ body_point_list += [0.0, 0.0, 0.0]
159
+ else:
160
+ body_point_list += [float(item[0]), float(item[1]), 1.0]
161
+ else:
162
+ for i in range(18):
163
+ body_point_list += [0.0, 0.0, 0.0]
164
+
165
+ pose_dict = dict()
166
+ pose_dict["people"] = []
167
+ pose_dict["people"].append({
168
+ "person_id": [-1],
169
+ "pose_keypoints_2d": body_point_list,
170
+ "hand_left_keypoints_2d": [],
171
+ "hand_right_keypoints_2d": [],
172
+ "face_keypoints_2d": [],
173
+ "pose_keypoints_3d": [],
174
+ "face_keypoints_3d": [],
175
+ "hand_left_keypoints_3d": [],
176
+ "hand_right_keypoints_3d": [],
177
+ })
178
+
179
+ return current_poses, current_bbox
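
A hedged usage sketch for the wrapper above. Note that this file imports pose2d_models.* and modules.* as top-level packages, so the lite_openpose directory itself has to be on sys.path (the hardcoded sys.path.append near the top points at an internal cluster path and will not exist elsewhere):

import sys
import cv2

sys.path.append("lite_openpose")            # so pose2d_models/ and modules/ resolve
from lite_openpose.body_bbox_detector import BodyPoseEstimator

estimator = BodyPoseEstimator(device="cpu") # loads checkpoint_iter_370000.pth
img = cv2.imread("example/exp1.png")        # any BGR image containing a person
poses, bboxes = estimator.detect_body_pose(img)
print(len(poses), "person(s) detected")
if len(bboxes) > 0:
    print("first bbox (l, t, r, b):", bboxes[0])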
lite_openpose/checkpoint_iter_370000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517c86f769c6636583083f1467e3d212a0006c27109edb3aeffc19a79622d411
3
+ size 87959810
lite_openpose/modules/__init__.py ADDED
File without changes
lite_openpose/modules/conv.py ADDED
@@ -0,0 +1,32 @@
1
+ from torch import nn
2
+
3
+
4
+ def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True):
5
+ modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)]
6
+ if bn:
7
+ modules.append(nn.BatchNorm2d(out_channels))
8
+ if relu:
9
+ modules.append(nn.ReLU(inplace=True))
10
+ return nn.Sequential(*modules)
11
+
12
+
13
+ def conv_dw(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1):
14
+ return nn.Sequential(
15
+ nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False),
16
+ nn.BatchNorm2d(in_channels),
17
+ nn.ReLU(inplace=True),
18
+
19
+ nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
20
+ nn.BatchNorm2d(out_channels),
21
+ nn.ReLU(inplace=True),
22
+ )
23
+
24
+
25
+ def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1):
26
+ return nn.Sequential(
27
+ nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False),
28
+ nn.ELU(inplace=True),
29
+
30
+ nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
31
+ nn.ELU(inplace=True),
32
+ )
lite_openpose/modules/get_parameters.py ADDED
@@ -0,0 +1,23 @@
1
+ from torch import nn
2
+
3
+
4
+ def get_parameters(model, predicate):
5
+ for module in model.modules():
6
+ for param_name, param in module.named_parameters():
7
+ if predicate(module, param_name):
8
+ yield param
9
+
10
+
11
+ def get_parameters_conv(model, name):
12
+ return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d) and m.groups == 1 and p == name)
13
+
14
+
15
+ def get_parameters_conv_depthwise(model, name):
16
+ return get_parameters(model, lambda m, p: isinstance(m, nn.Conv2d)
17
+ and m.groups == m.in_channels
18
+ and m.in_channels == m.out_channels
19
+ and p == name)
20
+
21
+
22
+ def get_parameters_bn(model, name):
23
+ return get_parameters(model, lambda m, p: isinstance(m, nn.BatchNorm2d) and p == name)
lite_openpose/modules/keypoints.py ADDED
@@ -0,0 +1,201 @@
1
+ import math
2
+ import numpy as np
3
+ from operator import itemgetter
4
+
5
+ BODY_PARTS_KPT_IDS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11],
6
+ [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]]
7
+ BODY_PARTS_PAF_IDS = ([12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5],
8
+ [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27])
9
+
10
+
11
+ def linspace2d(start, stop, n=10):
12
+ points = 1 / (n - 1) * (stop - start)
13
+ return points[:, None] * np.arange(n) + start[:, None]
14
+
15
+
16
+ def extract_keypoints(heatmap, all_keypoints, total_keypoint_num):
17
+ heatmap[heatmap < 0.1] = 0
18
+ heatmap_with_borders = np.pad(heatmap, [(2, 2), (2, 2)], mode='constant')
19
+ heatmap_center = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 1:heatmap_with_borders.shape[1]-1]
20
+ heatmap_left = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 2:heatmap_with_borders.shape[1]]
21
+ heatmap_right = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 0:heatmap_with_borders.shape[1]-2]
22
+ heatmap_up = heatmap_with_borders[2:heatmap_with_borders.shape[0], 1:heatmap_with_borders.shape[1]-1]
23
+ heatmap_down = heatmap_with_borders[0:heatmap_with_borders.shape[0]-2, 1:heatmap_with_borders.shape[1]-1]
24
+
25
+ heatmap_peaks = (heatmap_center > heatmap_left) &\
26
+ (heatmap_center > heatmap_right) &\
27
+ (heatmap_center > heatmap_up) &\
28
+ (heatmap_center > heatmap_down)
29
+ heatmap_peaks = heatmap_peaks[1:heatmap_center.shape[0]-1, 1:heatmap_center.shape[1]-1]
30
+ keypoints = list(zip(np.nonzero(heatmap_peaks)[1], np.nonzero(heatmap_peaks)[0])) # (w, h)
31
+ keypoints = sorted(keypoints, key=itemgetter(0))
32
+
33
+ suppressed = np.zeros(len(keypoints), np.uint8)
34
+ keypoints_with_score_and_id = []
35
+ keypoint_num = 0
36
+ for i in range(len(keypoints)):
37
+ if suppressed[i]:
38
+ continue
39
+ for j in range(i+1, len(keypoints)):
40
+ if math.sqrt((keypoints[i][0] - keypoints[j][0]) ** 2 +
41
+ (keypoints[i][1] - keypoints[j][1]) ** 2) < 6:
42
+ suppressed[j] = 1
43
+ keypoint_with_score_and_id = (keypoints[i][0], keypoints[i][1], heatmap[keypoints[i][1], keypoints[i][0]],
44
+ total_keypoint_num + keypoint_num)
45
+ keypoints_with_score_and_id.append(keypoint_with_score_and_id)
46
+ keypoint_num += 1
47
+ all_keypoints.append(keypoints_with_score_and_id)
48
+ return keypoint_num
49
+
50
+
51
+ def group_keypoints(all_keypoints_by_type, pafs, pose_entry_size=20, min_paf_score=0.05, demo=False):
52
+ pose_entries = []
53
+ all_keypoints = np.array([item for sublist in all_keypoints_by_type for item in sublist])
54
+ for part_id in range(len(BODY_PARTS_PAF_IDS)):
55
+ part_pafs = pafs[:, :, BODY_PARTS_PAF_IDS[part_id]]
56
+ kpts_a = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][0]]
57
+ kpts_b = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][1]]
58
+ num_kpts_a = len(kpts_a)
59
+ num_kpts_b = len(kpts_b)
60
+ kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0]
61
+ kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1]
62
+
63
+ if num_kpts_a == 0 and num_kpts_b == 0: # no keypoints for such body part
64
+ continue
65
+ elif num_kpts_a == 0: # body part has just 'b' keypoints
66
+ for i in range(num_kpts_b):
67
+ num = 0
68
+ for j in range(len(pose_entries)): # check if already in some pose, was added by another body part
69
+ if pose_entries[j][kpt_b_id] == kpts_b[i][3]:
70
+ num += 1
71
+ continue
72
+ if num == 0:
73
+ pose_entry = np.ones(pose_entry_size) * -1
74
+ pose_entry[kpt_b_id] = kpts_b[i][3] # keypoint idx
75
+ pose_entry[-1] = 1 # num keypoints in pose
76
+ pose_entry[-2] = kpts_b[i][2] # pose score
77
+ pose_entries.append(pose_entry)
78
+ continue
79
+ elif num_kpts_b == 0: # body part has just 'a' keypoints
80
+ for i in range(num_kpts_a):
81
+ num = 0
82
+ for j in range(len(pose_entries)):
83
+ if pose_entries[j][kpt_a_id] == kpts_a[i][3]:
84
+ num += 1
85
+ continue
86
+ if num == 0:
87
+ pose_entry = np.ones(pose_entry_size) * -1
88
+ pose_entry[kpt_a_id] = kpts_a[i][3]
89
+ pose_entry[-1] = 1
90
+ pose_entry[-2] = kpts_a[i][2]
91
+ pose_entries.append(pose_entry)
92
+ continue
93
+
94
+ connections = []
95
+ for i in range(num_kpts_a):
96
+ kpt_a = np.array(kpts_a[i][0:2])
97
+ for j in range(num_kpts_b):
98
+ kpt_b = np.array(kpts_b[j][0:2])
99
+ mid_point = [(), ()]
100
+ mid_point[0] = (int(round((kpt_a[0] + kpt_b[0]) * 0.5)),
101
+ int(round((kpt_a[1] + kpt_b[1]) * 0.5)))
102
+ mid_point[1] = mid_point[0]
103
+
104
+ vec = [kpt_b[0] - kpt_a[0], kpt_b[1] - kpt_a[1]]
105
+ vec_norm = math.sqrt(vec[0] ** 2 + vec[1] ** 2)
106
+ if vec_norm == 0:
107
+ continue
108
+ vec[0] /= vec_norm
109
+ vec[1] /= vec_norm
110
+ cur_point_score = (vec[0] * part_pafs[mid_point[0][1], mid_point[0][0], 0] +
111
+ vec[1] * part_pafs[mid_point[1][1], mid_point[1][0], 1])
112
+
113
+ height_n = pafs.shape[0] // 2
114
+ success_ratio = 0
115
+ point_num = 10 # number of points to integration over paf
116
+ if cur_point_score > -100:
117
+ passed_point_score = 0
118
+ passed_point_num = 0
119
+ x, y = linspace2d(kpt_a, kpt_b)
120
+ for point_idx in range(point_num):
121
+ if not demo:
122
+ px = int(round(x[point_idx]))
123
+ py = int(round(y[point_idx]))
124
+ else:
125
+ px = int(x[point_idx])
126
+ py = int(y[point_idx])
127
+ paf = part_pafs[py, px, 0:2]
128
+ cur_point_score = vec[0] * paf[0] + vec[1] * paf[1]
129
+ if cur_point_score > min_paf_score:
130
+ passed_point_score += cur_point_score
131
+ passed_point_num += 1
132
+ success_ratio = passed_point_num / point_num
133
+ ratio = 0
134
+ if passed_point_num > 0:
135
+ ratio = passed_point_score / passed_point_num
136
+ ratio += min(height_n / vec_norm - 1, 0)
137
+ if ratio > 0 and success_ratio > 0.8:
138
+ score_all = ratio + kpts_a[i][2] + kpts_b[j][2]
139
+ connections.append([i, j, ratio, score_all])
140
+ if len(connections) > 0:
141
+ connections = sorted(connections, key=itemgetter(2), reverse=True)
142
+
143
+ num_connections = min(num_kpts_a, num_kpts_b)
144
+ has_kpt_a = np.zeros(num_kpts_a, dtype=np.int32)
145
+ has_kpt_b = np.zeros(num_kpts_b, dtype=np.int32)
146
+ filtered_connections = []
147
+ for row in range(len(connections)):
148
+ if len(filtered_connections) == num_connections:
149
+ break
150
+ i, j, cur_point_score = connections[row][0:3]
151
+ if not has_kpt_a[i] and not has_kpt_b[j]:
152
+ filtered_connections.append([kpts_a[i][3], kpts_b[j][3], cur_point_score])
153
+ has_kpt_a[i] = 1
154
+ has_kpt_b[j] = 1
155
+ connections = filtered_connections
156
+ if len(connections) == 0:
157
+ continue
158
+
159
+ if part_id == 0:
160
+ pose_entries = [np.ones(pose_entry_size) * -1 for _ in range(len(connections))]
161
+ for i in range(len(connections)):
162
+ pose_entries[i][BODY_PARTS_KPT_IDS[0][0]] = connections[i][0]
163
+ pose_entries[i][BODY_PARTS_KPT_IDS[0][1]] = connections[i][1]
164
+ pose_entries[i][-1] = 2
165
+ pose_entries[i][-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2]
166
+ elif part_id == 17 or part_id == 18:
167
+ kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0]
168
+ kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1]
169
+ for i in range(len(connections)):
170
+ for j in range(len(pose_entries)):
171
+ if pose_entries[j][kpt_a_id] == connections[i][0] and pose_entries[j][kpt_b_id] == -1:
172
+ pose_entries[j][kpt_b_id] = connections[i][1]
173
+ elif pose_entries[j][kpt_b_id] == connections[i][1] and pose_entries[j][kpt_a_id] == -1:
174
+ pose_entries[j][kpt_a_id] = connections[i][0]
175
+ continue
176
+ else:
177
+ kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0]
178
+ kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1]
179
+ for i in range(len(connections)):
180
+ num = 0
181
+ for j in range(len(pose_entries)):
182
+ if pose_entries[j][kpt_a_id] == connections[i][0]:
183
+ pose_entries[j][kpt_b_id] = connections[i][1]
184
+ num += 1
185
+ pose_entries[j][-1] += 1
186
+ pose_entries[j][-2] += all_keypoints[connections[i][1], 2] + connections[i][2]
187
+ if num == 0:
188
+ pose_entry = np.ones(pose_entry_size) * -1
189
+ pose_entry[kpt_a_id] = connections[i][0]
190
+ pose_entry[kpt_b_id] = connections[i][1]
191
+ pose_entry[-1] = 2
192
+ pose_entry[-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2]
193
+ pose_entries.append(pose_entry)
194
+
195
+ filtered_entries = []
196
+ for i in range(len(pose_entries)):
197
+ if pose_entries[i][-1] < 3 or (pose_entries[i][-2] / pose_entries[i][-1] < 0.2):
198
+ continue
199
+ filtered_entries.append(pose_entries[i])
200
+ pose_entries = np.asarray(filtered_entries)
201
+ return pose_entries, all_keypoints
lite_openpose/modules/load_state.py ADDED
@@ -0,0 +1,32 @@
1
+ import collections
2
+
3
+
4
+ def load_state(net, checkpoint):
5
+ source_state = checkpoint['state_dict']
6
+ target_state = net.state_dict()
7
+ new_target_state = collections.OrderedDict()
8
+ for target_key, target_value in target_state.items():
9
+ if target_key in source_state and source_state[target_key].size() == target_state[target_key].size():
10
+ new_target_state[target_key] = source_state[target_key]
11
+ else:
12
+ new_target_state[target_key] = target_state[target_key]
13
+ print('[WARNING] Not found pre-trained parameters for {}'.format(target_key))
14
+
15
+ net.load_state_dict(new_target_state)
16
+
17
+
18
+ def load_from_mobilenet(net, checkpoint):
19
+ source_state = checkpoint['state_dict']
20
+ target_state = net.state_dict()
21
+ new_target_state = collections.OrderedDict()
22
+ for target_key, target_value in target_state.items():
23
+ k = target_key
24
+ if k.find('model') != -1:
25
+ k = k.replace('model', 'module.model')
26
+ if k in source_state and source_state[k].size() == target_state[target_key].size():
27
+ new_target_state[target_key] = source_state[k]
28
+ else:
29
+ new_target_state[target_key] = target_state[target_key]
30
+ print('[WARNING] Not found pre-trained parameters for {}'.format(target_key))
31
+
32
+ net.load_state_dict(new_target_state)
lite_openpose/modules/loss.py ADDED
@@ -0,0 +1,5 @@
1
+ def l2_loss(input, target, mask, batch_size):
2
+ loss = (input - target) * mask
3
+ loss = (loss * loss) / 2 / batch_size
4
+
5
+ return loss.sum()
lite_openpose/modules/one_euro_filter.py ADDED
@@ -0,0 +1,51 @@
1
+ import math
2
+
3
+
4
+ def get_alpha(rate=30, cutoff=1):
5
+ tau = 1 / (2 * math.pi * cutoff)
6
+ te = 1 / rate
7
+ return 1 / (1 + tau / te)
8
+
9
+
10
+ class LowPassFilter:
11
+ def __init__(self):
12
+ self.x_previous = None
13
+
14
+ def __call__(self, x, alpha=0.5):
15
+ if self.x_previous is None:
16
+ self.x_previous = x
17
+ return x
18
+ x_filtered = alpha * x + (1 - alpha) * self.x_previous
19
+ self.x_previous = x_filtered
20
+ return x_filtered
21
+
22
+
23
+ class OneEuroFilter:
24
+ def __init__(self, freq=15, mincutoff=1, beta=0.05, dcutoff=1):
25
+ self.freq = freq
26
+ self.mincutoff = mincutoff
27
+ self.beta = beta
28
+ self.dcutoff = dcutoff
29
+ self.filter_x = LowPassFilter()
30
+ self.filter_dx = LowPassFilter()
31
+ self.x_previous = None
32
+ self.dx = None
33
+
34
+ def __call__(self, x):
35
+ if self.dx is None:
36
+ self.dx = 0
37
+ else:
38
+ self.dx = (x - self.x_previous) * self.freq
39
+ dx_smoothed = self.filter_dx(self.dx, get_alpha(self.freq, self.dcutoff))
40
+ cutoff = self.mincutoff + self.beta * abs(dx_smoothed)
41
+ x_filtered = self.filter_x(x, get_alpha(self.freq, cutoff))
42
+ self.x_previous = x
43
+ return x_filtered
44
+
45
+
46
+ if __name__ == '__main__':
47
+ filter = OneEuroFilter(freq=15, beta=0.1)
48
+ for val in range(10):
49
+ x = val + (-1)**(val % 2)
50
+ x_filtered = filter(x)
51
+ print(x_filtered, x)
lite_openpose/modules/pose.py ADDED
@@ -0,0 +1,118 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+ from modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS
5
+ from modules.one_euro_filter import OneEuroFilter
6
+
7
+
8
+ class Pose:
9
+ num_kpts = 18
10
+ kpt_names = ['nose', 'neck',
11
+ 'r_sho', 'r_elb', 'r_wri', 'l_sho', 'l_elb', 'l_wri',
12
+ 'r_hip', 'r_knee', 'r_ank', 'l_hip', 'l_knee', 'l_ank',
13
+ 'r_eye', 'l_eye',
14
+ 'r_ear', 'l_ear']
15
+ sigmas = np.array([.26, .79, .79, .72, .62, .79, .72, .62, 1.07, .87, .89, 1.07, .87, .89, .25, .25, .35, .35],
16
+ dtype=np.float32) / 10.0
17
+ vars = (sigmas * 2) ** 2
18
+ last_id = -1
19
+ color = [0, 224, 255]
20
+
21
+ def __init__(self, keypoints, confidence):
22
+ super().__init__()
23
+ self.keypoints = keypoints
24
+ self.confidence = confidence
25
+ self.bbox = Pose.get_bbox(self.keypoints)
26
+ self.id = None
27
+ self.filters = [[OneEuroFilter(), OneEuroFilter()] for _ in range(Pose.num_kpts)]
28
+
29
+ @staticmethod
30
+ def get_bbox(keypoints):
31
+ found_keypoints = np.zeros((np.count_nonzero(keypoints[:, 0] != -1), 2), dtype=np.int32)
32
+ found_kpt_id = 0
33
+ for kpt_id in range(Pose.num_kpts):
34
+ if keypoints[kpt_id, 0] == -1:
35
+ continue
36
+ found_keypoints[found_kpt_id] = keypoints[kpt_id]
37
+ found_kpt_id += 1
38
+ bbox = cv2.boundingRect(found_keypoints)
39
+ return bbox
40
+
41
+ def update_id(self, id=None):
42
+ self.id = id
43
+ if self.id is None:
44
+ self.id = Pose.last_id + 1
45
+ Pose.last_id += 1
46
+
47
+ def draw(self, img):
48
+ assert self.keypoints.shape == (Pose.num_kpts, 2)
49
+
50
+ for part_id in range(len(BODY_PARTS_PAF_IDS) - 2):
51
+ kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0]
52
+ global_kpt_a_id = self.keypoints[kpt_a_id, 0]
53
+ if global_kpt_a_id != -1:
54
+ x_a, y_a = self.keypoints[kpt_a_id]
55
+ cv2.circle(img, (int(x_a), int(y_a)), 3, Pose.color, -1)
56
+ kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1]
57
+ global_kpt_b_id = self.keypoints[kpt_b_id, 0]
58
+ if global_kpt_b_id != -1:
59
+ x_b, y_b = self.keypoints[kpt_b_id]
60
+ cv2.circle(img, (int(x_b), int(y_b)), 3, Pose.color, -1)
61
+ if global_kpt_a_id != -1 and global_kpt_b_id != -1:
62
+ cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), Pose.color, 2)
63
+
64
+
65
+ def get_similarity(a, b, threshold=0.5):
66
+ num_similar_kpt = 0
67
+ for kpt_id in range(Pose.num_kpts):
68
+ if a.keypoints[kpt_id, 0] != -1 and b.keypoints[kpt_id, 0] != -1:
69
+ distance = np.sum((a.keypoints[kpt_id] - b.keypoints[kpt_id]) ** 2)
70
+ area = max(a.bbox[2] * a.bbox[3], b.bbox[2] * b.bbox[3])
71
+ similarity = np.exp(-distance / (2 * (area + np.spacing(1)) * Pose.vars[kpt_id]))
72
+ if similarity > threshold:
73
+ num_similar_kpt += 1
74
+ return num_similar_kpt
75
+
76
+
77
+ def track_poses(previous_poses, current_poses, threshold=3, smooth=False):
78
+ """Propagate poses ids from previous frame results. Id is propagated,
79
+ if there are at least `threshold` similar keypoints between pose from previous frame and current.
80
+ If correspondence between pose on previous and current frame was established, pose keypoints are smoothed.
81
+
82
+ :param previous_poses: poses from previous frame with ids
83
+ :param current_poses: poses from current frame to assign ids
84
+ :param threshold: minimal number of similar keypoints between poses
85
+ :param smooth: smooth pose keypoints between frames
86
+ :return: None
87
+ """
88
+ current_poses = sorted(current_poses, key=lambda pose: pose.confidence, reverse=True) # match confident poses first
89
+ mask = np.ones(len(previous_poses), dtype=np.int32)
90
+ for current_pose in current_poses:
91
+ best_matched_id = None
92
+ best_matched_pose_id = None
93
+ best_matched_iou = 0
94
+ for id, previous_pose in enumerate(previous_poses):
95
+ if not mask[id]:
96
+ continue
97
+ iou = get_similarity(current_pose, previous_pose)
98
+ if iou > best_matched_iou:
99
+ best_matched_iou = iou
100
+ best_matched_pose_id = previous_pose.id
101
+ best_matched_id = id
102
+ if best_matched_iou >= threshold:
103
+ mask[best_matched_id] = 0
104
+ else: # pose not similar to any previous
105
+ best_matched_pose_id = None
106
+ current_pose.update_id(best_matched_pose_id)
107
+
108
+ if smooth:
109
+ for kpt_id in range(Pose.num_kpts):
110
+ if current_pose.keypoints[kpt_id, 0] == -1:
111
+ continue
112
+ # reuse filter if previous pose has valid filter
113
+ if (best_matched_pose_id is not None
114
+ and previous_poses[best_matched_id].keypoints[kpt_id, 0] != -1):
115
+ current_pose.filters[kpt_id] = previous_poses[best_matched_id].filters[kpt_id]
116
+ current_pose.keypoints[kpt_id, 0] = current_pose.filters[kpt_id][0](current_pose.keypoints[kpt_id, 0])
117
+ current_pose.keypoints[kpt_id, 1] = current_pose.filters[kpt_id][1](current_pose.keypoints[kpt_id, 1])
118
+ current_pose.bbox = Pose.get_bbox(current_pose.keypoints)
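
A small sketch of id propagation with track_poses(), assuming it is run from inside lite_openpose/ so that the modules package resolves; the keypoint coordinates are made up for illustration:

import numpy as np
from modules.pose import Pose, track_poses

kpts = np.full((Pose.num_kpts, 2), -1, dtype=np.int32)
kpts[1] = (100, 80)    # neck
kpts[2] = (80, 80)     # right shoulder
kpts[5] = (120, 80)    # left shoulder
kpts[8] = (90, 140)    # right hip

prev_pose = Pose(kpts.copy(), confidence=10.0)
prev_pose.update_id()                        # assigns id 0 on first use

kpts2 = kpts.copy()
kpts2[kpts2[:, 0] != -1] += 2                # same person, shifted by two pixels
curr_pose = Pose(kpts2, confidence=9.0)

track_poses([prev_pose], [curr_pose], threshold=3, smooth=False)
print(curr_pose.id)                          # inherits the id of the matching previous pose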
lite_openpose/pose2d_models/__init__.py ADDED
File without changes
lite_openpose/pose2d_models/with_mobilenet.py ADDED
@@ -0,0 +1,123 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+ from modules.conv import conv, conv_dw, conv_dw_no_bn
5
+
6
+
7
+ class Cpm(nn.Module):
8
+ def __init__(self, in_channels, out_channels):
9
+ super().__init__()
10
+ self.align = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False)
11
+ self.trunk = nn.Sequential(
12
+ conv_dw_no_bn(out_channels, out_channels),
13
+ conv_dw_no_bn(out_channels, out_channels),
14
+ conv_dw_no_bn(out_channels, out_channels)
15
+ )
16
+ self.conv = conv(out_channels, out_channels, bn=False)
17
+
18
+ def forward(self, x):
19
+ x = self.align(x)
20
+ x = self.conv(x + self.trunk(x))
21
+ return x
22
+
23
+
24
+ class InitialStage(nn.Module):
25
+ def __init__(self, num_channels, num_heatmaps, num_pafs):
26
+ super().__init__()
27
+ self.trunk = nn.Sequential(
28
+ conv(num_channels, num_channels, bn=False),
29
+ conv(num_channels, num_channels, bn=False),
30
+ conv(num_channels, num_channels, bn=False)
31
+ )
32
+ self.heatmaps = nn.Sequential(
33
+ conv(num_channels, 512, kernel_size=1, padding=0, bn=False),
34
+ conv(512, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False)
35
+ )
36
+ self.pafs = nn.Sequential(
37
+ conv(num_channels, 512, kernel_size=1, padding=0, bn=False),
38
+ conv(512, num_pafs, kernel_size=1, padding=0, bn=False, relu=False)
39
+ )
40
+
41
+ def forward(self, x):
42
+ trunk_features = self.trunk(x)
43
+ heatmaps = self.heatmaps(trunk_features)
44
+ pafs = self.pafs(trunk_features)
45
+ return [heatmaps, pafs]
46
+
47
+
48
+ class RefinementStageBlock(nn.Module):
49
+ def __init__(self, in_channels, out_channels):
50
+ super().__init__()
51
+ self.initial = conv(in_channels, out_channels, kernel_size=1, padding=0, bn=False)
52
+ self.trunk = nn.Sequential(
53
+ conv(out_channels, out_channels),
54
+ conv(out_channels, out_channels, dilation=2, padding=2)
55
+ )
56
+
57
+ def forward(self, x):
58
+ initial_features = self.initial(x)
59
+ trunk_features = self.trunk(initial_features)
60
+ return initial_features + trunk_features
61
+
62
+
63
+ class RefinementStage(nn.Module):
64
+ def __init__(self, in_channels, out_channels, num_heatmaps, num_pafs):
65
+ super().__init__()
66
+ self.trunk = nn.Sequential(
67
+ RefinementStageBlock(in_channels, out_channels),
68
+ RefinementStageBlock(out_channels, out_channels),
69
+ RefinementStageBlock(out_channels, out_channels),
70
+ RefinementStageBlock(out_channels, out_channels),
71
+ RefinementStageBlock(out_channels, out_channels)
72
+ )
73
+ self.heatmaps = nn.Sequential(
74
+ conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False),
75
+ conv(out_channels, num_heatmaps, kernel_size=1, padding=0, bn=False, relu=False)
76
+ )
77
+ self.pafs = nn.Sequential(
78
+ conv(out_channels, out_channels, kernel_size=1, padding=0, bn=False),
79
+ conv(out_channels, num_pafs, kernel_size=1, padding=0, bn=False, relu=False)
80
+ )
81
+
82
+ def forward(self, x):
83
+ trunk_features = self.trunk(x)
84
+ heatmaps = self.heatmaps(trunk_features)
85
+ pafs = self.pafs(trunk_features)
86
+ return [heatmaps, pafs]
87
+
88
+
89
+ class PoseEstimationWithMobileNet(nn.Module):
90
+ def __init__(self, num_refinement_stages=1, num_channels=128, num_heatmaps=19, num_pafs=38):
91
+ super().__init__()
92
+ self.model = nn.Sequential(
93
+ conv( 3, 32, stride=2, bias=False),
94
+ conv_dw( 32, 64),
95
+ conv_dw( 64, 128, stride=2),
96
+ conv_dw(128, 128),
97
+ conv_dw(128, 256, stride=2),
98
+ conv_dw(256, 256),
99
+ conv_dw(256, 512), # conv4_2
100
+ conv_dw(512, 512, dilation=2, padding=2),
101
+ conv_dw(512, 512),
102
+ conv_dw(512, 512),
103
+ conv_dw(512, 512),
104
+ conv_dw(512, 512) # conv5_5
105
+ )
106
+ self.cpm = Cpm(512, num_channels)
107
+
108
+ self.initial_stage = InitialStage(num_channels, num_heatmaps, num_pafs)
109
+ self.refinement_stages = nn.ModuleList()
110
+ for idx in range(num_refinement_stages):
111
+ self.refinement_stages.append(RefinementStage(num_channels + num_heatmaps + num_pafs, num_channels,
112
+ num_heatmaps, num_pafs))
113
+
114
+ def forward(self, x):
115
+ backbone_features = self.model(x)
116
+ backbone_features = self.cpm(backbone_features)
117
+
118
+ stages_output = self.initial_stage(backbone_features)
119
+ for refinement_stage in self.refinement_stages:
120
+ stages_output.extend(
121
+ refinement_stage(torch.cat([backbone_features, stages_output[-2], stages_output[-1]], dim=1)))
122
+
123
+ return stages_output
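
A quick shape check of the network above, assuming it is run from inside lite_openpose/ so that modules.conv resolves; the input resolution is arbitrary, the backbone simply downsamples by a factor of 8:

import torch
from pose2d_models.with_mobilenet import PoseEstimationWithMobileNet

net = PoseEstimationWithMobileNet().eval()
x = torch.randn(1, 3, 256, 456)          # NCHW dummy input
with torch.no_grad():
    stages = net(x)

# initial stage + one refinement stage -> [heatmaps, pafs, heatmaps, pafs]
print(len(stages))                       # 4
print(stages[-2].shape)                  # torch.Size([1, 19, 32, 57])  heatmaps at 1/8 resolution
print(stages[-1].shape)                  # torch.Size([1, 38, 32, 57])  PAFs at 1/8 resolution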