Spaces:
Running
Running
File size: 8,403 Bytes
0b8359d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define some typical masked 2D convolutions."""
import numpy as np
from six.moves import xrange
import tensorflow as tf
import block_util
import blocks_std
# pylint does not recognize block_base.BlockBase.__call__().
# pylint: disable=not-callable
class RasterScanConv2D(blocks_std.Conv2DBase):
  """Conv2D with no dependency on future pixels (in raster scan order).

  For example, assuming a 5 x 5 kernel, the kernel is applied a spatial mask:
    T T T T T
    T T T T T
    T T x F F
    F F F F F
    F F F F F
  where 'T' are pixels which are available when computing the convolution
  for pixel 'x'. All the pixels marked with 'F' are not available.
  'x' itself is not available if strict_order is True, otherwise, it is
  available.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block and stores the masking options.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Indexable with (at least) two entries, the two spatial
        kernel sizes. Both must be odd so that the kernel has a center.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, the center pixel is masked out as well.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If one of the two kernel sizes is even.
    """
    super(RasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1:
      raise ValueError('Kernel size should be odd.')

    with self._BlockScope():
      if initializer is None:
        initializer = block_util.RsqrtInitializer(dims=(0, 1, 2))
      self._initializer = initializer
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the raster scan mask to it.

    Args:
      shape: Kernel shape; indexable, its first two entries are the spatial
        sizes used to build the mask. May be a list, tuple or numpy array.
      dtype: Kernel dtype; must expose `as_numpy_dtype`.

    Returns:
      The kernel variable multiplied by the constant 0/1 mask. The mask has
      two trailing singleton axes so that it broadcasts over the remaining
      kernel dimensions.
    """
    init = self._initializer(shape, dtype)
    kernel = self.NewVar(init)

    mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype)
    # Floor-divide each spatial size on its own: `shape[:2] // 2` only works
    # for numpy arrays and raises TypeError when shape is a list or tuple.
    center_row = shape[0] // 2
    center_col = shape[1] // 2

    # Every row after the center row is in the future.
    mask[center_row + 1:, :] = 0
    # On the center row, mask everything after the center, and the center
    # itself when the ordering is strict.
    if self._strict_order:
      first_masked_col = center_col
    else:
      first_masked_col = center_col + 1
    mask[center_row, first_masked_col:] = 0

    mask = mask.reshape(mask.shape + (1, 1))
    return tf.convert_to_tensor(mask, dtype) * kernel
class DepthOrderConv2D(blocks_std.Conv2DBase):
  """Conv2D whose outputs never read from higher input depth positions.

  Output depth #n only depends on input depths #k with k < n when
  strict_order is True, and with k <= n when strict_order is False.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block and stores the masking options.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Forwarded unchanged to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, output depth #n cannot read input depth #n.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.
    """
    super(DepthOrderConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    with self._BlockScope():
      self._initializer = (
          block_util.RsqrtInitializer(dims=(0, 1, 2))
          if initializer is None else initializer)
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the depth-order mask to it.

    Args:
      shape: Kernel shape; its last two entries (indices 2 and 3) are the
        input and output depths used to build the mask.
      dtype: Kernel dtype; must expose `as_numpy_dtype`.

    Returns:
      The kernel variable multiplied by the constant 0/1 mask. The mask has
      two leading singleton axes so that it broadcasts over the first two
      kernel dimensions.
    """
    kernel = self.NewVar(self._initializer(shape, dtype))

    # Entry [k, n] stays 1 only when input depth k may feed output depth n:
    # k < n (strict) keeps the part strictly above the main diagonal, while
    # k <= n (non strict) keeps the main diagonal too.
    diagonal_offset = 1 if self._strict_order else 0
    depth_mask = np.triu(
        np.ones(shape[2:], dtype=dtype.as_numpy_dtype), k=diagonal_offset)

    depth_mask = depth_mask.reshape((1, 1) + depth_mask.shape)
    return tf.convert_to_tensor(depth_mask, dtype) * kernel
class GroupRasterScanConv2D(blocks_std.Conv2DBase):
  """Conv2D with no dependency on future pixels (in raster scan order).

  This version only introduces dependencies on previous pixels in raster scan
  order. It can also introduce some dependencies on previous depth positions
  of the current pixel (current pixel = center pixel of the kernel) in the
  following way:
  the depth dimension of the input is split into Ki groups of size
  |input_group_size|, the output dimension is split into Ko groups of size
  |output_group_size| (usually Ki == Ko). Each output group ko of the current
  pixel position can only depend on previous input groups ki
  (i.e. ki < ko if strict_order is True or ki <= ko if strict_order is False).

  Notes:
  - Block RasterScanConv2D is a special case of GroupRasterScanConv2D
    where Ki == Ko == 1 (i.e. input_group_size == input_depth and
    output_group_size == output_depth).
  - For 1x1 convolution, block DepthOrderConv2D is a special case of
    GroupRasterScanConv2D where input_group_size == 1 and
    output_group_size == 1.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               input_group_size=1,
               output_group_size=1,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block and stores the masking options.

    Args:
      depth: Output depth; must be a multiple of output_group_size. Also
        forwarded to blocks_std.Conv2DBase.
      filter_size: Indexable with (at least) two entries, the two spatial
        kernel sizes. Both must be odd so that the kernel has a center.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, an output group of the center pixel cannot read
        the input group with the same index.
      input_group_size: Number of consecutive input depths per input group.
      output_group_size: Number of consecutive output depths per output group.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If one of the two kernel sizes is even, or if depth is not
        a multiple of output_group_size.
    """
    super(GroupRasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1:
      raise ValueError('Kernel size should be odd.')

    with self._BlockScope():
      if initializer is None:
        initializer = block_util.RsqrtInitializer(dims=(0, 1, 2))
      self._initializer = initializer
      self._input_group_size = input_group_size
      self._output_group_size = output_group_size
      self._strict_order = strict_order

      if depth % self._output_group_size != 0:
        raise ValueError(
            'Invalid depth group size: {} for depth {}'.format(
                self._output_group_size, depth))
      self._output_group_count = depth // self._output_group_size

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the grouped mask to it.

    Args:
      shape: Kernel shape, indexable with four entries: the two spatial
        sizes, the input depth and the output depth. May be a list, tuple or
        numpy array.
      dtype: Kernel dtype; must expose `as_numpy_dtype`.

    Returns:
      The kernel variable multiplied by a constant 0/1 mask of shape
      [shape[0], shape[1], input depth, output depth].

    Raises:
      ValueError: If the input depth is not a multiple of input_group_size.
    """
    init = self._initializer(shape, dtype)
    kernel = self.NewVar(init)

    depth_input = shape[2]
    depth_output = shape[3]
    if depth_input % self._input_group_size != 0:
      raise ValueError(
          'Invalid depth group size: {} for depth {}'.format(
              self._input_group_size, depth_input))
    input_group_count = depth_input // self._input_group_size
    output_group_count = self._output_group_count

    # Floor-divide each spatial size on its own: `shape[:2] // 2` only works
    # for numpy arrays and raises TypeError when shape is a list or tuple.
    center_row = shape[0] // 2
    center_col = shape[1] // 2

    # Both depth axes are split into (group index, index within group) so
    # that whole groups can be masked with a single slice assignment.
    mask = np.ones([shape[0], shape[1],
                    input_group_count, self._input_group_size,
                    output_group_count, self._output_group_size],
                   dtype=dtype.as_numpy_dtype)

    # Set the mask to 0 for future pixels in raster scan order.
    mask[center_row + 1:, :, :, :, :, :] = 0
    mask[center_row, center_col + 1:, :, :, :, :] = 0

    # Adjust the mask for the current position (the center position).
    # Output group `group` loses every later input group, plus the input
    # group with the same index when the ordering is strict. A slice start is
    # used (instead of an integer index on the input-group axis) so that an
    # output group index beyond the last input group simply masks nothing
    # rather than raising IndexError.
    for group in range(output_group_count):
      first_masked_group = group if self._strict_order else group + 1
      mask[center_row, center_col, first_masked_group:, :, group, :] = 0

    mask = mask.reshape([shape[0], shape[1], depth_input, depth_output])
    return tf.convert_to_tensor(mask, dtype) * kernel
class InFillingConv2D(blocks_std.Conv2DBase):
  """Conv2D with kernel having no dependency on the current pixel.

  For example, assuming a 5 x 5 kernel, the kernel is applied a spatial mask:
    T T T T T
    T T T T T
    T T x T T
    T T T T T
    T T T T T
  where 'T' marks a pixel which is available when computing the convolution
  for pixel 'x'. 'x' itself is not available.
  """

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block and stores the kernel initializer.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Indexable with (at least) two entries, the two spatial
        kernel sizes. Both must be odd and they cannot both be 1, since
        masking the center of a 1x1 kernel would leave nothing.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If one of the two kernel sizes is even, or if the kernel
        is 1x1.
    """
    super(InFillingConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1:
      raise ValueError('Kernel size should be odd.')
    if filter_size[0] == 1 and filter_size[1] == 1:
      raise ValueError('Kernel size should be larger than 1x1.')

    with self._BlockScope():
      if initializer is None:
        initializer = block_util.RsqrtInitializer(dims=(0, 1, 2))
      self._initializer = initializer

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and masks out its center position.

    Args:
      shape: Kernel shape; indexable, its first two entries are the spatial
        sizes used to build the mask. May be a list, tuple or numpy array.
      dtype: Kernel dtype; must expose `as_numpy_dtype`.

    Returns:
      The kernel variable multiplied by the constant 0/1 mask. The mask has
      two trailing singleton axes so that it broadcasts over the remaining
      kernel dimensions.
    """
    init = self._initializer(shape, dtype)
    kernel = self.NewVar(init)

    mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype)
    # Floor-divide each spatial size on its own: `shape[:2] // 2` only works
    # for numpy arrays and raises TypeError when shape is a list or tuple.
    mask[shape[0] // 2, shape[1] // 2] = 0

    mask = mask.reshape(mask.shape + (1, 1))
    return tf.convert_to_tensor(mask, dtype) * kernel
|