Spaces:
Running
Running
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define some typical masked 2D convolutions.""" | |
import numpy as np | |
from six.moves import xrange | |
import tensorflow as tf | |
import block_util | |
import blocks_std | |
# pylint does not recognize block_base.BlockBase.__call__(). | |
# pylint: disable=not-callable | |
class RasterScanConv2D(blocks_std.Conv2DBase):
  """Conv2D whose kernel only sees pixels that precede the center pixel.

  "Precede" is meant in raster scan order (row by row, left to right). The
  kernel variable is multiplied by a constant spatial mask; for a 5 x 5 kernel
  the mask looks like:
     T T T T T
     T T T T T
     T T x F F
     F F F F F
     F F F F F
  'T' positions are kept (mask value 1) and 'F' positions are zeroed out.
  The center position 'x' is zeroed out when strict_order is True and kept
  when strict_order is False.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd. Also
        forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, the center pixel itself is masked out.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If either of the first two kernel dimensions is even.
    """
    super(RasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    # An even-sized kernel has no center pixel to anchor the mask on.
    if not ((filter_size[0] % 2) == 1 and (filter_size[1] % 2) == 1):
      raise ValueError('Kernel size should be odd.')

    with self._BlockScope():
      self._initializer = (
          block_util.RsqrtInitializer(dims=(0, 1, 2))
          if initializer is None else initializer)
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the raster scan mask to it.

    Args:
      shape: Kernel shape. shape[:2] is used as the spatial kernel size and
        must support `// 2` (e.g. a NumPy integer array).
      dtype: Kernel type; must expose `as_numpy_dtype`.

    Returns:
      The product of the constant mask tensor and the kernel variable.
    """
    weights = self.NewVar(self._initializer(shape, dtype))

    spatial_mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype)
    row_mid, col_mid = shape[:2] // 2

    # Every row below the center row is in the future.
    spatial_mask[row_mid + 1:, :] = 0
    # On the center row, hide everything right of the center, and the center
    # itself when the ordering is strict.
    first_hidden_col = col_mid if self._strict_order else col_mid + 1
    spatial_mask[row_mid, first_hidden_col:] = 0

    # Add two trailing unit axes so the mask broadcasts against the kernel.
    spatial_mask = spatial_mask[:, :, np.newaxis, np.newaxis]
    return tf.convert_to_tensor(spatial_mask, dtype) * weights
class DepthOrderConv2D(blocks_std.Conv2DBase):
  """Conv2D whose outputs never depend on higher input depth positions.

  Output depth #n only depends on input depths #k with k < n when
  strict_order is True, or with k <= n when strict_order is False. This is
  enforced by multiplying the kernel variable by a constant
  (input depth x output depth) mask.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Forwarded unchanged to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, output depth #n may not depend on input depth #n.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.
    """
    super(DepthOrderConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    with self._BlockScope():
      self._initializer = (
          block_util.RsqrtInitializer(dims=(0, 1, 2))
          if initializer is None else initializer)
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the depth ordering mask to it.

    Args:
      shape: Kernel shape. shape[2:] is used as the
        (input depth, output depth) size of the mask.
      dtype: Kernel type; must expose `as_numpy_dtype`.

    Returns:
      The product of the constant mask tensor and the kernel variable.
    """
    weights = self.NewVar(self._initializer(shape, dtype))

    # Rows index the input depth and columns the output depth, so the allowed
    # dependencies form an upper triangle: row < column when strict (offset 1),
    # row <= column otherwise (offset 0).
    diagonal_offset = 1 if self._strict_order else 0
    depth_mask = np.triu(
        np.ones(shape[2:], dtype=dtype.as_numpy_dtype), k=diagonal_offset)

    # Add two leading unit axes so the mask broadcasts against the kernel.
    depth_mask = depth_mask.reshape((1, 1) + depth_mask.shape)
    return tf.convert_to_tensor(depth_mask, dtype) * weights
class GroupRasterScanConv2D(blocks_std.Conv2DBase):
  """Conv2D with no dependency on future pixels (in raster scan order).

  This version only introduces dependencies on previous pixels in raster scan
  order. It can also introduce some dependencies on previous depth positions
  of the current pixel (current pixel = center pixel of the kernel) in the
  following way:
  the depth dimension of the input is split into Ki groups of size
  |input_group_size|, the output dimension is split into Ko groups of size
  |output_group_size| (usually Ki == Ko). Each output group ko of the current
  pixel position can only depend on previous input groups ki
  (i.e. ki < ko if strict_order is True or ki <= ko if strict_order is False).

  Notes:
  - Block RasterScanConv2D is a special case of GroupRasterScanConv2D
    where Ki == Ko == 1 (i.e. input_group_size == input_depth and
    output_group_size == output_depth).
  - For 1x1 convolution, block DepthOrderConv2D is a special case of
    GroupRasterScanConv2D where input_group_size == 1 and
    output_group_size == 1.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               input_group_size=1,
               output_group_size=1,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Output depth; must be a multiple of output_group_size. Also
        forwarded to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd. Also
        forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If True, output group ko of the center pixel may not
        depend on input group ko.
      input_group_size: Positive size of each input depth group. The input
        depth must be a multiple of it (checked when the kernel is created).
      output_group_size: Positive size of each output depth group.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If a kernel dimension is even, if a group size is not
        positive, or if depth is not a multiple of output_group_size.
    """
    super(GroupRasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1:
      raise ValueError('Kernel size should be odd.')

    # Without this check a zero group size surfaces as a ZeroDivisionError in
    # the divisibility tests, and a negative one as a negative group count.
    if input_group_size <= 0 or output_group_size <= 0:
      raise ValueError(
          'Group sizes should be positive: input_group_size={}, '
          'output_group_size={}'.format(input_group_size, output_group_size))

    with self._BlockScope():
      if initializer is None:
        initializer = block_util.RsqrtInitializer(dims=(0, 1, 2))
      self._initializer = initializer
      self._input_group_size = input_group_size
      self._output_group_size = output_group_size
      self._strict_order = strict_order

      if depth % self._output_group_size != 0:
        raise ValueError(
            'Invalid depth group size: {} for depth {}'.format(
                self._output_group_size, depth))
      self._output_group_count = depth // self._output_group_size

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and applies the group raster scan mask.

    Args:
      shape: Kernel shape, read as (height, width, input depth, output depth).
      dtype: Kernel type; must expose `as_numpy_dtype`.

    Returns:
      The product of the constant mask tensor and the kernel variable.

    Raises:
      ValueError: If the input depth is not a multiple of input_group_size.
    """
    init = self._initializer(shape, dtype)
    kernel = self.NewVar(init)

    depth_input = shape[2]
    if depth_input % self._input_group_size != 0:
      raise ValueError(
          'Invalid depth group size: {} for depth {}'.format(
              self._input_group_size, depth_input))
    input_group_count = depth_input // self._input_group_size
    output_group_count = self._output_group_count
    depth_output = shape[3]

    # Set the mask to 0 for future pixels in raster scan order.
    center_row = shape[0] // 2
    center_col = shape[1] // 2
    mask = np.ones([shape[0], shape[1],
                    input_group_count, self._input_group_size,
                    output_group_count, self._output_group_size],
                   dtype=dtype.as_numpy_dtype)
    mask[center_row + 1:, :, :, :, :, :] = 0
    mask[center_row, center_col + 1:, :, :, :, :] = 0

    # Adjust the mask for the current position (the center position): output
    # group ko only keeps input groups ki < ko (strict) or ki <= ko.
    # The input-group axis is addressed with a slice start rather than an
    # integer index, so output groups beyond the last input group (Ko > Ki)
    # simply mask nothing instead of raising IndexError.
    for group in xrange(output_group_count):
      first_hidden = group if self._strict_order else group + 1
      mask[center_row, center_col, first_hidden:, :, group, :] = 0

    # Merge the (group, position in group) axes back into plain depth axes.
    mask = mask.reshape([shape[0], shape[1], depth_input, depth_output])
    return tf.convert_to_tensor(mask, dtype) * kernel
class InFillingConv2D(blocks_std.Conv2DBase):
  """Conv2D whose kernel ignores the pixel it is centered on.

  The kernel variable is multiplied by a constant spatial mask; for a 5 x 5
  kernel the mask looks like:
     T T T T T
     T T T T T
     T T x T T
     T T T T T
     T T T T T
  'T' positions are kept (mask value 1). Only the center position 'x' is
  zeroed out.
  """

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd and must not
        both be 1. Also forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded unchanged to blocks_std.Conv2DBase.

    Raises:
      ValueError: If a kernel dimension is even, or if the kernel is 1x1.
    """
    super(InFillingConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    # An even-sized kernel has no center pixel to mask out.
    if not ((filter_size[0] % 2) == 1 and (filter_size[1] % 2) == 1):
      raise ValueError('Kernel size should be odd.')
    # A 1x1 kernel would be masked out entirely.
    if filter_size[0] == 1 and filter_size[1] == 1:
      raise ValueError('Kernel size should be larger than 1x1.')

    with self._BlockScope():
      self._initializer = (
          block_util.RsqrtInitializer(dims=(0, 1, 2))
          if initializer is None else initializer)

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and zeroes out its center position.

    Args:
      shape: Kernel shape. shape[:2] is used as the spatial kernel size and
        must support `// 2` (e.g. a NumPy integer array).
      dtype: Kernel type; must expose `as_numpy_dtype`.

    Returns:
      The product of the constant mask tensor and the kernel variable.
    """
    weights = self.NewVar(self._initializer(shape, dtype))

    spatial_mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype)
    row_mid, col_mid = shape[:2] // 2
    spatial_mask[row_mid, col_mid] = 0

    # Add two trailing unit axes so the mask broadcasts against the kernel.
    spatial_mask = spatial_mask[:, :, np.newaxis, np.newaxis]
    return tf.convert_to_tensor(spatial_mask, dtype) * weights