# NCTCMumbai's picture
# Upload 2571 files
# 0b8359d
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define some typical masked 2D convolutions."""
import numpy as np
from six.moves import xrange
import tensorflow as tf
import block_util
import blocks_std
# pylint does not recognize block_base.BlockBase.__call__().
# pylint: disable=not-callable
class RasterScanConv2D(blocks_std.Conv2DBase):
  """2D convolution whose kernel is masked to be causal in raster scan order.

  The kernel is multiplied element-wise by a fixed spatial mask so that the
  output at a pixel never uses pixels that come later in raster scan order.
  For a 5 x 5 kernel the mask is:

      T T T T T
      T T T T T
      T T x F F
      F F F F F
      F F F F F

  'T' marks kernel positions that are kept, 'F' marks positions that are
  zeroed, and 'x' is the pixel being computed. The 'x' position is zeroed when
  strict_order is True and kept when strict_order is False.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd. Also
        forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If true, the center kernel position is masked out as well.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce the
        initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded to blocks_std.Conv2DBase as the `name` keyword.

    Raises:
      ValueError: If filter_size[0] or filter_size[1] is not odd.
    """
    super(RasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    # An odd kernel is required so that a unique center position exists.
    if not (filter_size[0] % 2 == 1 and filter_size[1] % 2 == 1):
      raise ValueError('Kernel size should be odd.')

    with self._BlockScope():
      self._initializer = (
          initializer if initializer is not None
          else block_util.RsqrtInitializer(dims=(0, 1, 2)))
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and returns it multiplied by the mask.

    Args:
      shape: Kernel shape. Its first two entries are the spatial kernel size;
        it must support element-wise `//` (e.g. a NumPy array).
      dtype: Kernel type; must expose `as_numpy_dtype` (presumably a tf.DType).

    Returns:
      The kernel variable multiplied by a constant mask of shape
      [shape[0], shape[1], 1, 1].
    """
    kernel = self.NewVar(self._initializer(shape, dtype))

    spatial_dims = shape[:2]
    center_row, center_col = spatial_dims // 2

    spatial_mask = np.ones(spatial_dims, dtype=dtype.as_numpy_dtype)
    # Every row below the center row lies in the future.
    spatial_mask[center_row + 1:, :] = 0
    # On the center row, masking starts at the center itself in strict mode
    # and just after it otherwise.
    first_masked_col = center_col if self._strict_order else center_col + 1
    spatial_mask[center_row, first_masked_col:] = 0

    # Trailing unit axes let the mask broadcast over both depth dimensions.
    spatial_mask = spatial_mask[:, :, np.newaxis, np.newaxis]
    return tf.convert_to_tensor(spatial_mask, dtype) * kernel
class DepthOrderConv2D(blocks_std.Conv2DBase):
  """2D convolution whose outputs only depend on lower input depths.

  Output depth #n depends only on input depths #k with k < n when
  strict_order is True, or with k <= n when strict_order is False. This is
  enforced by multiplying the kernel by a constant mask over its two depth
  dimensions; the spatial dimensions are not masked.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Forwarded unchanged to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If true, output depth #n may not depend on input depth #n.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce the
        initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded to blocks_std.Conv2DBase as the `name` keyword.
    """
    super(DepthOrderConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    with self._BlockScope():
      self._initializer = (
          initializer if initializer is not None
          else block_util.RsqrtInitializer(dims=(0, 1, 2)))
      self._strict_order = strict_order

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and returns it multiplied by the mask.

    Args:
      shape: Kernel shape; entries 2 and 3 are used as the input and output
        depths of the mask.
      dtype: Kernel type; must expose `as_numpy_dtype` (presumably a tf.DType).

    Returns:
      The kernel variable multiplied by a constant mask of shape
      [1, 1, shape[2], shape[3]].
    """
    kernel = self.NewVar(self._initializer(shape, dtype))

    # Rows index the input depth k and columns the output depth n. Keeping
    # entries on or above diagonal 1 gives k < n (strict); diagonal 0 gives
    # k <= n (non-strict).
    lowest_kept_diagonal = 1 if self._strict_order else 0
    depth_mask = np.triu(
        np.ones(shape[2:], dtype=dtype.as_numpy_dtype),
        k=lowest_kept_diagonal)

    # Leading unit axes let the mask broadcast over the spatial dimensions.
    depth_mask = depth_mask[np.newaxis, np.newaxis, :, :]
    return tf.convert_to_tensor(depth_mask, dtype) * kernel
class GroupRasterScanConv2D(blocks_std.Conv2DBase):
  """Conv2D with no dependency on future pixels (in raster scan order).

  This version only introduces dependencies on previous pixels in raster scan
  order. It can also introduce some dependencies on previous depth positions
  of the current pixel (current pixel = center pixel of the kernel) in the
  following way:

  The depth dimension of the input is split into Ki groups of size
  |input_group_size| and the output dimension is split into Ko groups of size
  |output_group_size| (usually Ki == Ko). Each output group ko of the current
  pixel position can only depend on previous input groups ki
  (i.e. ki < ko if strict_order is True or ki <= ko if strict_order is False).
  When Ko > Ki, output groups whose index is at or beyond Ki may depend on
  every input group of the current pixel.

  Notes:
  - Block RasterScanConv2D is a special case of GroupRasterScanConv2D
    where Ki == Ko == 1 (i.e. input_group_size == input_depth and
    output_group_size == output_depth).
  - For 1x1 convolution, block DepthOrderConv2D is a special case of
    GroupRasterScanConv2D where input_group_size == 1 and
    output_group_size == 1.
  """

  def __init__(self, depth, filter_size, strides, padding,
               strict_order=True,
               input_group_size=1,
               output_group_size=1,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Output depth; must be a multiple of output_group_size. Also
        forwarded to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd. Also
        forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      strict_order: If true, at the center pixel an output group may not depend
        on the input group with the same index.
      input_group_size: Number of consecutive input depths per input group.
      output_group_size: Number of consecutive output depths per output group.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce the
        initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded to blocks_std.Conv2DBase as the `name` keyword.

    Raises:
      ValueError: If the kernel size is not odd, if a group size is not
        positive, or if depth is not a multiple of output_group_size.
    """
    super(GroupRasterScanConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1:
      raise ValueError('Kernel size should be odd.')
    # Reject degenerate group sizes up front instead of failing later with a
    # ZeroDivisionError or an invalid NumPy shape.
    if input_group_size <= 0 or output_group_size <= 0:
      raise ValueError(
          'Group sizes should be positive: input {}, output {}'.format(
              input_group_size, output_group_size))

    with self._BlockScope():
      if initializer is None:
        initializer = block_util.RsqrtInitializer(dims=(0, 1, 2))
      self._initializer = initializer
      self._input_group_size = input_group_size
      self._output_group_size = output_group_size
      self._strict_order = strict_order

      if depth % self._output_group_size != 0:
        raise ValueError(
            'Invalid depth group size: {} for depth {}'.format(
                self._output_group_size, depth))
      self._output_group_count = depth // self._output_group_size

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and returns it multiplied by the mask.

    Args:
      shape: Kernel shape, read as [height, width, input depth, output depth].
        The first two entries must support element-wise `//` (e.g. a NumPy
        array).
      dtype: Kernel type; must expose `as_numpy_dtype` (presumably a tf.DType).

    Returns:
      The kernel variable multiplied by a constant mask of the same shape.

    Raises:
      ValueError: If the input depth is not a multiple of input_group_size.
    """
    init = self._initializer(shape, dtype)
    kernel = self.NewVar(init)

    depth_input = shape[2]
    depth_output = shape[3]
    if depth_input % self._input_group_size != 0:
      raise ValueError(
          'Invalid depth group size: {} for depth {}'.format(
              self._input_group_size, depth_input))
    input_group_count = depth_input // self._input_group_size
    output_group_count = self._output_group_count

    # Set the mask to 0 for future pixels in raster scan order. The depth axes
    # are kept split as (group index, position in group) until the final
    # reshape so that whole groups can be addressed directly.
    center = shape[:2] // 2
    mask = np.ones([shape[0], shape[1],
                    input_group_count, self._input_group_size,
                    output_group_count, self._output_group_size],
                   dtype=dtype.as_numpy_dtype)
    mask[center[0] + 1:, :, :, :, :, :] = 0
    mask[center[0], center[1] + 1:, :, :, :, :] = 0

    # Adjust the mask for the current position (the center position): output
    # group d may only see input groups below d (strict) or up to and
    # including d (non-strict).
    # The input-group axis is addressed with a slice start rather than an
    # integer index: slices are clipped to the axis length, so this stays
    # valid when there are more output groups than input groups, where an
    # integer index d >= input_group_count would raise IndexError.
    first_blocked_offset = 0 if self._strict_order else 1
    for d in xrange(output_group_count):
      mask[center[0], center[1],
           d + first_blocked_offset:, :, d:d + 1, :] = 0

    mask = mask.reshape([shape[0], shape[1], depth_input, depth_output])
    return tf.convert_to_tensor(mask, dtype) * kernel
class InFillingConv2D(blocks_std.Conv2DBase):
  """2D convolution whose kernel ignores the pixel being computed.

  The kernel is multiplied element-wise by a fixed spatial mask that zeroes
  only its center position. For a 5 x 5 kernel the mask is:

      T T T T T
      T T T T T
      T T x T T
      T T T T T
      T T T T T

  'T' marks kernel positions that are kept and 'x' is the pixel being
  computed, which is zeroed.
  """

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, initializer=None, name=None):
    """Initializes the block.

    Args:
      depth: Forwarded unchanged to blocks_std.Conv2DBase.
      filter_size: Kernel size; entries 0 and 1 must both be odd and must not
        both be 1. Also forwarded to blocks_std.Conv2DBase.
      strides: Forwarded unchanged to blocks_std.Conv2DBase.
      padding: Forwarded unchanged to blocks_std.Conv2DBase.
      bias: Forwarded unchanged to blocks_std.Conv2DBase.
      act: Forwarded unchanged to blocks_std.Conv2DBase.
      initializer: Callable invoked as initializer(shape, dtype) to produce the
        initial kernel value. Defaults to
        block_util.RsqrtInitializer(dims=(0, 1, 2)).
      name: Forwarded to blocks_std.Conv2DBase as the `name` keyword.

    Raises:
      ValueError: If the kernel size is not odd, or if it is exactly 1 x 1.
    """
    super(InFillingConv2D, self).__init__(
        depth, filter_size, strides, padding, bias, act, name=name)

    # An odd kernel is required so that a unique center position exists.
    if not (filter_size[0] % 2 == 1 and filter_size[1] % 2 == 1):
      raise ValueError('Kernel size should be odd.')
    # A 1 x 1 kernel would be masked out entirely.
    if not (filter_size[0] != 1 or filter_size[1] != 1):
      raise ValueError('Kernel size should be larger than 1x1.')

    with self._BlockScope():
      self._initializer = (
          initializer if initializer is not None
          else block_util.RsqrtInitializer(dims=(0, 1, 2)))

  def _CreateKernel(self, shape, dtype):
    """Creates the kernel variable and returns it multiplied by the mask.

    Args:
      shape: Kernel shape. Its first two entries are the spatial kernel size;
        it must support element-wise `//` (e.g. a NumPy array).
      dtype: Kernel type; must expose `as_numpy_dtype` (presumably a tf.DType).

    Returns:
      The kernel variable multiplied by a constant mask of shape
      [shape[0], shape[1], 1, 1].
    """
    kernel = self.NewVar(self._initializer(shape, dtype))

    spatial_dims = shape[:2]
    center_row, center_col = spatial_dims // 2

    # Keep everything except the center position.
    spatial_mask = np.ones(spatial_dims, dtype=dtype.as_numpy_dtype)
    spatial_mask[center_row, center_col] = 0

    # Trailing unit axes let the mask broadcast over both depth dimensions.
    spatial_mask = spatial_mask[:, :, np.newaxis, np.newaxis]
    return tf.convert_to_tensor(spatial_mask, dtype) * kernel