# Spaces: NCTCMumbai / NCTC (Running)
# File size: 12,592 Bytes
# 0b8359d


# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Basic blocks for building tensorflow models."""

import numpy as np
import tensorflow as tf

import block_base
import block_util

# pylint does not recognize block_base.BlockBase.__call__().
# pylint: disable=not-callable


def HandleConvPaddingModes(x, padding, kernel_shape, strides):
  """Returns an updated tensor and padding type for REFLECT and SYMMETRIC.

  For the mirror padding modes the input is padded explicitly with tf.pad()
  by the amount that 'SAME' would have used, and the returned padding mode is
  switched to 'VALID'. 'SAME' and 'VALID' are passed through unchanged, except
  that a 1x1 kernel always yields 'VALID'.

  Args:
    x: A 4D tensor with shape [batch_size, height, width, depth].
    padding: Padding mode (SAME, VALID, REFLECT, or SYMMETRIC).
    kernel_shape: Shape of convolution kernel that will be applied. Any
      integer sequence (list, tuple or numpy array) whose first two entries
      are the filter height and width.
    strides: Convolution stride that will be used. A length-4 sequence whose
      entries 1 and 2 are the height and width strides.

  Returns:
    x and padding after adjustments for REFLECT and SYMMETRIC.
  """
  # Normalize to a numpy array so that the element-wise comparison and the
  # arithmetic below also work for plain lists/tuples. With a list,
  # `kernel_shape[:2] == 1` is a single False and the 1x1 shortcut would
  # never trigger. int32 matches the dtype produced by tf.shape().
  filter_hw = np.asarray(kernel_shape, dtype=np.int32)[:2]

  # For 1x1 convolution, all padding modes are the same.
  if np.all(filter_hw == 1):
    return x, 'VALID'

  if padding in ('REFLECT', 'SYMMETRIC'):
    # We manually compute the number of paddings as if 'SAME'.
    # From Tensorflow kernel, the formulas are as follows.
    #   output_shape = ceil(input_shape / strides)
    #   paddings = (output_shape - 1) * strides + filter_size - input_shape
    # Let x, y, s be a shorthand notations for input_shape, output_shape, and
    # strides, respectively. Let (x - 1) = sn + r where 0 <= r < s. Note that
    #   y - 1 = ceil(x / s) - 1 = floor((x - 1) / s) = n
    # provided that x > 0. Therefore
    #   paddings = n * s + filter_size - (sn + r + 1)
    #            = filter_size - r - 1.
    input_shape = x.get_shape()  # shape at graph construction time
    img_shape = tf.shape(x)[1:3]  # image shape (no batch) at run time
    remainder = tf.mod(img_shape - 1, strides[1:3])
    pad_sizes = filter_hw - remainder - 1

    pad_rows = pad_sizes[0]
    pad_cols = pad_sizes[1]
    # Split each total padding into (before, after); when the total is odd the
    # extra element goes after (bottom/right).
    pad = tf.stack([[0, 0], tf.stack([pad_rows // 2, (pad_rows + 1) // 2]),
                    tf.stack([pad_cols // 2, (pad_cols + 1) // 2]), [0, 0]])

    # Manually pad the input and switch the padding mode to 'VALID'.
    x = tf.pad(x, pad, mode=padding)
    # Padding only touches height/width, so restore the statically known batch
    # and depth dimensions.
    x.set_shape([input_shape[0], x.get_shape()[1],
                 x.get_shape()[2], input_shape[3]])
    padding = 'VALID'

  return x, padding


class PassThrough(block_base.BlockBase):
  """Identity block: hands its input back untouched.

  Used as a stand-in wherever an optional stage (bias, activation, ...) is
  not requested.
  """

  def __init__(self):
    # An empty name turns off name scoping for this block.
    super(PassThrough, self).__init__(name='')

  def _Apply(self, inp):
    # Nothing to compute; the input is the output.
    return inp

  @property
  def initialized(self):
    """Reports True unconditionally."""
    return True


class Bias(object):
  """Holds the constant value used to initialize a BiasAdd block's bias.

  Attributes:
    value: The stored initial value (0 unless specified).
  """

  def __init__(self, value=0):
    # Plain value holder; no validation or conversion is performed.
    self.value = value


class BiasAdd(block_base.BlockBase):
  """A tf.nn.bias_add wrapper.

  This wrapper may act as a PassThrough block depending on the initializer
  provided, to make easier optional bias applications in NN blocks, etc.
  See __init__() for the details.
  """

  def __init__(self, initializer=Bias(0), name=None):
    """Initializes Bias block.

    The |initializer| parameter has two special cases.

    1. If initializer is None, then this block works as a PassThrough.
    2. If initializer is a Bias class object, then tf.constant_initializer is
       used with the stored value.

    Args:
      initializer: An initializer for the bias variable. Called as
        initializer(shape, dtype) on first application.
      name: Name of this block.
    """
    super(BiasAdd, self).__init__(name)

    with self._BlockScope():
      if isinstance(initializer, Bias):
        self._initializer = tf.constant_initializer(value=initializer.value)
      else:
        self._initializer = initializer

      # Created lazily in _Apply() once the input depth is known.
      self._bias = None

  def _Apply(self, x):
    """Adds the bias to x, creating the bias variable on first use.

    Args:
      x: Input tensor; its last dimension must be statically known.

    Returns:
      x unchanged if no initializer was given, tf.nn.bias_add(x, bias)
      otherwise.
    """
    # Honor the documented contract: no initializer means no bias at all.
    if self._initializer is None:
      return x

    # Compare against None explicitly; truth-testing a TensorFlow variable or
    # tensor is not a reliable "is it created yet" check.
    if self._bias is None:
      init = self._initializer([int(x.get_shape()[-1])], x.dtype)
      self._bias = self.NewVar(init)

    return tf.nn.bias_add(x, self._bias)

  def CreateWeightLoss(self):
    """Returns an empty list: this block contributes no weight loss terms."""
    return []


class LinearBase(block_base.BlockBase):
  """A matmul wrapper.

  Returns input * W, where matrix W can be customized through derivation.
  Subclasses provide W by overriding _CreateKernel().
  """

  def __init__(self, depth, name=None):
    """Initializes the block.

    Args:
      depth: Number of columns of W, i.e. the size of the last output
        dimension.
      name: Name of this block.
    """
    super(LinearBase, self).__init__(name)

    with self._BlockScope():
      self._depth = depth
      # Created lazily in _Apply() once the input depth is known.
      self._matrix = None

  def _CreateKernel(self, shape, dtype):
    """Creates the matrix W of the given shape and dtype (abstract)."""
    raise NotImplementedError('This method must be sub-classed.')

  def _Apply(self, x):
    """Returns tf.matmul(x, W), creating W on first use.

    Args:
      x: Input tensor; its last dimension must be statically known.
    """
    # Compare against None explicitly; truth-testing a TensorFlow variable or
    # tensor is not a reliable "is it created yet" check.
    if self._matrix is None:
      shape = [int(x.get_shape()[-1]), self._depth]
      self._matrix = self._CreateKernel(shape, x.dtype)

    return tf.matmul(x, self._matrix)


class Linear(LinearBase):
  """LinearBase whose matrix W is a variable built from an initializer.

  Computes input * W.
  """

  def __init__(self,
               depth,
               initializer=block_util.RsqrtInitializer(),
               name=None):
    """Initializes the block.

    Args:
      depth: Number of columns of W.
      initializer: Callable invoked as initializer(shape, dtype) to produce
        the initial value of W.
      name: Name of this block.
    """
    super(Linear, self).__init__(depth, name=name)

    with self._BlockScope():
      self._initializer = initializer

  def _CreateKernel(self, shape, dtype):
    """Builds W by wrapping the initializer output in a new variable."""
    initial_value = self._initializer(shape, dtype)
    return self.NewVar(initial_value)


class NN(block_base.BlockBase):
  """A neural network layer wrapper.

  Returns act(input * W + b), where matrix W, bias b are learned, and act is an
  optional activation function (i.e., nonlinearity).

  This transform block can handle multiple inputs. If x_1, x_2, ..., x_m are
  the inputs, then returns act(x_1 * W_1 + ... + x_m * W_m + b).

  The number of inputs is fixed by the first application; later applications
  must pass the same number of inputs.
  """

  def __init__(self,
               depth,
               bias=Bias(0),
               act=None,  # e.g., tf.nn.relu
               initializer=block_util.RsqrtInitializer(),
               linear_block_factory=(lambda d, i: Linear(d, initializer=i)),
               name=None):
    """Initializes NN block.

    Args:
      depth: The depth of the output.
      bias: An initializer for the bias, or a Bias class object. If None, there
        will be no bias term for this NN block. See BiasAdd block.
      act: Optional activation function. If None, no activation is applied.
      initializer: The initialization method for the matrix weights.
      linear_block_factory: A function used to create a linear block. It is
        called as linear_block_factory(depth, initializer), once per input.
      name: The name of this block.
    """
    super(NN, self).__init__(name)

    with self._BlockScope():
      self._linear_block_factory = linear_block_factory
      self._depth = depth
      self._initializer = initializer
      # One linear block per input; created on first application.
      self._matrices = None

      self._bias = BiasAdd(bias) if bias else PassThrough()
      self._act = act if act else PassThrough()

  def _Apply(self, *args):
    """Applies the layer to one or more inputs.

    Args:
      *args: The input tensors x_1, ..., x_m (m >= 1).

    Returns:
      act(x_1 * W_1 + ... + x_m * W_m + b).

    Raises:
      ValueError: If no input is given, or if the number of inputs differs
        from the number seen on the first application.
    """
    # Fail with a clear message instead of an IndexError further down.
    if not args:
      raise ValueError(
          '{} expected at least one input, but observed 0 inputs'.format(
              self.name))

    if not self._matrices:
      self._matrices = [
          self._linear_block_factory(self._depth, self._initializer)
          for _ in args]

    if len(self._matrices) != len(args):
      raise ValueError('{} expected {} inputs, but observed {} inputs'.format(
          self.name, len(self._matrices), len(args)))

    if len(args) > 1:
      y = tf.add_n([m(x) for m, x in zip(self._matrices, args)])
    else:
      # Single input: skip the add_n op.
      y = self._matrices[0](args[0])

    return self._act(self._bias(y))


class Conv2DBase(block_base.BlockBase):
  """A tf.nn.conv2d operator.

  The kernel itself is supplied by subclasses through _CreateKernel().
  """

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, atrous_rate=None, conv=tf.nn.conv2d,
               name=None):
    """Initializes a Conv2DBase block.

    Arguments:
      depth: The output depth of the block (i.e. #filters); if negative, the
        output depth will be set to be the same as the input depth.
      filter_size: The size of the 2D filter. If it's specified as an integer,
        it's going to create a square filter. Otherwise, this is a tuple
        specifying the height x width of the filter.
      strides: A tuple specifying the y and x stride.
      padding: One of the valid padding modes allowed by tf.nn.conv2d, or
        'REFLECT'/'SYMMETRIC' for mirror padding.
      bias: An initializer for the bias, or a Bias class object. If None, there
          will be no bias in this block. See BiasAdd block.
      act: Optional activation function applied to the output.
      atrous_rate: optional input rate for ATrous convolution. If not None, this
          will be used and the strides will be ignored.
      conv: The convolution function to use (e.g. tf.nn.conv2d). It is called
          with strides=... when atrous_rate is None and with rate=... otherwise.
      name: The name for this conv2d op.
    """
    super(Conv2DBase, self).__init__(name)

    with self._BlockScope():
      self._act = act if act else PassThrough()
      self._bias = BiasAdd(bias) if bias else PassThrough()

      # [filter_height, filter_width, in_depth, out_depth]. in_depth stays 0
      # until the first application, when the input depth becomes known.
      self._kernel_shape = np.zeros((4,), dtype=np.int32)
      self._kernel_shape[:2] = filter_size
      self._kernel_shape[3] = depth

      # 4-element strides with 1 at positions 0 and 3.
      self._strides = np.ones((4,), dtype=np.int32)
      self._strides[1:3] = strides
      self._strides = list(self._strides)

      self._padding = padding

      self._kernel = None
      self._conv = conv

      self._atrous_rate = atrous_rate

  def _CreateKernel(self, shape, dtype):
    """Creates the convolution kernel of the given shape and dtype (abstract)."""
    raise NotImplementedError('This method must be sub-classed')

  def _Apply(self, x):
    """Apply the self._conv op.

    Arguments:
      x: input tensor. It needs to be a 4D tensor of the form
          [batch, height, width, channels].
    Returns:
      The output of the convolution of x with the current convolutional
      kernel.
    Raises:
      ValueError: if number of channels is not defined at graph construction.
    """
    input_shape = x.get_shape().with_rank(4)
    input_shape[3:].assert_is_fully_defined()  # channels must be defined
    if self._kernel is None:
      assert self._kernel_shape[2] == 0, self._kernel_shape
      self._kernel_shape[2] = input_shape[3].value
      if self._kernel_shape[3] < 0:
        # Make output depth be the same as input depth.
        self._kernel_shape[3] = self._kernel_shape[2]
      self._kernel = self._CreateKernel(self._kernel_shape, x.dtype)

    use_atrous = self._atrous_rate is not None
    # On the atrous path self._strides is never handed to the op (strides are
    # documented as ignored), so the padding amount and the static output
    # shape below must be derived with unit strides to stay consistent.
    strides = [1, 1, 1, 1] if use_atrous else self._strides

    # NOTE(review): for REFLECT/SYMMETRIC combined with atrous_rate > 1 the
    # padding is still computed from the undilated kernel size -- confirm
    # whether the dilated extent should be used instead.
    x, padding = HandleConvPaddingModes(
        x, self._padding, self._kernel_shape, strides)
    if use_atrous:
      x = self._conv(x, self._kernel, rate=self._atrous_rate, padding=padding)
    else:
      x = self._conv(x, self._kernel, strides=self._strides, padding=padding)

    if self._padding != 'VALID':
      # Manually update shape. Known shape information can be lost by tf.pad().
      # 1 + (n - 1) // s == ceil(n / s), the 'SAME' output size.
      height = (1 + (input_shape[1].value - 1) // strides[1]
                if input_shape[1].value else None)
      width = (1 + (input_shape[2].value - 1) // strides[2]
               if input_shape[2].value else None)
      shape = x.get_shape()
      x.set_shape([shape[0], height, width, shape[3]])

    return self._act(self._bias(x))


class Conv2D(Conv2DBase):
  """Conv2DBase bound to tf.nn.conv2d, with a kernel built from an initializer."""

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, initializer=None, name=None):
    """Initializes a Conv2D block.

    Arguments:
      depth: Output depth of the block (number of filters).
      filter_size: Size of the 2D filter: a single integer for a square
        filter, or a (height, width) tuple.
      strides: Tuple with the y and x stride.
      padding: Any padding mode accepted by tf.nn.conv2d, or
        'REFLECT'/'SYMMETRIC' for mirror padding.
      bias: Bias initializer or Bias object; None disables the bias. See the
        BiasAdd block.
      act: Optional activation function applied to the output.
      initializer: Optional weight initializer. When None,
        block_util.RsqrtInitializer(dims=(0, 1, 2)) is used.
      name: Name for this conv2d op.
    """
    super(Conv2D, self).__init__(
        depth, filter_size, strides, padding,
        bias=bias, act=act, conv=tf.nn.conv2d, name=name)

    with self._BlockScope():
      # Fall back to the default initializer only when none was supplied.
      self._initializer = (
          block_util.RsqrtInitializer(dims=(0, 1, 2))
          if initializer is None else initializer)

  def _CreateKernel(self, shape, dtype):
    """Builds the kernel by wrapping the initializer output in a new variable."""
    kernel_init = self._initializer(shape, dtype)
    return self.NewVar(kernel_init)