Image-Manipulation-Face

Runtime error

File size: 20,607 Bytes

42d4082

# python 3.7
"""Utility functions for image editing from latent space."""

import os.path
import numpy as np

__all__ = [
    'parse_indices', 'interpolate', 'mix_style',
    'get_layerwise_manipulation_strength', 'manipulate', 'parse_boundary_list'
]


def parse_indices(obj, min_val=None, max_val=None):
  """Parses indices.

  If the input is a list or tuple, this function has no effect.

  The input can also be a string, which is either a comma separated list of
  numbers 'a, b, c', or a dash separated range 'a - c'. Space in the string will
  be ignored.

  Args:
    obj: The input object to parse indices from.
    min_val: If not `None`, this function will check that all indices are equal
      to or larger than this value. (default: None)
    max_val: If not `None`, this function will check that all indices are equal
      to or smaller than this field. (default: None)

  Returns:
    A list of integers.

  Raises:
    If the input is invalid, i.e., neither a list or tuple, nor a string.
  """
  if obj is None or obj == '':
    indices = []
  elif isinstance(obj, int):
    indices = [obj]
  elif isinstance(obj, (list, tuple, np.ndarray)):
    indices = list(obj)
  elif isinstance(obj, str):
    indices = []
    splits = obj.replace(' ', '').split(',')
    for split in splits:
      numbers = list(map(int, split.split('-')))
      if len(numbers) == 1:
        indices.append(numbers[0])
      elif len(numbers) == 2:
        indices.extend(list(range(numbers[0], numbers[1] + 1)))
  else:
    raise ValueError(f'Invalid type of input: {type(obj)}!')

  assert isinstance(indices, list)
  indices = sorted(list(set(indices)))
  for idx in indices:
    assert isinstance(idx, int)
    if min_val is not None:
      assert idx >= min_val, f'{idx} is smaller than min val `{min_val}`!'
    if max_val is not None:
      assert idx <= max_val, f'{idx} is larger than max val `{max_val}`!'

  return indices


def interpolate(src_codes, dst_codes, step=5):
  """Interpolates two sets of latent codes linearly.

  Args:
    src_codes: Source codes, with shape [num, *code_shape].
    dst_codes: Target codes, with shape [num, *code_shape].
    step: Number of interplolation steps, with source and target included. For
      example, if `step = 5`, three more samples will be inserted. (default: 5)

  Returns:
    Interpolated codes, with shape [num, step, *code_shape].

  Raises:
    ValueError: If the input two sets of latent codes are with different shapes.
  """
  if not (src_codes.ndim >= 2 and src_codes.shape == dst_codes.shape):
    raise ValueError(f'Shapes of source codes and target codes should both be '
                     f'[num, *code_shape], but {src_codes.shape} and '
                     f'{dst_codes.shape} are received!')
  num = src_codes.shape[0]
  code_shape = src_codes.shape[1:]

  a = src_codes[:, np.newaxis]
  b = dst_codes[:, np.newaxis]
  l = np.linspace(0.0, 1.0, step).reshape(
      [step if axis == 1 else 1 for axis in range(a.ndim)])
  results = a + l * (b - a)
  assert results.shape == (num, step, *code_shape)

  return results


def mix_style(style_codes,
              content_codes,
              num_layers=1,
              mix_layers=None,
              is_style_layerwise=True,
              is_content_layerwise=True):
  """Mixes styles from style codes to those of content codes.

  Each style code or content code consists of `num_layers` codes, each of which
  is typically fed into a particular layer of the generator. This function mixes
  styles by partially replacing the codes of `content_codes` from some certain
  layers with those of `style_codes`.

  For example, if both style code and content code are with shape [10, 512],
  meaning to have 10 layers and each employs a 512-dimensional latent code. And
  the 1st, 2nd, and 3rd layers are the target layers to perform style mixing.
  Then the top half of the content code (with shape [3, 512]) will be replaced
  by the top half of the style code (also with shape [3, 512]).

  NOTE: This function also supports taking single-layer latent codes as inputs,
  i.e., setting `is_style_layerwise` or `is_content_layerwise` as False. In this
  case, the corresponding code will be first repeated for `num_layers` before
  performing style mixing.

  Args:
    style_codes: Style codes, with shape [num_styles, *code_shape] or
      [num_styles, num_layers, *code_shape].
    content_codes: Content codes, with shape [num_contents, *code_shape] or
      [num_contents, num_layers, *code_shape].
    num_layers: Total number of layers in the generative model. (default: 1)
    mix_layers: Indices of the layers to perform style mixing. `None` means to
      replace all layers, in which case the content code will be completely
      replaced by style code. (default: None)
    is_style_layerwise: Indicating whether the input `style_codes` are
      layer-wise codes. (default: True)
    is_content_layerwise: Indicating whether the input `content_codes` are
      layer-wise codes. (default: True)
    num_layers

  Returns:
    Codes after style mixing, with shape [num_styles, num_contents, num_layers,
      *code_shape].

  Raises:
    ValueError: If input `content_codes` or `style_codes` is with invalid shape.
  """
  if not is_style_layerwise:
    style_codes = style_codes[:, np.newaxis]
    style_codes = np.tile(
        style_codes,
        [num_layers if axis == 1 else 1 for axis in range(style_codes.ndim)])
  if not is_content_layerwise:
    content_codes = content_codes[:, np.newaxis]
    content_codes = np.tile(
        content_codes,
        [num_layers if axis == 1 else 1 for axis in range(content_codes.ndim)])

  if not (style_codes.ndim >= 3 and style_codes.shape[1] == num_layers and
          style_codes.shape[1:] == content_codes.shape[1:]):
    raise ValueError(f'Shapes of style codes and content codes should be '
                     f'[num_styles, num_layers, *code_shape] and '
                     f'[num_contents, num_layers, *code_shape] respectively, '
                     f'but {style_codes.shape} and {content_codes.shape} are '
                     f'received!')

  layer_indices = parse_indices(mix_layers, min_val=0, max_val=num_layers - 1)
  if not layer_indices:
    layer_indices = list(range(num_layers))

  num_styles = style_codes.shape[0]
  num_contents = content_codes.shape[0]
  code_shape = content_codes.shape[2:]

  s = style_codes[:, np.newaxis]
  s = np.tile(s, [num_contents if axis == 1 else 1 for axis in range(s.ndim)])
  c = content_codes[np.newaxis]
  c = np.tile(c, [num_styles if axis == 0 else 1 for axis in range(c.ndim)])

  from_style = np.zeros(s.shape, dtype=bool)
  from_style[:, :, layer_indices] = True
  results = np.where(from_style, s, c)
  assert results.shape == (num_styles, num_contents, num_layers, *code_shape)

  return results


def get_layerwise_manipulation_strength(num_layers,
                                        truncation_psi,
                                        truncation_layers):
  """Gets layer-wise strength for manipulation.

  Recall the truncation trick played on layer [0, truncation_layers):

  w = truncation_psi * w + (1 - truncation_psi) * w_avg

  So, when using the same boundary to manipulate different layers, layer
  [0, truncation_layers) and layer [truncation_layers, num_layers) should use
  different strength to eliminate the effect from the truncation trick. More
  concretely, the strength for layer [0, truncation_layers) is set as
  `truncation_psi`, while that for other layers are set as 1.
  """
  strength = [1.0 for _ in range(num_layers)]
  if truncation_layers > 0:
    for layer_idx in range(0, truncation_layers):
      strength[layer_idx] = truncation_psi
  return strength


def manipulate(latent_codes,
               boundary,
               start_distance=-5.0,
               end_distance=5.0,
               step=21,
               layerwise_manipulation=False,
               num_layers=1,
               manipulate_layers=None,
               is_code_layerwise=False,
               is_boundary_layerwise=False,
               layerwise_manipulation_strength=1.0):
  """Manipulates the given latent codes with respect to a particular boundary.

  Basically, this function takes a set of latent codes and a boundary as inputs,
  and outputs a collection of manipulated latent codes.

  For example, let `step` to be 10, `latent_codes` to be with shape [num,
  *code_shape], and `boundary` to be with shape [1, *code_shape] and unit norm.
  Then the output will be with shape [num, 10, *code_shape]. For each 10-element
  manipulated codes, the first code is `start_distance` away from the original
  code (i.e., the input) along the `boundary` direction, while the last code is
  `end_distance` away. Remaining codes are linearly interpolated. Here,
  `distance` is sign sensitive.

  NOTE: This function also supports layer-wise manipulation, in which case the
  generator should be able to take layer-wise latent codes as inputs. For
  example, if the generator has 18 convolutional layers in total, and each of
  which takes an independent latent code as input. It is possible, sometimes
  with even better performance, to only partially manipulate these latent codes
  corresponding to some certain layers yet keeping others untouched.

  NOTE: Boundary is assumed to be normalized to unit norm already.

  Args:
    latent_codes: The input latent codes for manipulation, with shape
      [num, *code_shape] or [num, num_layers, *code_shape].
    boundary: The semantic boundary as reference, with shape [1, *code_shape] or
      [1, num_layers, *code_shape].
    start_distance: Start point for manipulation. (default: -5.0)
    end_distance: End point for manipulation. (default: 5.0)
    step: Number of manipulation steps. (default: 21)
    layerwise_manipulation: Whether to perform layer-wise manipulation.
      (default: False)
    num_layers: Number of layers. Only active when `layerwise_manipulation` is
      set as `True`. Should be a positive integer. (default: 1)
    manipulate_layers: Indices of the layers to perform manipulation. `None`
      means to manipulate latent codes from all layers. (default: None)
    is_code_layerwise: Whether the input latent codes are layer-wise. If set as
      `False`, the function will first repeat the input codes for `num_layers`
      times before perform manipulation. (default: False)
    is_boundary_layerwise: Whether the input boundary is layer-wise. If set as
      `False`, the function will first repeat boundary for `num_layers` times
      before perform manipulation. (default: False)
    layerwise_manipulation_strength: Manipulation strength for each layer. Only
      active when `layerwise_manipulation` is set as `True`. This field can be
      used to resolve the strength discrepancy across layers when truncation
      trick is on. See function `get_layerwise_manipulation_strength()` for
      details. A tuple, list, or `numpy.ndarray` is expected. If set as a single
      number, this strength will be used for all layers. (default: 1.0)

  Returns:
    Manipulated codes, with shape [num, step, *code_shape] if
      `layerwise_manipulation` is set as `False`, or shape [num, step,
      num_layers, *code_shape] if `layerwise_manipulation` is set as `True`.

  Raises:
    ValueError: If the input latent codes, boundary, or strength are with
      invalid shape.
  """
  if not (boundary.ndim >= 2 and boundary.shape[0] == 1):
    raise ValueError(f'Boundary should be with shape [1, *code_shape] or '
                     f'[1, num_layers, *code_shape], but '
                     f'{boundary.shape} is received!')

  if not layerwise_manipulation:
    assert not is_code_layerwise
    assert not is_boundary_layerwise
    num_layers = 1
    manipulate_layers = None
    layerwise_manipulation_strength = 1.0

  # Preprocessing for layer-wise manipulation.
  # Parse indices of manipulation layers.
  layer_indices = parse_indices(
      manipulate_layers, min_val=0, max_val=num_layers - 1)
  if not layer_indices:
    layer_indices = list(range(num_layers))
  # Make latent codes layer-wise if needed.
  assert num_layers > 0
  if not is_code_layerwise:
    x = latent_codes[:, np.newaxis]
    x = np.tile(x, [num_layers if axis == 1 else 1 for axis in range(x.ndim)])
  else:
    x = latent_codes
    if x.shape[1] != num_layers:
      raise ValueError(f'Latent codes should be with shape [num, num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {x.shape} is received!')
  # Make boundary layer-wise if needed.
  if not is_boundary_layerwise:
    b = boundary
    b = np.tile(b, [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  else:
    b = boundary[0]
    if b.shape[0] != num_layers:
      raise ValueError(f'Boundary should be with shape [num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {b.shape} is received!')
  # Get layer-wise manipulation strength.
  if isinstance(layerwise_manipulation_strength, (int, float)):
    s = [float(layerwise_manipulation_strength) for _ in range(num_layers)]
  elif isinstance(layerwise_manipulation_strength, (list, tuple)):
    s = layerwise_manipulation_strength
    if len(s) != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{len(s)}` '
                       f'mismatches number of layers `{num_layers}`!')
  elif isinstance(layerwise_manipulation_strength, np.ndarray):
    s = layerwise_manipulation_strength
    if s.size != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{s.size}` '
                       f'mismatches number of layers `{num_layers}`!')
  else:
    raise ValueError(f'Unsupported type of `layerwise_manipulation_strength`!')
  s = np.array(s).reshape(
      [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  b = b * s

  if x.shape[1:] != b.shape:
    raise ValueError(f'Latent code shape {x.shape} and boundary shape '
                     f'{b.shape} mismatch!')
  num = x.shape[0]
  code_shape = x.shape[2:]

  x = x[:, np.newaxis]
  b = b[np.newaxis, np.newaxis, :]
  l = np.linspace(start_distance, end_distance, step).reshape(
      [step if axis == 1 else 1 for axis in range(x.ndim)])
  results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
  is_manipulatable = np.zeros(results.shape, dtype=bool)
  is_manipulatable[:, :, layer_indices] = True
  results = np.where(is_manipulatable, x + l * b, results)
  assert results.shape == (num, step, num_layers, *code_shape)

  return results if layerwise_manipulation else results[:, :, 0]


def manipulate2(latent_codes,
               proj,
               mindex,
               start_distance=-5.0,
               end_distance=5.0,
               step=21,
               layerwise_manipulation=False,
               num_layers=1,
               manipulate_layers=None,
               is_code_layerwise=False,
               layerwise_manipulation_strength=1.0):


  if not layerwise_manipulation:
    assert not is_code_layerwise
#    assert not is_boundary_layerwise
    num_layers = 1
    manipulate_layers = None
    layerwise_manipulation_strength = 1.0

  # Preprocessing for layer-wise manipulation.
  # Parse indices of manipulation layers.
  layer_indices = parse_indices(
      manipulate_layers, min_val=0, max_val=num_layers - 1)
  if not layer_indices:
    layer_indices = list(range(num_layers))
  # Make latent codes layer-wise if needed.
  assert num_layers > 0
  if not is_code_layerwise:
    x = latent_codes[:, np.newaxis]
    x = np.tile(x, [num_layers if axis == 1 else 1 for axis in range(x.ndim)])
  else:
    x = latent_codes
    if x.shape[1] != num_layers:
      raise ValueError(f'Latent codes should be with shape [num, num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {x.shape} is received!')
  # Make boundary layer-wise if needed.
#  if not is_boundary_layerwise:
#    b = boundary
#    b = np.tile(b, [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
#  else:
#    b = boundary[0]
#    if b.shape[0] != num_layers:
#      raise ValueError(f'Boundary should be with shape [num_layers, '
#                       f'*code_shape], where `num_layers` equals to '
#                       f'{num_layers}, but {b.shape} is received!')
  # Get layer-wise manipulation strength.
  if isinstance(layerwise_manipulation_strength, (int, float)):
    s = [float(layerwise_manipulation_strength) for _ in range(num_layers)]
  elif isinstance(layerwise_manipulation_strength, (list, tuple)):
    s = layerwise_manipulation_strength
    if len(s) != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{len(s)}` '
                       f'mismatches number of layers `{num_layers}`!')
  elif isinstance(layerwise_manipulation_strength, np.ndarray):
    s = layerwise_manipulation_strength
    if s.size != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{s.size}` '
                       f'mismatches number of layers `{num_layers}`!')
  else:
    raise ValueError(f'Unsupported type of `layerwise_manipulation_strength`!')
#  s = np.array(s).reshape(
#      [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
#  b = b * s

#  if x.shape[1:] != b.shape:
#    raise ValueError(f'Latent code shape {x.shape} and boundary shape '
#                     f'{b.shape} mismatch!')
  num = x.shape[0]
  code_shape = x.shape[2:]

  x = x[:, np.newaxis]
#  b = b[np.newaxis, np.newaxis, :]
#  l = np.linspace(start_distance, end_distance, step).reshape(
#      [step if axis == 1 else 1 for axis in range(x.ndim)])
  results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
  is_manipulatable = np.zeros(results.shape, dtype=bool)
  is_manipulatable[:, :, layer_indices] = True
  
  tmp=MPC(proj,x,mindex,start_distance,end_distance,step)
  tmp = tmp[:, :,np.newaxis]
  tmp1 = np.tile(tmp, [num_layers if axis == 2 else 1 for axis in range(tmp.ndim)])
  
  
  results = np.where(is_manipulatable, tmp1, results)
#  print(results.shape)
  assert results.shape == (num, step, num_layers, *code_shape)
  return results if layerwise_manipulation else results[:, :, 0]

def MPC(proj,x,mindex,start_distance,end_distance,step):
    # x shape (batch_size,1,num_layers,feature)
#    print(x.shape)
    x1=proj.transform(x[:,0,0,:]) #/np.sqrt(proj.explained_variance_) # (batch_size,num_pc)
    
    x1 = x1[:, np.newaxis]
    x1 = np.tile(x1, [step if axis == 1 else 1 for axis in range(x1.ndim)])
    
    
    l = np.linspace(start_distance, end_distance, step)[None,:]
    x1[:,:,mindex]+=l
    
    tmp=x1.reshape((-1,x1.shape[-1])) #*np.sqrt(proj.explained_variance_)
#    print('xxx')
    x2=proj.inverse_transform(tmp)
    x2=x2.reshape((x1.shape[0],x1.shape[1],-1))
    
#    x1 = x1[:, np.newaxis]
#    x1 = np.tile(x1, [step if axis == 1 else 1 for axis in range(x1.ndim)])
    
    return x2
    



def parse_boundary_list(boundary_list_path):
  """Parses boundary list.

  Sometimes, a text file containing a list of boundaries will significantly
  simplify image manipulation with a large amount of boundaries. This function
  is used to parse boundary information from such list file.

  Basically, each item in the list should be with format
  `($NAME, $SPACE_TYPE): $PATH`. `DISABLE` at the beginning of the line can
  disable a particular boundary.

  Sample:

  (age, z): $AGE_BOUNDARY_PATH
  (gender, w): $GENDER_BOUNDARY_PATH
  DISABLE(pose, wp): $POSE_BOUNDARY_PATH

  Args:
    boundary_list_path: Path to the boundary list.

  Returns:
    A dictionary, whose key is a two-element tuple (boundary_name, space_type)
      and value is the corresponding boundary path.

  Raise:
    ValueError: If the given boundary list does not exist.
  """
  if not os.path.isfile(boundary_list_path):
    raise ValueError(f'Boundary list `boundary_list_path` does not exist!')

  boundaries = {}
  with open(boundary_list_path, 'r') as f:
    for line in f:
      if line[:len('DISABLE')] == 'DISABLE':
        continue
      boundary_info, boundary_path = line.strip().split(':')
      boundary_name, space_type = boundary_info.strip()[1:-1].split(',')
      boundary_name = boundary_name.strip()
      space_type = space_type.strip().lower()
      boundary_path = boundary_path.strip()
      boundaries[(boundary_name, space_type)] = boundary_path
  return boundaries