# Dataset parameters
# Each dataset should contain two folders: train and test
# Each video can be represented as:
#   - an image of concatenated frames
#   - '.mp4' or '.gif'
#   - folder with all frames from a specific video
# In the case of TaiChi, the same (YouTube) video can be split into many parts (chunks). Each part has the
# following name format: (id)#other#info.mp4. For example, '12335#adsbf.mp4' has id 12335. For TaiChi the id
# is the YouTube video id.
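# Illustrative layout for the folder-of-frames format (the chunk folder and frame file names below are
# hypothetical; only root_dir, the train/test split and the id naming scheme come from this config):
#   data/taichi-png/
#     train/
#       12335#adsbf/        # one chunk of YouTube video 12335, stored as individual frames
#         frame_0000.png
#         frame_0001.png
#     test/
#       ...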
dataset_params:
  # Path to the data. Data can be stored in several formats: .mp4 or .gif videos, stacked .png images, or folders with frames.
  root_dir: data/taichi-png
  # Image shape, needed for the stacked .png format.
  frame_shape: [256, 256, 3]
  # In the case of TaiChi, a single video can be split into many chunks, or there may be several videos for a single person.
  # In this case an epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the name of the video is '12335#adsbf.mp4', the id is assumed to be 12335.
  id_sampling: True
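  # For example (chunk names below are hypothetical): with id_sampling: True, '12335#part1.mp4' and
  # '12335#part2.mp4' both map to id 12335, so an epoch is a pass over the distinct ids; with
  # id_sampling: False, an epoch is a pass over every chunk separately.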
  # List with pairs for animation, None for random pairs
  pairs_list: data/taichi256.csv
  # Augmentation parameters; see augmentation.py for all possible augmentations.
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1
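    # Note: these jitter values are typically interpreted as ranges around the identity, e.g. brightness: 0.1
    # usually means the brightness factor is sampled from roughly [0.9, 1.1]; this interpretation is an
    # assumption here, see augmentation.py for the authoritative behaviour.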

# Defines model architecture
model_params:
  common_params:
    # Number of keypoints
    num_kp: 10
    # Number of channels per image
    num_channels: 3
    # Whether to use the first-order or the zero-order model
    estimate_jacobian: True
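    # With estimate_jacobian: True the model also predicts a local affine (Jacobian) part around each
    # keypoint, which is what makes it first order; with False only keypoint positions are used (zero order).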
  kp_detector_params:
     # Softmax temperature for keypoint heatmaps
     temperature: 0.1
     # Number of features multiplier
     block_expansion: 32
     # Maximum allowed number of features
     max_features: 1024
     # Number of blocks in the U-Net. Can be increased or decreased depending on resolution.
     num_blocks: 5
     # Keypoints are predicted on smaller images for better performance;
     # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
     scale_factor: 0.25
  generator_params:
    # Number of features multiplier
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling blocks in the Johnson architecture.
    # Can be increased or decreased depending on resolution.
    num_down_blocks: 2
    # Number of ResBlocks in the Johnson architecture.
    num_bottleneck_blocks: 6
    # Use occlusion map or not
    estimate_occlusion_map: True

    dense_motion_params:
      # Number of features multiplier
      block_expansion: 64
      # Maximum allowed number of features
      max_features: 1024
      # Number of blocks in the U-Net. Can be increased or decreased depending on resolution.
      num_blocks: 5
      # Dense motion is predicted on smaller images for better performance;
      # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
      scale_factor: 0.25
  discriminator_params:
    # The discriminator can be multiscale; if you want 2 discriminators, one on the original
    # resolution and one on half of it, specify scales: [1, 0.5]
    scales: [1]
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 512
    # Number of blocks. Can be increased or decreased depending on resolution.
    num_blocks: 4

# Parameters of training
train_params:
  # Number of training epochs 
  num_epochs: 100
  # For better I/O performance when the number of videos is small, the epoch length can be multiplied by this number.
  # Thus, effectively, with num_repeats=100 each epoch is 100 times larger.
  num_repeats: 150
  # Drop the learning rate by a factor of 10 after these epochs
  epoch_milestones: [60, 90]
  # Initial learning rate for all modules
  lr_generator: 2.0e-4
  lr_discriminator: 2.0e-4
  lr_kp_detector: 2.0e-4
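  # With the factor-of-10 drop at each milestone above, the 2.0e-4 rates become roughly:
  # 2.0e-4 until epoch 60, 2.0e-5 until epoch 90, and 2.0e-6 for the remaining epochs.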
  batch_size: 30
  # Scales for the perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125] and the image resolution is 256x256,
  # then the loss will be computed on resolutions 256x256, 128x128, 64x64 and 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Save checkpoints this frequently. If checkpoint_freq=50, a checkpoint will be saved every 50 epochs.
  checkpoint_freq: 50
  # Parameters of transform for equivariance loss
  transform_params:
    # Sigma for affine part
    sigma_affine: 0.05
    # Sigma for deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
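    # Presumably points_tps: 5 gives a 5x5 grid of TPS control points, each perturbed with sigma_tps noise
    # on top of the random affine perturbation (sigma_affine); this is an interpretation, not stated in this file.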
  loss_weights:
    # Weight for the LSGAN loss in the generator; 0 for no adversarial loss.
    generator_gan: 0
    # Weight for LSGAN loss in discriminator
    discriminator_gan: 1
    # Weights for the feature matching loss; the number of weights should equal the number of blocks in the discriminator.
    feature_matching: [10, 10, 10, 10]
    # Weights for perceptual loss.
    perceptual: [10, 10, 10, 10, 10]
    # Weights for value equivariance.
    equivariance_value: 10
    # Weights for jacobian equivariance.
    equivariance_jacobian: 10

# Parameters of reconstruction
reconstruction_params:
  # Maximum number of videos for reconstruction
  num_videos: 1000
  # Format for visualization; note that results will also be stored as stacked .png.
  format: '.mp4'

# Parameters of animation
animate_params:
  # Maximum number of pairs for animation; the pairs are either taken from pairs_list or chosen at random.
  num_pairs: 50
  # Format for visualization; note that results will also be stored as stacked .png.
  format: '.mp4'
  # Normalization of driving keypoints
  normalization_params:
    # Increase or decrease relative movement scale depending on the size of the object
    adapt_movement_scale: False
    # Apply only the relative displacement of the keypoints
    use_relative_movement: True
    # Apply only the relative change in the Jacobian
    use_relative_jacobian: True

# Visualization parameters
visualizer_params:
  # Draw keypoints of this size; increase or decrease depending on resolution
  kp_size: 5
  # Draw white border around images
  draw_border: True
  # Color map for keypoints
  colormap: 'gist_rainbow'
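
# Minimal sketch of how a config like this can be loaded (assumes PyYAML; the file path and the way the
# training script actually consumes the resulting dict are assumptions, not defined in this file):
#
#   import yaml
#
#   with open('config/taichi-256.yaml') as f:            # hypothetical path to this file
#       config = yaml.safe_load(f)
#
#   config['train_params']['batch_size']                  # -> 30
#   config['model_params']['common_params']['num_kp']     # -> 10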