# fd_fomm/config/taichi-256.yaml
# Dataset parameters
# Each dataset should contain two folders: train and test.
# Each video can be represented as:
#   - an image of concatenated frames
#   - an '.mp4' or '.gif' file
#   - a folder with all frames from a specific video
# In the case of TaiChi, the same (YouTube) video can be split into many parts (chunks). Each part follows the
# format (id)#other#info.mp4. For example, '12335#adsbf.mp4' has the id 12335. For TaiChi, the id is the
# YouTube video id.
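#
# An illustrative layout for the folder-of-frames format (hypothetical file
# names, assuming the train/test split and chunk naming described above):
#
#   data/taichi-png/
#     train/
#       12335#adsbf.mp4/   # folder containing this chunk's .png frames
#       ...
#     test/
#       ...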
dataset_params:
  # Path to the data. It can be stored in several formats: .mp4 or .gif videos, stacked .png images, or folders with frames.
root_dir: data/taichi-png
  # Image shape, needed for the stacked .png format.
frame_shape: [256, 256, 3]
  # In the case of TaiChi, a single video can be split into many chunks, and there may be several videos for a single person.
  # An epoch is then a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the video is named '12335#adsbf.mp4', the id is assumed to be 12335.
id_sampling: True
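  # A minimal sketch (in Python) of recovering the id from a chunk filename
  # under the naming scheme above; the helper name is hypothetical:
  #
  #   import os
  #
  #   def video_id(filename):
  #       # '12335#adsbf.mp4' -> '12335'
  #       return os.path.basename(filename).split('#')[0]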
# List with pairs for animation, None for random pairs
pairs_list: data/taichi256.csv
  # Augmentation parameters; see augmentation.py for all possible augmentations.
augmentation_params:
flip_param:
horizontal_flip: True
time_flip: True
jitter_param:
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.1
# Defines model architecture
model_params:
common_params:
    # Number of keypoints
num_kp: 10
# Number of channels per image
num_channels: 3
    # Whether to estimate Jacobians at keypoints: True for the first-order model, False for the zero-order model
estimate_jacobian: True
kp_detector_params:
# Softmax temperature for keypoint heatmaps
temperature: 0.1
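    # A minimal sketch (in Python/PyTorch) of what the temperature does to the
    # keypoint heatmaps; variable names are illustrative, not the repo's:
    #
    #   import torch.nn.functional as F
    #
    #   # logits: (batch, num_kp, H*W) raw detector scores; a lower temperature
    #   # makes the resulting heatmap sharper (more peaked)
    #   heatmap = F.softmax(logits / temperature, dim=2)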
    # Multiplier for the number of features
block_expansion: 32
# Maximum allowed number of features
max_features: 1024
    # Number of blocks in the U-Net. Can be increased or decreased depending on the resolution.
num_blocks: 5
    # Keypoints are predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
scale_factor: 0.25
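    # A minimal sketch (in Python/PyTorch) of the effect of scale_factor; plain
    # bilinear interpolation shown here, the repo may downsample differently:
    #
    #   import torch.nn.functional as F
    #
    #   # (B, 3, 256, 256) -> (B, 3, 64, 64)
    #   small = F.interpolate(image, scale_factor=0.25,
    #                         mode='bilinear', align_corners=False)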
generator_params:
    # Multiplier for the number of features
block_expansion: 64
# Maximum allowed number of features
max_features: 512
    # Number of downsampling blocks in the Johnson architecture.
    # Can be increased or decreased depending on the resolution.
num_down_blocks: 2
    # Number of ResBlocks in the Johnson architecture.
num_bottleneck_blocks: 6
    # Whether to use an occlusion map
estimate_occlusion_map: True
dense_motion_params:
      # Multiplier for the number of features
block_expansion: 64
# Maximum allowed number of features
max_features: 1024
      # Number of blocks in the U-Net. Can be increased or decreased depending on the resolution.
num_blocks: 5
      # Dense motion is predicted on smaller images for better performance;
      # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
scale_factor: 0.25
discriminator_params:
    # The discriminator can be multiscale; if you want 2 discriminators, one at the original
    # resolution and one at half the original, specify scales: [1, 0.5]
scales: [1]
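    # A minimal sketch (in Python/PyTorch) of a multiscale setup: each entry in
    # scales gets its own discriminator fed a downsampled copy of the image.
    # Names below are illustrative, not the repo's:
    #
    #   import torch.nn.functional as F
    #
    #   outputs = {}
    #   for scale in scales:  # e.g. [1, 0.5]
    #       x = image if scale == 1 else F.interpolate(
    #           image, scale_factor=scale, mode='bilinear', align_corners=False)
    #       outputs[scale] = discriminators[scale](x)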
    # Multiplier for the number of features
block_expansion: 32
# Maximum allowed number of features
max_features: 512
    # Number of blocks. Can be increased or decreased depending on the resolution.
num_blocks: 4
# Parameters of training
train_params:
# Number of training epochs
num_epochs: 100
  # For better I/O performance when the number of videos is small, the dataset can be repeated this many times per epoch.
  # Thus, effectively, with num_repeats=100 each epoch is 100 times larger.
num_repeats: 150
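  # A minimal sketch (in Python/PyTorch) of repeating a dataset within an epoch;
  # the wrapper name is illustrative, not the repo's exact class:
  #
  #   from torch.utils.data import Dataset
  #
  #   class RepeatedDataset(Dataset):
  #       def __init__(self, dataset, num_repeats):
  #           self.dataset, self.num_repeats = dataset, num_repeats
  #       def __len__(self):
  #           return len(self.dataset) * self.num_repeats
  #       def __getitem__(self, idx):
  #           return self.dataset[idx % len(self.dataset)]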
  # Drop the learning rate by a factor of 10 at these epochs
epoch_milestones: [60, 90]
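  # This matches a standard step scheduler; a minimal sketch (in Python/PyTorch),
  # assuming `optimizer` is one of the module optimizers:
  #
  #   from torch.optim.lr_scheduler import MultiStepLR
  #
  #   scheduler = MultiStepLR(optimizer, milestones=[60, 90], gamma=0.1)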
  # Initial learning rates for all modules
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
batch_size: 30
  # Scales for the perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125] and the image resolution is 256x256,
  # then the loss will be computed at resolutions 256x256, 128x128, 64x64, and 32x32.
scales: [1, 0.5, 0.25, 0.125]
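  # A minimal sketch (in Python/PyTorch) of the pyramid idea: an L1 loss on VGG
  # features at every scale, summed. `vgg_features` is a hypothetical feature
  # extractor, not the repo's exact module:
  #
  #   import torch
  #   import torch.nn.functional as F
  #
  #   loss = 0.0
  #   for scale in [1, 0.5, 0.25, 0.125]:
  #       kwargs = dict(scale_factor=scale, mode='bilinear', align_corners=False)
  #       p = pred if scale == 1 else F.interpolate(pred, **kwargs)
  #       t = target if scale == 1 else F.interpolate(target, **kwargs)
  #       loss = loss + torch.mean(torch.abs(vgg_features(p) - vgg_features(t)))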
  # Save checkpoints this frequently. If checkpoint_freq=50, a checkpoint will be saved every 50 epochs.
checkpoint_freq: 50
# Parameters of transform for equivariance loss
transform_params:
# Sigma for affine part
sigma_affine: 0.05
# Sigma for deformation part
sigma_tps: 0.005
    # Number of points in the deformation grid
points_tps: 5
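    # These sigmas parametrize a random affine + thin-plate-spline (TPS) warp T
    # used by the equivariance losses below; roughly (illustrative pseudocode
    # with hypothetical helpers, not the repo's exact code):
    #
    #   kp = detector(frame)
    #   kp_warped = detector(warp_frame(frame, T))
    #   loss_value = abs(kp - warp_coordinates(kp_warped, T)).mean()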
loss_weights:
# Weight for LSGAN loss in generator, 0 for no adversarial loss.
generator_gan: 0
# Weight for LSGAN loss in discriminator
discriminator_gan: 1
    # Weights for the feature matching loss; the count should match the number of blocks in the discriminator.
feature_matching: [10, 10, 10, 10]
# Weights for perceptual loss.
perceptual: [10, 10, 10, 10, 10]
    # Weight for the keypoint-value equivariance loss.
equivariance_value: 10
    # Weight for the Jacobian equivariance loss.
equivariance_jacobian: 10
# Parameters of reconstruction
reconstruction_params:
# Maximum number of videos for reconstruction
num_videos: 1000
  # Format for visualization; note that results will also be stored as stacked .png.
format: '.mp4'
# Parameters of animation
animate_params:
  # Maximum number of pairs for animation; the pairs will either be taken from pairs_list or chosen at random.
num_pairs: 50
  # Format for visualization; note that results will also be stored as stacked .png.
format: '.mp4'
  # Normalization of driving keypoints
normalization_params:
# Increase or decrease relative movement scale depending on the size of the object
adapt_movement_scale: False
    # Apply only the relative displacement of the keypoints
use_relative_movement: True
# Apply only relative change in jacobian
use_relative_jacobian: True
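    # A minimal sketch (in Python) of relative keypoint movement; names are
    # illustrative, not the repo's exact code:
    #
    #   # kp_source: source-image keypoints
    #   # kp_driving, kp_driving_initial: current / first driving-frame keypoints
    #   kp_new = kp_source + (kp_driving - kp_driving_initial) * movement_scale
    #   # with adapt_movement_scale=False, movement_scale is simply 1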
# Visualization parameters
visualizer_params:
  # Draw keypoints of this size; increase or decrease depending on the resolution
kp_size: 5
# Draw white border around images
draw_border: True
# Color map for keypoints
colormap: 'gist_rainbow'