# thin-plate-spline-motion-model / 768 / vox-768-finetune.yaml
# branch: root — "store config files with checkpoints" (commit d1a59ac)
---
# Use this file to finetune from a pretrained 256x256 model
# Dataset location and per-frame augmentation for the 768x768 VoxCeleb2 crops.
dataset_params:
  root_dir: ./video-preprocessing/vox2-768
  # NOTE(review): the plain scalar "768,768,3" parses as a single STRING, not a
  # list of ints; upstream configs in this model family write it as a flow list
  # (e.g. [256, 256, 3]). Confirm what the dataset loader expects — value kept
  # byte-identical here to preserve current behavior.
  frame_shape: 768,768,3
  # Sample frames grouped by identity rather than uniformly over all videos.
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1
# Network architecture. Without indentation these four sub-sections collapsed
# into one flat mapping with duplicate keys (block_expansion, max_features
# appear in both generator_params and dense_motion_params) — most YAML parsers
# silently keep only the last value. Restored as four sibling sub-mappings.
model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    # Model the background with a separate predicted transform.
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 3
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    # Dense motion is estimated at quarter resolution.
    scale_factor: 0.25
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128
# Main (generator) training schedule for the finetune run.
train_params:
  visualize_model: False
  num_epochs: 80
  num_repeats: 10
  # Higher LR seems to bring problems when finetuning
  lr_generator: 3.0e-5
  batch_size: 2
  scales: [1, 0.5, 0.25, 0.125, 0.0625]
  dataloader_workers: 8
  checkpoint_freq: 2
  # Dropout schedule for the TPS transforms: disabled from epoch 0 onward
  # is controlled by dropout_epoch; start/max probabilities below.
  dropout_epoch: 0
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 10
  # NOTE(review): bg_start (81) is beyond num_epochs (80), so the background
  # loss never activates in this run — presumably intentional, since the bg
  # predictor is frozen below. Confirm if bg supervision was meant to be off.
  bg_start: 81
  # Keep the pretrained 256px keypoint detector and bg predictor fixed while
  # finetuning at 768px.
  freeze_kp_detector: True
  freeze_bg_predictor: True
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    # One weight per pyramid scale (matches the 5 entries in `scales`).
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 10
  optimizer: 'adamw'
  optimizer_params:
    betas: [0.9, 0.999]
    weight_decay: 0.1
# Schedule for the second training stage (AVD network: animation via
# disentanglement). Flattening had merged these keys with train_params,
# clobbering num_epochs/num_repeats/batch_size/dataloader_workers/
# checkpoint_freq — restored as a nested mapping.
train_avd_params:
  num_epochs: 200
  num_repeats: 1
  batch_size: 1
  dataloader_workers: 6
  checkpoint_freq: 1
  # LR is decayed at these epoch milestones.
  epoch_milestones: [140, 180]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25
# Keypoint-overlay rendering for training visualizations.
visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'