# Use this file to finetune from a pretrained 256x256 model
dataset_params:
  root_dir: ./video-preprocessing/vox2-768
  frame_shape: 768,768,3
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1
model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 3
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128
train_params:
  visualize_model: False
  num_epochs: 80
  num_repeats: 10
  # Higher LR seems to bring problems when finetuning
  lr_generator: 3.0e-5
  batch_size: 2
  scales: [1, 0.5, 0.25, 0.125, 0.0625]
  dataloader_workers: 8
  checkpoint_freq: 2
  dropout_epoch: 0
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 10
  bg_start: 81
  freeze_kp_detector: True
  freeze_bg_predictor: True
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 10
  optimizer: 'adamw'
  optimizer_params:
    betas: [ 0.9, 0.999 ]
    weight_decay: 0.1
train_avd_params:
  num_epochs: 200
  num_repeats: 1
  batch_size: 1
  dataloader_workers: 6
  checkpoint_freq: 1
  epoch_milestones: [140, 180]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25
visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'