# Source: Hugging Face Space abhishek/first-order-motion-model (Space status: Build error)
# File: first-order-motion-model/config/taichi-adv-256.yaml (5.65 kB)
# Last commit: "first commit" by abhishek (HF staff), 9915c5d, over 2 years ago

	# Dataset parameters
	dataset_params:
	# Path to data, data can be stored in several formats: .mp4 or .gif videos, stacked .png images or folders with frames.
	root_dir: data/taichi-png
	# Image shape, needed for staked .png format.
	frame_shape: [256, 256, 3]
	# In case of TaiChi single video can be splitted in many chunks, or the maybe several videos for single person.
	# In this case epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False)
	# If the name of the video '12335#adsbf.mp4' the id is assumed to be 12335
	id_sampling: True
	# List with pairs for animation, None for random pairs
	pairs_list: data/taichi256.csv
	# Augmentation parameters see augmentation.py for all posible augmentations
	augmentation_params:
	flip_param:
	horizontal_flip: True
	time_flip: True
	jitter_param:
	brightness: 0.1
	contrast: 0.1
	saturation: 0.1
	hue: 0.1

	# Defines model architecture
	model_params:
	common_params:
	# Number of keypoint
	num_kp: 10
	# Number of channels per image
	num_channels: 3
	# Using first or zero order model
	estimate_jacobian: True
	kp_detector_params:
	# Softmax temperature for keypoint heatmaps
	temperature: 0.1
	# Number of features mutliplier
	block_expansion: 32
	# Maximum allowed number of features
	max_features: 1024
	# Number of block in Unet. Can be increased or decreased depending or resolution.
	num_blocks: 5
	# Keypioint is predicted on smaller images for better performance,
	# scale_factor=0.25 means that 256x256 image will be resized to 64x64
	scale_factor: 0.25
	generator_params:
	# Number of features mutliplier
	block_expansion: 64
	# Maximum allowed number of features
	max_features: 512
	# Number of downsampling blocks in Jonson architecture.
	# Can be increased or decreased depending or resolution.
	num_down_blocks: 2
	# Number of ResBlocks in Jonson architecture.
	num_bottleneck_blocks: 6
	# Use occlusion map or not
	estimate_occlusion_map: True

	dense_motion_params:
	# Number of features mutliplier
	block_expansion: 64
	# Maximum allowed number of features
	max_features: 1024
	# Number of block in Unet. Can be increased or decreased depending or resolution.
	num_blocks: 5
	# Dense motion is predicted on smaller images for better performance,
	# scale_factor=0.25 means that 256x256 image will be resized to 64x64
	scale_factor: 0.25
	discriminator_params:
	# Discriminator can be multiscale, if you want 2 discriminator on original
	# resolution and half of the original, specify scales: [1, 0.5]
	scales: [1]
	# Number of features mutliplier
	block_expansion: 32
	# Maximum allowed number of features
	max_features: 512
	# Number of blocks. Can be increased or decreased depending or resolution.
	num_blocks: 4
	use_kp: True

	# Parameters of training
	train_params:
	# Number of training epochs
	num_epochs: 150
	# For better i/o performance when number of videos is small number of epochs can be multiplied by this number.
	# Thus effectivlly with num_repeats=100 each epoch is 100 times larger.
	num_repeats: 150
	# Drop learning rate by 10 times after this epochs
	epoch_milestones: []
	# Initial learing rate for all modules
	lr_generator: 2.0e-4
	lr_discriminator: 2.0e-4
	lr_kp_detector: 0
	batch_size: 27
	# Scales for perceptual pyramide loss. If scales = [1, 0.5, 0.25, 0.125] and image resolution is 256x256,
	# than the loss will be computer on resolutions 256x256, 128x128, 64x64, 32x32.
	scales: [1, 0.5, 0.25, 0.125]
	# Save checkpoint this frequently. If checkpoint_freq=50, checkpoint will be saved every 50 epochs.
	checkpoint_freq: 50
	# Parameters of transform for equivariance loss
	transform_params:
	# Sigma for affine part
	sigma_affine: 0.05
	# Sigma for deformation part
	sigma_tps: 0.005
	# Number of point in the deformation grid
	points_tps: 5
	loss_weights:
	# Weight for LSGAN loss in generator
	generator_gan: 1
	# Weight for LSGAN loss in discriminator
	discriminator_gan: 1
	# Weights for feature matching loss, the number should be the same as number of blocks in discriminator.
	feature_matching: [10, 10, 10, 10]
	# Weights for perceptual loss.
	perceptual: [10, 10, 10, 10, 10]
	# Weights for value equivariance.
	equivariance_value: 10
	# Weights for jacobian equivariance.
	equivariance_jacobian: 10

	# Parameters of reconstruction
	reconstruction_params:
	# Maximum number of videos for reconstruction
	num_videos: 1000
	# Format for visualization, note that results will be also stored in staked .png.
	format: '.mp4'

	# Parameters of animation
	animate_params:
	# Maximum number of pairs for animation, the pairs will be either taken from pairs_list or random.
	num_pairs: 50
	# Format for visualization, note that results will be also stored in staked .png.
	format: '.mp4'
	# Normalization of diriving keypoints
	normalization_params:
	# Increase or decrease relative movement scale depending on the size of the object
	adapt_movement_scale: False
	# Apply only relative displacement of the keypoint
	use_relative_movement: True
	# Apply only relative change in jacobian
	use_relative_jacobian: True

	# Visualization parameters
	visualizer_params:
	# Draw keypoints of this size, increase or decrease depending on resolution
	kp_size: 5
	# Draw white border around images
	draw_border: True
	# Color map for keypoints
	colormap: 'gist_rainbow'