# Training configuration with two top-level sections: `model` and `dataloader`.
#
# NOTE(review): the block structure below was restored from a copy whose
# whitespace had been collapsed. Each `is_first_stage` key was placed in the
# nearest enclosing `params` mapping -- confirm against the consuming code.
model:
  base_learning_rate: 1.0e-6
  target: refnet.models.animator.Animator
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: reference
    control_key: control
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: inject
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: true
    ucg_rate: 0.1
    use_token: true
    offset_noise_level: 0.05
    is_first_stage: false

    # Same channel/attention layout as `refnet_config` below, plus the
    # `c_channels`, `use_temporal` and `frames_t` keys.
    unet_config:
      target: refnet.modules.unet.DualCondUNet
      params:
        image_size: 32  # unused
        in_channels: 4
        c_channels: 3
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
        legacy: false
        use_temporal: true
        frames_t: 16

    refnet_config:
      target: refnet.modules.unet.ReferenceNet
      params:
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
        legacy: false

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
        is_first_stage: false

    cond_stage_config:
      target: refnet.modules.encoders.FrozenOpenCLIPImageEmbedder
      params:
        arch: ViT-L-14
        output_tokens: true
        is_first_stage: false

dataloader:
  class: AnimateLoader
  params:
    transform_list:
      flip: true
      rotate: false
      resize: true
      jitter: false
      rotate_range: 45
    refset_key: reference  # only use deformation training in ColorizeDiffusion v2
    load_size: 576
    # Crop images to (crop_size, crop_size); images are cropped randomly when
    # crop_size < load_size.
    crop_size: 512
    keep_ratio: false
    inverse_grayscale: true
    is_first_stage: false
    shuffle: true