# DiffIR2VR/configs/inference/my_cldm.yaml
# Provenance (from the hosting page): commit 1de8821 "first commit",
# shown next to user jimmycv07; file size 1.91 kB.
# Model definition: `target` names the class to build and `params` holds its
# settings (presumably passed as constructor keyword arguments -- confirm
# against the config loader).
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped. Verify section membership against the
# consuming code, in particular that `seed`, `batch_size`, `align_batch` and
# `global_rand` belong to `VidToMe_cfg`.
target: model.ControlLDM
params:
  # Latent-warping settings. Every `*_period` value is a two-element list;
  # presumably a [start, end] window over the sampling schedule, with [0, 0]
  # leaving that stage off -- TODO confirm.
  latent_warp_cfg:
    latent_control: true
    # interval: 5
    # x0_strength: 1
    warp_period: [0, 0.1]
    merge_period: [0, 0]
    cross_period: [0, 0]
    mask_period: [0, 0]
    ada_period: [0, 0]

  # Token-merging (VidToMe) settings. `ToMe_period` and `merge_ratio` are
  # two-element lists; presumably a [start, end] window and the ratio at each
  # end of it -- TODO confirm.
  VidToMe_cfg:
    flow_merge: false
    ToMe_period: [0, 1]
    merge_ratio: [0.9, 0]
    merge_global: false
    global_merge_ratio: 0.3
    seed: 123
    batch_size: 1
    align_batch: false
    global_rand: 0.1

  # Scale applied to latents; 0.18215 is presumably the usual Stable
  # Diffusion VAE scaling constant -- confirm against the model code.
  latent_scale_factor: 0.18215

  # Denoising UNet: 4 latent channels in and out. `context_dim` has the same
  # value as `clip_cfg.text_cfg.width` (1024).
  unet_cfg:
    use_checkpoint: true
    image_size: 32  # unused
    in_channels: 4
    out_channels: 4
    model_channels: 320
    attention_resolutions: [4, 2, 1]
    num_res_blocks: 2
    channel_mult: [1, 2, 4, 4]
    num_head_channels: 64  # need to fix for flash-attn
    use_spatial_transformer: true
    use_linear_in_transformer: true
    transformer_depth: 1
    context_dim: 1024
    legacy: false

  # Autoencoder: 3-channel images, 4-channel latents (`embed_dim` and
  # `z_channels` are both 4), no attention layers (`attn_resolutions` is empty).
  vae_cfg:
    embed_dim: 4
    ddconfig:
      double_z: true
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 2, 4, 4]
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0

  # CLIP encoder description with separate vision and text tower settings.
  # `layer` selects which text-encoder output to use.
  clip_cfg:
    embed_dim: 1024
    vision_cfg:
      image_size: 224
      layers: 32
      width: 1280
      head_width: 80
      patch_size: 14
    text_cfg:
      context_length: 77
      vocab_size: 49408
      width: 1024
      heads: 16
      layers: 24
    layer: "penultimate"

  # ControlNet branch: same values as `unet_cfg`, except that it declares
  # `hint_channels` (the conditioning input) instead of `out_channels`.
  controlnet_cfg:
    use_checkpoint: true
    image_size: 32  # unused
    in_channels: 4
    hint_channels: 4
    model_channels: 320
    attention_resolutions: [4, 2, 1]
    num_res_blocks: 2
    channel_mult: [1, 2, 4, 4]
    num_head_channels: 64  # need to fix for flash-attn
    use_spatial_transformer: true
    use_linear_in_transformer: true
    transformer_depth: 1
    context_dim: 1024
    legacy: false