target: model.ControlLDM
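# ControlLDM couples a ControlNet branch with a latent diffusion backbone; the
# sub-configs under params are presumably forwarded to the corresponding
# module constructors when the model is instantiated.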
params:
  latent_warp_cfg:
    latent_control: True
    # interval: 5
    # x0_strength: 1
    warp_period: [0, 0.1]
    merge_period: [0, 0]
    cross_period: [0, 0]
    mask_period: [0, 0]
    ada_period: [0, 0]
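    # The *_period pairs are assumed to be [start, end] fractions of the
    # denoising schedule during which a mechanism is active (the convention
    # these names follow in Rerender-A-Video): warp_period: [0, 0.1] applies
    # flow-based latent warping only in the first 10% of sampling steps, and
    # [0, 0] disables a constraint entirely.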

  VidToMe_cfg:
    flow_merge: False
    ToMe_period: [0, 1]
    merge_ratio: [0.9, 0]
    merge_global: False
    global_merge_ratio: 0.3
    seed: 123
    batch_size: 1
    align_batch: False
    global_rand: 0.1
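    # VidToMe-style token merging. ToMe_period: [0, 1] keeps merging active
    # over the whole schedule; merge_ratio: [0.9, 0] is assumed to anneal the
    # fraction of merged tokens from 0.9 down to 0 across sampling.
    # merge_global and global_merge_ratio presumably control merging against a
    # token bank shared across the whole clip, and align_batch aligns merge
    # decisions within a batch.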

  latent_scale_factor: 0.18215
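  # 0.18215 is the standard Stable Diffusion scaling constant applied to VAE
  # latents (z = 0.18215 * encode(x)) so the diffusion model operates on
  # roughly unit-variance inputs.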
  unet_cfg:
    use_checkpoint: True
    image_size: 32 # unused
    in_channels: 4
    out_channels: 4
    model_channels: 320
    attention_resolutions: [ 4, 2, 1 ]
    num_res_blocks: 2
    channel_mult: [ 1, 2, 4, 4 ]
    num_head_channels: 64 # head dim must stay fixed at 64 for flash-attn
    use_spatial_transformer: True
    use_linear_in_transformer: True
    transformer_depth: 1
    context_dim: 1024
    legacy: False
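    # These hyperparameters match the Stable Diffusion 2.x UNet: 320 base
    # channels with [1, 2, 4, 4] multipliers, attention at downsample factors
    # 4, 2 and 1, and context_dim: 1024 matching the OpenCLIP-H text
    # embedding width.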
  vae_cfg:
    embed_dim: 4
    ddconfig:
      double_z: True
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1, 2, 4, 4 ]
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
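    # Standard Stable Diffusion AutoencoderKL: three downsampling stages
    # (ch_mult of length 4) give 8x spatial compression into a 4-channel
    # latent, and double_z: True makes the encoder output both mean and
    # log-variance for the KL posterior.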
  clip_cfg:
    embed_dim: 1024
    vision_cfg:
      image_size: 224
      layers: 32
      width: 1280
      head_width: 80
      patch_size: 14
    text_cfg:
      context_length: 77
      vocab_size: 49408
      width: 1024
      heads: 16
      layers: 24
    layer: "penultimate"
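    # These dimensions match OpenCLIP ViT-H/14 (the SD 2.x text encoder);
    # layer: "penultimate" takes conditioning features from the second-to-last
    # text transformer block, as Stable Diffusion 2.x does.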
  controlnet_cfg:
    use_checkpoint: True
    image_size: 32 # unused
    in_channels: 4
    hint_channels: 4
    model_channels: 320
    attention_resolutions: [ 4, 2, 1 ]
    num_res_blocks: 2
    channel_mult: [ 1, 2, 4, 4 ]
    num_head_channels: 64 # head dim must stay fixed at 64 for flash-attn
    use_spatial_transformer: True
    use_linear_in_transformer: True
    transformer_depth: 1
    context_dim: 1024
    legacy: False
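    # The control branch mirrors the UNet encoder above so its features can be
    # added back at matching resolutions. hint_channels: 4 suggests the hint is
    # supplied as a 4-channel latent rather than a 3-channel image (standard
    # ControlNet uses hint_channels: 3); this reading is an assumption from the
    # channel count alone.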