ClashSAN tsukimiya committed on
Commit
503abc4
1 Parent(s): 6a727e7

Create cldm_v21.yaml (#3)

Browse files

- Create cldm_v21.yaml (f391e6020d5503b347d5d8d8a0c4acc272c1cc5a)


Co-authored-by: Tsukimiya <tsukimiya@users.noreply.huggingface.co>

Files changed (1) hide show
  1. cldm_v21.yaml +85 -0
cldm_v21.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: cldm.cldm.ControlLDM
3
+ params:
4
+ linear_start: 0.00085
5
+ linear_end: 0.0120
6
+ num_timesteps_cond: 1
7
+ log_every_t: 200
8
+ timesteps: 1000
9
+ first_stage_key: "jpg"
10
+ cond_stage_key: "txt"
11
+ control_key: "hint"
12
+ image_size: 64
13
+ channels: 4
14
+ cond_stage_trainable: false
15
+ conditioning_key: crossattn
16
+ monitor: val/loss_simple_ema
17
+ scale_factor: 0.18215
18
+ use_ema: False
19
+ only_mid_control: False
20
+
21
+ control_stage_config:
22
+ target: cldm.cldm.ControlNet
23
+ params:
24
+ use_checkpoint: True
25
+ image_size: 32 # unused
26
+ in_channels: 4
27
+ hint_channels: 3
28
+ model_channels: 320
29
+ attention_resolutions: [ 4, 2, 1 ]
30
+ num_res_blocks: 2
31
+ channel_mult: [ 1, 2, 4, 4 ]
32
+ num_head_channels: 64 # need to fix for flash-attn
33
+ use_spatial_transformer: True
34
+ use_linear_in_transformer: True
35
+ transformer_depth: 1
36
+ context_dim: 1024
37
+ legacy: False
38
+
39
+ unet_config:
40
+ target: cldm.cldm.ControlledUnetModel
41
+ params:
42
+ use_checkpoint: True
43
+ image_size: 32 # unused
44
+ in_channels: 4
45
+ out_channels: 4
46
+ model_channels: 320
47
+ attention_resolutions: [ 4, 2, 1 ]
48
+ num_res_blocks: 2
49
+ channel_mult: [ 1, 2, 4, 4 ]
50
+ num_head_channels: 64 # need to fix for flash-attn
51
+ use_spatial_transformer: True
52
+ use_linear_in_transformer: True
53
+ transformer_depth: 1
54
+ context_dim: 1024
55
+ legacy: False
56
+
57
+ first_stage_config:
58
+ target: ldm.models.autoencoder.AutoencoderKL
59
+ params:
60
+ embed_dim: 4
61
+ monitor: val/rec_loss
62
+ ddconfig:
63
+ #attn_type: "vanilla-xformers"
64
+ double_z: true
65
+ z_channels: 4
66
+ resolution: 256
67
+ in_channels: 3
68
+ out_ch: 3
69
+ ch: 128
70
+ ch_mult:
71
+ - 1
72
+ - 2
73
+ - 4
74
+ - 4
75
+ num_res_blocks: 2
76
+ attn_resolutions: []
77
+ dropout: 0.0
78
+ lossconfig:
79
+ target: torch.nn.Identity
80
+
81
+ cond_stage_config:
82
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
83
+ params:
84
+ freeze: True
85
+ layer: "penultimate"