nyanko7 committed on
Commit
fbacde2
1 Parent(s): 2e2d54e

Create sdxl_base.yaml

Browse files
Files changed (1) hide show
  1. sdxl_base.yaml +86 -0
sdxl_base.yaml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: sgm.models.diffusion.DiffusionEngine
3
+ params:
4
+ scale_factor: 0.13025
5
+ disable_first_stage_autocast: True
6
+
7
+ network_config:
8
+ target: sgm.modules.diffusionmodules.openaimodel.UNetModel
9
+ params:
10
+ adm_in_channels: 2816
11
+ num_classes: sequential
12
+ use_checkpoint: True
13
+ in_channels: 4
14
+ out_channels: 4
15
+ model_channels: 320
16
+ attention_resolutions: [4, 2]
17
+ num_res_blocks: 2
18
+ channel_mult: [1, 2, 4]
19
+ num_head_channels: 64
20
+ use_spatial_transformer: True
21
+ use_linear_in_transformer: True
22
+ transformer_depth: [1, 2, 10] # note: the first entry is unused (attention_resolutions starts at 2); 32, 16, 8 --> 64, 32, 16
23
+ context_dim: 2048
24
+ spatial_transformer_attn_type: softmax
25
+ legacy: False
26
+
27
+ conditioner_config:
28
+ target: sgm.modules.GeneralConditioner
29
+ params:
30
+ emb_models:
31
+ # crossattn cond
32
+ - is_trainable: False
33
+ input_key: prompts
34
+ target: sgm.encoders.FrozenCLIPEmbedder
35
+ params:
36
+ layer: hidden
37
+ layer_idx: 11
38
+ # crossattn and vector cond
39
+ - is_trainable: False
40
+ input_key: prompts
41
+ target: sgm.encoders.FrozenOpenCLIPEmbedder2
42
+ params:
43
+ arch: ViT-bigG-14
44
+ version: laion2b_s39b_b160k
45
+ freeze: True
46
+ layer: penultimate
47
+ always_return_pooled: True
48
+ legacy: False
49
+ # vector cond
50
+ - is_trainable: False
51
+ input_key: original_size_as_tuple
52
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
53
+ params:
54
+ outdim: 256 # multiplied by two
55
+ # vector cond
56
+ - is_trainable: False
57
+ input_key: crop_coords_top_left
58
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
59
+ params:
60
+ outdim: 256 # multiplied by two
61
+ # vector cond
62
+ - is_trainable: False
63
+ input_key: target_size_as_tuple
64
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
65
+ params:
66
+ outdim: 256 # multiplied by two
67
+
68
+ first_stage_config:
69
+ target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
70
+ params:
71
+ embed_dim: 4
72
+ monitor: val/rec_loss
73
+ ddconfig:
74
+ attn_type: vanilla
75
+ double_z: true
76
+ z_channels: 4
77
+ resolution: 256
78
+ in_channels: 3
79
+ out_ch: 3
80
+ ch: 128
81
+ ch_mult: [1, 2, 4, 4]
82
+ num_res_blocks: 2
83
+ attn_resolutions: []
84
+ dropout: 0.0
85
+ lossconfig:
86
+ target: torch.nn.Identity