File size: 5,612 Bytes
00fc29f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Top-level model definition. `target` is a dotted class path and `params`
# holds the settings passed along with it (presumably constructor arguments
# resolved by the project's config loader - TODO confirm against the loader).
model:
  target: BOOXEL.models.BOOXEL_model.BOOXELModel
  params:
    # Two separate dtypes are configured: bf16 for `ae_dtype` and fp16 for
    # `diffusion_dtype` (presumably autoencoder vs. diffusion network - confirm).
    ae_dtype: bf16
    diffusion_dtype: fp16
    scale_factor: 0.13025
    disable_first_stage_autocast: True
    # Dotted path of the wrapper class placed around the network.
    network_wrapper: sgm.modules.diffusionmodules.wrappers.ControlWrapper

    # Denoiser definition. Besides `num_idx`, it is composed of three
    # sub-configs (weighting, scaling, discretization), each naming its
    # implementation class through `target` and taking no extra params here.
    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiserWithControl
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        # Same discretizer class as sampler_config.discretization_config.
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    # Control-stage module definition. Every key it shares with
    # network_config carries the same value there; `input_upscale` is the
    # only key specific to this block.
    control_stage_config:
      target: BOOXEL.modules.BOOXEL_v0.GLVControl
      params:
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: True
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: True
        use_linear_in_transformer: True
        # Note: the first entry is unused (because attn_res starts at 2);
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        # Alternative depth setting, kept disabled:
        # transformer_depth: [1, 1, 4]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: False
        input_upscale: 1

    # Main network definition. `mode`, `project_type` and
    # `project_channel_scale` are specific to this block; the remaining keys
    # carry the same values as the matching keys in control_stage_config.
    network_config:
      target: BOOXEL.modules.BOOXEL_v0.LightGLVUNet
      params:
        mode: XL-base
        project_type: ZeroSFT
        project_channel_scale: 2
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: True
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: True
        use_linear_in_transformer: True
        # Note: the first entry is unused (because attn_res starts at 2);
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: False

    # Conditioner definition: a list of five embedders, all marked
    # `is_trainable: False`. The first two read the `txt` input key; the last
    # three read size/crop keys and share the same embedder class and outdim.
    conditioner_config:
      target: sgm.modules.GeneralConditionerWithControl
      params:
        emb_models:
          # cross-attention conditioning
          - is_trainable: False
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
          # cross-attention and vector conditioning
          - is_trainable: False
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              freeze: True
              layer: penultimate
              always_return_pooled: True
              legacy: False
          # vector conditioning
          - is_trainable: False
            input_key: original_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector conditioning
          - is_trainable: False
            input_key: crop_coords_top_left
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector conditioning
          - is_trainable: False
            input_key: target_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    # First-stage model definition. No checkpoint path is set in this block
    # (`ckpt_path` is null), and the loss slot is filled with
    # `torch.nn.Identity`.
    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: null
        embed_dim: 4
        monitor: val/rec_loss
        # Architecture settings handed to the first-stage model.
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          # Explicitly empty list.
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    # Sampler definition: 100 steps, with tiling parameters where the stride
    # (64) is half the tile size (128). Discretization and guidance are
    # supplied as nested sub-configs.
    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.TiledRestoreEDMSampler
      params:
        num_steps: 100
        restore_cfg: 4.0
        s_churn: 0
        s_noise: 1.003
        tile_size: 128
        tile_stride: 64
        # Same discretizer class as denoiser_config.discretization_config.
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        # Guider with two scale settings: `scale` 7.5 and `scale_min` 4.0.
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.LinearCFG
          params:
            scale: 7.5
            scale_min: 4.0

    # Default prompt strings; the text itself is runtime data and is kept
    # verbatim.
    # p_p: presumably the positive prompt - the text lists desired
    # photographic quality descriptors (confirm against the code reading it).
    p_p:
      '电影级,高对比度,高度精细,使用哈苏相机拍摄,超精细照片,逼真的最大细节,32K,调色,超高清,极致的细节,皮肤毛孔细节,超清晰度,完美无变形。'
    # n_p: presumably the negative prompt - the text lists styles and
    # artifacts such as painting, cartoon, blur, watermark and low resolution.
    n_p:
      '绘画,油画,插图,绘图,艺术,素描,动漫,卡通,CG 风格,3D 渲染,虚幻引擎,模糊,混色,不清晰,怪异纹理,丑陋,肮脏,凌乱,质量最差,质量低,框架,水印,签名,JPEG 伪影,变形,低分辨率,过度平滑'

# Checkpoint locations. The three paths are relative; what they are resolved
# against depends on the code that loads this file (not visible here).
# BOOXEL_CKPT is deliberately left unset (null).
SDXL_CKPT: ckpt_sd_xl_base_1.0/sd_xl_base_1.0_0.9vae.safetensors
BOOXEL_CKPT_F: yanranxiaoxi_booxel/BOOXEL-v0.F.ckpt
BOOXEL_CKPT_Q: yanranxiaoxi_booxel/BOOXEL-v0.Q.ckpt
BOOXEL_CKPT: null