Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,612 Bytes
00fc29f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# Model definition. Every `target` is a dotted Python path and the sibling
# `params` mapping carries that object's settings.
# NOTE(review): presumably consumed by an instantiate-from-config style
# loader (target -> class, params -> keyword arguments) - confirm.
model:
  target: BOOXEL.models.BOOXEL_model.BOOXELModel
  params:
    # Precision used for the autoencoder and for the diffusion network.
    ae_dtype: bf16
    diffusion_dtype: fp16
    scale_factor: 0.13025
    disable_first_stage_autocast: true
    network_wrapper: sgm.modules.diffusionmodules.wrappers.ControlWrapper

    # Denoiser: eps weighting / eps scaling over a 1000-index legacy DDPM
    # discretization.
    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiserWithControl
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    # Control branch. Its architecture hyper-parameters mirror the ones in
    # `network_config` below; keep the two in sync when editing either.
    control_stage_config:
      target: BOOXEL.modules.BOOXEL_v0.GLVControl
      params:
        # NOTE(review): presumably 1280 (pooled text embedding) plus
        # 3 x 2 x 256 (the three size/crop embedders in
        # `conditioner_config`) - confirm against the embedder outputs.
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # note: the first entry is unused (because attn_res starts at 2)
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        # transformer_depth: [1, 1, 4]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false
        input_upscale: 1

    # Main denoising network.
    network_config:
      target: BOOXEL.modules.BOOXEL_v0.LightGLVUNet
      params:
        mode: XL-base
        project_type: ZeroSFT
        project_channel_scale: 2
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # note: the first entry is unused (because attn_res starts at 2)
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false

    # Conditioning: two frozen text encoders reading `txt`, plus three
    # embedders for the size / crop tuples. None of them is trainable.
    conditioner_config:
      target: sgm.modules.GeneralConditionerWithControl
      params:
        emb_models:
          # crossattn cond
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
          # crossattn and vector cond
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              freeze: true
              layer: penultimate
              always_return_pooled: true
              legacy: false
          # vector cond
          - is_trainable: false
            input_key: original_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: false
            input_key: crop_coords_top_left
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: false
            input_key: target_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    # First stage (autoencoder). `ckpt_path` is null here.
    # NOTE(review): weights are presumably loaded from the checkpoint paths
    # configured elsewhere in this file - confirm in the loader.
    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: null
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        # No loss module is configured: the target is an identity layer.
        lossconfig:
          target: torch.nn.Identity

    # Sampler: tiled restoration sampler, 100 steps, linear CFG guider
    # configured with scale 7.5 and scale_min 4.0.
    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.TiledRestoreEDMSampler
      params:
        num_steps: 100
        restore_cfg: 4.0
        s_churn: 0
        s_noise: 1.003
        # NOTE(review): tile units (latent vs. pixel) are not visible here.
        tile_size: 128
        tile_stride: 64
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.LinearCFG
          params:
            scale: 7.5
            scale_min: 4.0

    # Default prompt strings (runtime data - do not translate or reflow).
    # NOTE(review): `p_p` / `n_p` look like the positive / negative prompt
    # defaults - confirm against the model class.
    p_p:
      '电影级,高对比度,高度精细,使用哈苏相机拍摄,超精细照片,逼真的最大细节,32K,调色,超高清,极致的细节,皮肤毛孔细节,超清晰度,完美无变形。'
    n_p:
      '绘画,油画,插图,绘图,艺术,素描,动漫,卡通,CG 风格,3D 渲染,虚幻引擎,模糊,混色,不清晰,怪异纹理,丑陋,肮脏,凌乱,质量最差,质量低,框架,水印,签名,JPEG 伪影,变形,低分辨率,过度平滑'
# Checkpoint locations, kept at the top level of the document (outside
# `model`). Paths are quoted so they are always read as strings.
# NOTE(review): paths look relative to the working directory, and the F / Q
# suffixes look like two alternative weight variants - confirm in the loader.
SDXL_CKPT: 'ckpt_sd_xl_base_1.0/sd_xl_base_1.0_0.9vae.safetensors'
BOOXEL_CKPT_F: 'yanranxiaoxi_booxel/BOOXEL-v0.F.ckpt'
BOOXEL_CKPT_Q: 'yanranxiaoxi_booxel/BOOXEL-v0.Q.ckpt'
# Explicitly unset.
BOOXEL_CKPT: null
|