---
# Configuration tree rooted at `model`. Each component is declared as a
# `target` (a dotted path) with an optional `params` mapping; presumably the
# loader instantiates `target` with `params` as arguments -- confirm against
# the consuming code.
#
# NOTE(review): this file was recovered from a whitespace-collapsed,
# single-line form. The nesting below is reconstructed from key order and the
# repeating target/params pattern -- verify it against the consumer's schema.
# Booleans are normalised to lowercase `true`/`false` (same parsed values).
model:
  target: sgm.models.diffusion.DiffusionEngine
  params:
    scale_factor: 0.18215
    disable_first_stage_autocast: true

    # Denoiser wrapper together with its scaling and discretization components.
    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
      params:
        num_idx: 1000

        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    # UNet backbone settings.
    network_config:
      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024

    # Conditioning: a single, non-trainable embedder keyed on `txt`.
    conditioner_config:
      target: sgm.modules.GeneralConditioner
      params:
        emb_models:
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder
            params:
              freeze: true
              layer: penultimate

    # First-stage autoencoder settings.
    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          # Explicit empty list (not null).
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity