| { |
| "_class_name": "AutoencoderDC", |
| "_diffusers_version": "0.32.0.dev0", |
| "_name_or_path": "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers", |
| "attention_head_dim": 32, |
| "decoder_act_fns": "silu", |
| "decoder_block_out_channels": [ |
| 128, |
| 256, |
| 512, |
| 512, |
| 1024, |
| 1024 |
| ], |
| "decoder_block_types": [ |
| "ResBlock", |
| "ResBlock", |
| "ResBlock", |
| "EfficientViTBlock", |
| "EfficientViTBlock", |
| "EfficientViTBlock" |
| ], |
| "decoder_layers_per_block": [ |
| 3, |
| 3, |
| 3, |
| 3, |
| 3, |
| 3 |
| ], |
| "decoder_norm_types": "rms_norm", |
| "decoder_qkv_multiscales": [ |
| [], |
| [], |
| [], |
| [ |
| 5 |
| ], |
| [ |
| 5 |
| ], |
| [ |
| 5 |
| ] |
| ], |
| "downsample_block_type": "Conv", |
| "encoder_block_out_channels": [ |
| 128, |
| 256, |
| 512, |
| 512, |
| 1024, |
| 1024 |
| ], |
| "encoder_block_types": [ |
| "ResBlock", |
| "ResBlock", |
| "ResBlock", |
| "EfficientViTBlock", |
| "EfficientViTBlock", |
| "EfficientViTBlock" |
| ], |
| "encoder_layers_per_block": [ |
| 2, |
| 2, |
| 2, |
| 3, |
| 3, |
| 3 |
| ], |
| "encoder_qkv_multiscales": [ |
| [], |
| [], |
| [], |
| [ |
| 5 |
| ], |
| [ |
| 5 |
| ], |
| [ |
| 5 |
| ] |
| ], |
| "in_channels": 3, |
| "latent_channels": 32, |
| "scaling_factor": 0.41407, |
| "upsample_block_type": "interpolate" |
| } |
|
|