{
  "_class_name": "AutoencoderDC",
  "_diffusers_version": "0.32.0.dev0",
  "_name_or_path": "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers",
  "attention_head_dim": 32,
  "decoder_act_fns": "silu",
  "decoder_block_out_channels": [
    128,
    256,
    512,
    512,
    1024,
    1024
  ],
  "decoder_block_types": [
    "ResBlock",
    "ResBlock",
    "ResBlock",
    "EfficientViTBlock",
    "EfficientViTBlock",
    "EfficientViTBlock"
  ],
  "decoder_layers_per_block": [
    3,
    3,
    3,
    3,
    3,
    3
  ],
  "decoder_norm_types": "rms_norm",
  "decoder_qkv_multiscales": [
    [],
    [],
    [],
    [
      5
    ],
    [
      5
    ],
    [
      5
    ]
  ],
  "downsample_block_type": "Conv",
  "encoder_block_out_channels": [
    128,
    256,
    512,
    512,
    1024,
    1024
  ],
  "encoder_block_types": [
    "ResBlock",
    "ResBlock",
    "ResBlock",
    "EfficientViTBlock",
    "EfficientViTBlock",
    "EfficientViTBlock"
  ],
  "encoder_layers_per_block": [
    2,
    2,
    2,
    3,
    3,
    3
  ],
  "encoder_qkv_multiscales": [
    [],
    [],
    [],
    [
      5
    ],
    [
      5
    ],
    [
      5
    ]
  ],
  "in_channels": 3,
  "latent_channels": 32,
  "scaling_factor": 0.41407,
  "upsample_block_type": "interpolate"
}