{
  "_class_name": "SD3Transformer2DModelWithAudioHQ",
  "_diffusers_version": "0.30.0.dev0",
  "_name_or_path": "/localhome/data/ckpts/shared/stable-diffusion-3.5-medium",
  "add_audio": true,
  "add_clip": false,
  "attention_head_dim": 64,
  "audio_input_dim": 8,
  "caption_projection_dim": 1536,
  "decoder_config": "",
  "drop_audio": false,
  "drop_image": false,
  "drop_text": false,
  "dual_attention_layers": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12
  ],
  "image_bind": false,
  "in_channels": 16,
  "joint_attention_dim": 4096,
  "num_attention_heads": 24,
  "num_layers": 24,
  "out_channels": 16,
  "patch_size": 2,
  "pooled_projection_dim": 2048,
  "pos_embed_max_size": 384,
  "qk_norm": "rms_norm",
  "sample_size": 128,
  "use_audio_mae": false
}
|