{
  "_class_name": "StableAudioDiTModel",
  "_diffusers_version": "0.30.0.dev0",
  "attention_head_dim": 64,
  "cross_attention_dim": 768,
  "cross_attention_input_dim": 768,
  "global_states_input_dim": 1536,
  "in_channels": 64,
  "num_attention_heads": 24,
  "num_key_value_attention_heads": 12,
  "num_layers": 24,
  "out_channels": 64,
  "sample_size": 1024,
  "time_proj_dim": 256
}