{
  "architectures": [
    "STDiT"
  ],
  "auto_map": {
    "AutoConfig": "configuration_stdit.STDiTConfig",
    "AutoModel": "modeling_stdit.STDiT"
  },
  "caption_channels": 4096,
  "class_dropout_prob": 0.1,
  "depth": 28,
  "drop_path": 0.0,
  "enable_flash_attn": false,
  "enable_layernorm_kernel": false,
  "enable_sequence_parallelism": false,
  "freeze": null,
  "hidden_size": 1152,
  "in_channels": 4,
  "input_size": [
    16,
    64,
    64
  ],
  "mlp_ratio": 4.0,
  "model_max_length": 120,
  "model_type": "stdit",
  "no_temporal_pos_emb": false,
  "num_heads": 16,
  "patch_size": [
    1,
    2,
    2
  ],
  "pred_sigma": true,
  "space_scale": 0.5,
  "time_scale": 1.0,
  "torch_dtype": "float32",
  "transformers_version": "4.38.2"
}
|