{ "architectures": [ "STDiT" ], "auto_map": { "AutoConfig": "configuration_stdit.STDiTConfig", "AutoModel": "modeling_stdit.STDiT" }, "caption_channels": 4096, "class_dropout_prob": 0.1, "depth": 28, "drop_path": 0.0, "enable_flash_attn": false, "enable_flashattn": false, "enable_layernorm_kernel": false, "enable_sequence_parallelism": false, "freeze": null, "hidden_size": 1152, "in_channels": 4, "input_size": [ 16, 32, 32 ], "mlp_ratio": 4.0, "model_max_length": 120, "model_type": "stdit", "no_temporal_pos_emb": false, "num_heads": 16, "patch_size": [ 1, 2, 2 ], "pred_sigma": true, "space_scale": 0.5, "time_scale": 1.0, "torch_dtype": "float32", "transformers_version": "4.38.2" }