# Keyword-argument groups, one mapping per "*_kwargs" key. Keys and values
# are unchanged from the original; only the layout was restored.
#
# NOTE(review): the original had every key on a single line, which is not
# parseable YAML (a plain scalar may not contain ": "). The nesting below is
# reconstructed from key naming — confirm against the consumer's schema.
transformer_additional_kwargs:
  transformer_type: "Transformer3DModel"
  patch_3d: false
  fake_3d: false
  basic_block_type: "motionmodule"
  time_position_encoding_before_transformer: false
  motion_module_type: "Vanilla"
  enable_uvit: true

  # NOTE(review): assumed to be nested under transformer_additional_kwargs
  # (it follows motion_module_type / basic_block_type: "motionmodule") rather
  # than being a top-level key — TODO confirm.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 4096
    temporal_attention_dim_div: 1
    block_size: 1

# NOTE(review): presumably a top-level sibling of
# transformer_additional_kwargs — verify.
vae_kwargs:
  vae_type: "AutoencoderKLMagvit"
  mini_batch_encoder: 9
  mini_batch_decoder: 3
  slice_mag_vae: true
  slice_compression_vae: false
  cache_compression_vae: false
  cache_mag_vae: false

# NOTE(review): presumably a top-level sibling as well — verify.
text_encoder_kwargs:
  enable_multi_text_encoder: false