alibaba-pai
/

EasyAnimateV2-XL-2-768x768

@@ -58,11 +58,11 @@ cd ../../
 # Model zoo
 EasyAnimateV2:
-| Name | Type | Storage Space | Url | Hugging Face | Description |
-|--|--|--|--|--|--|
-| EasyAnimateV2-XL-2-512x512.tar | EasyAnimateV2 | 16.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV2-XL-2-512x512.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-512x512) | EasyAnimateV2 official weights for 512x512 resolution. Training with 144 frames and fps 24 |
-| EasyAnimateV2-XL-2-768x768.tar | EasyAnimateV2 | 16.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV2-XL-2-768x768.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-768x768) | EasyAnimateV2 official weights for 768x768 resolution. Training with 144 frames and fps 24 |
-| easyanimatev2_minimalism_lora.safetensors | Lora of Pixart | 485.1MB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Personalized_Model/easyanimatev2_minimalism_lora.safetensors) | - | A lora training with a specifial type images. Images can be downloaded from [Url](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/webui/Minimalism.zip). |
 # Algorithm Detailed

 # Model zoo
 EasyAnimateV2:
+| Name | Type | Storage Space | Url | Hugging Face | Model Scope | Description |
+|--|--|--|--|--|--|--|
+| EasyAnimateV2-XL-2-512x512.tar | EasyAnimateV2 | 16.2GB | - | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-512x512) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV2-XL-2-512x512) | EasyAnimateV2 official weights for 512x512 resolution. Trained with 144 frames at 24 fps. |
+| EasyAnimateV2-XL-2-768x768.tar | EasyAnimateV2 | 16.2GB | - | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-768x768) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV2-XL-2-768x768) | EasyAnimateV2 official weights for 768x768 resolution. Trained with 144 frames at 24 fps. |
+| easyanimatev2_minimalism_lora.safetensors | Lora of Pixart | 485.1MB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Personalized_Model/easyanimatev2_minimalism_lora.safetensors) | - | - | A LoRA trained on a specific type of images. The images can be downloaded from [Url](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v2/Minimalism.zip). |
 # Algorithm Detailed

transformer/config.json CHANGED Viewed

@@ -1,16 +1,22 @@
 {
   "_class_name": "Transformer3DModel",
-  "_diffusers_version": "0.27.0",
   "activation_fn": "gelu-approximate",
   "attention_bias": true,
   "attention_head_dim": 72,
   "attention_type": "default",
   "basic_block_type": "motionmodule",
   "caption_channels": 4096,
   "cross_attention_dim": 1152,
   "decay": 0.9999,
   "double_self_attention": false,
   "dropout": 0.0,
   "enable_uvit": true,
   "fake_3d": false,
   "in_channels": 4,
@@ -28,6 +34,8 @@
     "temporal_position_encoding": true,
     "temporal_position_encoding_max_len": 4096
   },
   "motion_module_type": "Vanilla",
   "norm_elementwise_affine": false,
   "norm_eps": 1e-06,
@@ -43,6 +51,8 @@
   "patch_3d": false,
   "patch_size": 2,
   "power": 0.6666666666666666,
   "sample_size": 64,
   "time_patch_size": null,
   "time_position_encoding_before_transformer": false,

 {
   "_class_name": "Transformer3DModel",
+  "_diffusers_version": "0.30.1",
   "activation_fn": "gelu-approximate",
+  "add_noise_in_inpaint_model": false,
+  "after_norm": false,
   "attention_bias": true,
   "attention_head_dim": 72,
   "attention_type": "default",
   "basic_block_type": "motionmodule",
   "caption_channels": 4096,
+  "casual_3d": false,
+  "casual_3d_upsampler_index": null,
   "cross_attention_dim": 1152,
   "decay": 0.9999,
   "double_self_attention": false,
   "dropout": 0.0,
+  "enable_clip_in_inpaint": true,
+  "enable_text_attention_mask": true,
   "enable_uvit": true,
   "fake_3d": false,
   "in_channels": 4,
     "temporal_position_encoding": true,
     "temporal_position_encoding_max_len": 4096
   },
+  "motion_module_kwargs_even": null,
+  "motion_module_kwargs_odd": null,
   "motion_module_type": "Vanilla",
   "norm_elementwise_affine": false,
   "norm_eps": 1e-06,
   "patch_3d": false,
   "patch_size": 2,
   "power": 0.6666666666666666,
+  "qk_norm": false,
+  "resize_inpaint_mask_directly": false,
   "sample_size": 64,
   "time_patch_size": null,
   "time_position_encoding_before_transformer": false,

vae/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.22.0.dev0",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -8,9 +8,18 @@
     512,
     512
   ],
   "down_block_types": [
-    "SpatialDownBlock3D",
-    "SpatialTemporalDownBlock3D",
     "SpatialTemporalDownBlock3D",
     "SpatialTemporalDownBlock3D"
   ],
@@ -18,18 +27,31 @@
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
   "norm_num_groups": 32,
   "out_channels": 3,
   "sample_size": 256,
   "scaling_factor": 0.18215,
   "slice_compression_vae": false,
-  "mid_block_attention_type": "3d",
-  "mini_batch_encoder": 9,
-  "mini_batch_decoder": 3,
   "up_block_types": [
-    "SpatialUpBlock3D",
-    "SpatialTemporalUpBlock3D",
     "SpatialTemporalUpBlock3D",
     "SpatialTemporalUpBlock3D"
-  ]
 }

 {
+  "_class_name": "AutoencoderKLMagvit",
+  "_diffusers_version": "0.30.1",
   "act_fn": "silu",
   "block_out_channels": [
     128,
     512,
     512
   ],
+  "cache_compression_vae": false,
+  "cache_mag_vae": false,
+  "ch": 128,
+  "ch_mult": [
+    1,
+    2,
+    4,
+    4
+  ],
   "down_block_types": [
+    "SpatialDownBlock3D",
+    "SpatialTemporalDownBlock3D",
     "SpatialTemporalDownBlock3D",
     "SpatialTemporalDownBlock3D"
   ],
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
+  "mid_block_attention_type": "3d",
+  "mid_block_num_attention_heads": 1,
+  "mid_block_type": "MidBlock3D",
+  "mid_block_use_attention": true,
+  "mini_batch_decoder": 3,
+  "mini_batch_encoder": 9,
   "norm_num_groups": 32,
+  "num_attention_heads": 1,
   "out_channels": 3,
   "sample_size": 256,
   "scaling_factor": 0.18215,
   "slice_compression_vae": false,
+  "slice_mag_vae": true,
+  "spatial_group_norm": false,
+  "tile_overlap_factor": 0.25,
+  "tile_sample_min_size": 384,
   "up_block_types": [
+    "SpatialUpBlock3D",
+    "SpatialTemporalUpBlock3D",
     "SpatialTemporalUpBlock3D",
     "SpatialTemporalUpBlock3D"
+  ],
+  "upcast_vae": false,
+  "use_gc_blocks": null,
+  "use_tiling": false,
+  "use_tiling_decoder": false,
+  "use_tiling_encoder": false
 }