bubbliiiing committed on
Commit
861b433
1 Parent(s): bbf073a

Update config

Browse files
Files changed (3) hide show
  1. README.md +5 -5
  2. transformer/config.json +11 -1
  3. vae/config.json +32 -10
README.md CHANGED
@@ -58,11 +58,11 @@ cd ../../
58
  # Model zoo
59
 
60
  EasyAnimateV2:
61
- | Name | Type | Storage Space | Url | Hugging Face | Description |
62
- |--|--|--|--|--|--|
63
- | EasyAnimateV2-XL-2-512x512.tar | EasyAnimateV2 | 16.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV2-XL-2-512x512.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-512x512) | EasyAnimateV2 official weights for 512x512 resolution. Training with 144 frames and fps 24 |
64
- | EasyAnimateV2-XL-2-768x768.tar | EasyAnimateV2 | 16.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV2-XL-2-768x768.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-768x768) | EasyAnimateV2 official weights for 768x768 resolution. Training with 144 frames and fps 24 |
65
- | easyanimatev2_minimalism_lora.safetensors | Lora of Pixart | 485.1MB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Personalized_Model/easyanimatev2_minimalism_lora.safetensors) | - | A LoRA trained on a specific type of image. Images can be downloaded from [Url](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/webui/Minimalism.zip). |
66
 
67
 
68
  # Algorithm Detailed
 
58
  # Model zoo
59
 
60
  EasyAnimateV2:
61
+ | Name | Type | Storage Space | Url | Hugging Face | Model Scope | Description |
62
+ |--|--|--|--|--|--|--|
63
+ | EasyAnimateV2-XL-2-512x512.tar | EasyAnimateV2 | 16.2GB | - | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-512x512)| [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV2-XL-2-512x512)| EasyAnimateV2 official weights for 512x512 resolution. Training with 144 frames and fps 24 |
64
+ | EasyAnimateV2-XL-2-768x768.tar | EasyAnimateV2 | 16.2GB | - | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV2-XL-2-768x768) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV2-XL-2-768x768)| EasyAnimateV2 official weights for 768x768 resolution. Training with 144 frames and fps 24 |
65
+ | easyanimatev2_minimalism_lora.safetensors | Lora of Pixart | 485.1MB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Personalized_Model/easyanimatev2_minimalism_lora.safetensors)| - | - | A LoRA trained on a specific type of image. Images can be downloaded from [Url](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v2/Minimalism.zip). |
66
 
67
 
68
  # Algorithm Detailed
transformer/config.json CHANGED
@@ -1,16 +1,22 @@
1
  {
2
  "_class_name": "Transformer3DModel",
3
- "_diffusers_version": "0.27.0",
4
  "activation_fn": "gelu-approximate",
 
 
5
  "attention_bias": true,
6
  "attention_head_dim": 72,
7
  "attention_type": "default",
8
  "basic_block_type": "motionmodule",
9
  "caption_channels": 4096,
 
 
10
  "cross_attention_dim": 1152,
11
  "decay": 0.9999,
12
  "double_self_attention": false,
13
  "dropout": 0.0,
 
 
14
  "enable_uvit": true,
15
  "fake_3d": false,
16
  "in_channels": 4,
@@ -28,6 +34,8 @@
28
  "temporal_position_encoding": true,
29
  "temporal_position_encoding_max_len": 4096
30
  },
 
 
31
  "motion_module_type": "Vanilla",
32
  "norm_elementwise_affine": false,
33
  "norm_eps": 1e-06,
@@ -43,6 +51,8 @@
43
  "patch_3d": false,
44
  "patch_size": 2,
45
  "power": 0.6666666666666666,
 
 
46
  "sample_size": 64,
47
  "time_patch_size": null,
48
  "time_position_encoding_before_transformer": false,
 
1
  {
2
  "_class_name": "Transformer3DModel",
3
+ "_diffusers_version": "0.30.1",
4
  "activation_fn": "gelu-approximate",
5
+ "add_noise_in_inpaint_model": false,
6
+ "after_norm": false,
7
  "attention_bias": true,
8
  "attention_head_dim": 72,
9
  "attention_type": "default",
10
  "basic_block_type": "motionmodule",
11
  "caption_channels": 4096,
12
+ "casual_3d": false,
13
+ "casual_3d_upsampler_index": null,
14
  "cross_attention_dim": 1152,
15
  "decay": 0.9999,
16
  "double_self_attention": false,
17
  "dropout": 0.0,
18
+ "enable_clip_in_inpaint": true,
19
+ "enable_text_attention_mask": true,
20
  "enable_uvit": true,
21
  "fake_3d": false,
22
  "in_channels": 4,
 
34
  "temporal_position_encoding": true,
35
  "temporal_position_encoding_max_len": 4096
36
  },
37
+ "motion_module_kwargs_even": null,
38
+ "motion_module_kwargs_odd": null,
39
  "motion_module_type": "Vanilla",
40
  "norm_elementwise_affine": false,
41
  "norm_eps": 1e-06,
 
51
  "patch_3d": false,
52
  "patch_size": 2,
53
  "power": 0.6666666666666666,
54
+ "qk_norm": false,
55
+ "resize_inpaint_mask_directly": false,
56
  "sample_size": 64,
57
  "time_patch_size": null,
58
  "time_position_encoding_before_transformer": false,
vae/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "_class_name": "AutoencoderKL",
3
- "_diffusers_version": "0.22.0.dev0",
4
  "act_fn": "silu",
5
  "block_out_channels": [
6
  128,
@@ -8,9 +8,18 @@
8
  512,
9
  512
10
  ],
 
 
 
 
 
 
 
 
 
11
  "down_block_types": [
12
- "SpatialDownBlock3D",
13
- "SpatialTemporalDownBlock3D",
14
  "SpatialTemporalDownBlock3D",
15
  "SpatialTemporalDownBlock3D"
16
  ],
@@ -18,18 +27,31 @@
18
  "in_channels": 3,
19
  "latent_channels": 4,
20
  "layers_per_block": 2,
 
 
 
 
 
 
21
  "norm_num_groups": 32,
 
22
  "out_channels": 3,
23
  "sample_size": 256,
24
  "scaling_factor": 0.18215,
25
  "slice_compression_vae": false,
26
- "mid_block_attention_type": "3d",
27
- "mini_batch_encoder": 9,
28
- "mini_batch_decoder": 3,
 
29
  "up_block_types": [
30
- "SpatialUpBlock3D",
31
- "SpatialTemporalUpBlock3D",
32
  "SpatialTemporalUpBlock3D",
33
  "SpatialTemporalUpBlock3D"
34
- ]
 
 
 
 
 
35
  }
 
1
  {
2
+ "_class_name": "AutoencoderKLMagvit",
3
+ "_diffusers_version": "0.30.1",
4
  "act_fn": "silu",
5
  "block_out_channels": [
6
  128,
 
8
  512,
9
  512
10
  ],
11
+ "cache_compression_vae": false,
12
+ "cache_mag_vae": false,
13
+ "ch": 128,
14
+ "ch_mult": [
15
+ 1,
16
+ 2,
17
+ 4,
18
+ 4
19
+ ],
20
  "down_block_types": [
21
+ "SpatialDownBlock3D",
22
+ "SpatialTemporalDownBlock3D",
23
  "SpatialTemporalDownBlock3D",
24
  "SpatialTemporalDownBlock3D"
25
  ],
 
27
  "in_channels": 3,
28
  "latent_channels": 4,
29
  "layers_per_block": 2,
30
+ "mid_block_attention_type": "3d",
31
+ "mid_block_num_attention_heads": 1,
32
+ "mid_block_type": "MidBlock3D",
33
+ "mid_block_use_attention": true,
34
+ "mini_batch_decoder": 3,
35
+ "mini_batch_encoder": 9,
36
  "norm_num_groups": 32,
37
+ "num_attention_heads": 1,
38
  "out_channels": 3,
39
  "sample_size": 256,
40
  "scaling_factor": 0.18215,
41
  "slice_compression_vae": false,
42
+ "slice_mag_vae": true,
43
+ "spatial_group_norm": false,
44
+ "tile_overlap_factor": 0.25,
45
+ "tile_sample_min_size": 384,
46
  "up_block_types": [
47
+ "SpatialUpBlock3D",
48
+ "SpatialTemporalUpBlock3D",
49
  "SpatialTemporalUpBlock3D",
50
  "SpatialTemporalUpBlock3D"
51
+ ],
52
+ "upcast_vae": false,
53
+ "use_gc_blocks": null,
54
+ "use_tiling": false,
55
+ "use_tiling_decoder": false,
56
+ "use_tiling_encoder": false
57
  }