LarryTsai a-r-r-o-w HF staff committed on
Commit
e3d6332
1 Parent(s): 0a1b8a5

Add `model_index.json` and update modeling configs (#2)

Browse files

- add model_index.json; update configs (43d06687e1b23afacab5980e452638293b3722b0)
- update (7defb981dbb797a900f485c84f483d6d21e9520a)


Co-authored-by: Aryan V S <a-r-r-o-w@users.noreply.huggingface.co>

Files changed (3) hide show
  1. model_index.json +24 -0
  2. transformer/config.json +4 -12
  3. vae/config.json +25 -21
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AllegroPipeline",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "scheduler": [
5
+ "diffusers",
6
+ "EulerAncestralDiscreteScheduler"
7
+ ],
8
+ "text_encoder": [
9
+ "transformers",
10
+ "T5EncoderModel"
11
+ ],
12
+ "tokenizer": [
13
+ "transformers",
14
+ "T5Tokenizer"
15
+ ],
16
+ "transformer": [
17
+ "diffusers",
18
+ "AllegroTransformer3DModel"
19
+ ],
20
+ "vae": [
21
+ "diffusers",
22
+ "AutoencoderKLAllegro"
23
+ ]
24
+ }
transformer/config.json CHANGED
@@ -1,38 +1,30 @@
1
  {
2
  "_class_name": "AllegroTransformer3DModel",
3
- "_diffusers_version": "0.28.0",
4
  "activation_fn": "gelu-approximate",
5
  "attention_bias": true,
6
  "attention_head_dim": 96,
7
- "ca_attention_mode": "xformers",
8
  "caption_channels": 4096,
9
  "cross_attention_dim": 2304,
10
- "double_self_attention": false,
11
- "downsampler": null,
12
  "dropout": 0.0,
13
  "in_channels": 4,
14
  "interpolation_scale_h": 2.0,
15
  "interpolation_scale_t": 2.2,
16
  "interpolation_scale_w": 2.0,
17
- "model_max_length": 300,
18
  "norm_elementwise_affine": false,
19
  "norm_eps": 1e-06,
20
  "norm_type": "ada_norm_single",
21
  "num_attention_heads": 24,
22
- "num_embeds_ada_norm": 1000,
23
  "num_layers": 32,
24
- "only_cross_attention": false,
25
  "out_channels": 4,
26
  "patch_size": 2,
27
  "patch_size_t": 1,
28
- "sa_attention_mode": "flash",
 
29
  "sample_size": [
30
  90,
31
  160
32
  ],
33
  "sample_size_t": 22,
34
- "upcast_attention": false,
35
- "use_additional_conditions": null,
36
- "use_linear_projection": false,
37
- "use_rope": true
38
  }
 
1
  {
2
  "_class_name": "AllegroTransformer3DModel",
3
+ "_diffusers_version": "0.31.0.dev0",
4
  "activation_fn": "gelu-approximate",
5
  "attention_bias": true,
6
  "attention_head_dim": 96,
 
7
  "caption_channels": 4096,
8
  "cross_attention_dim": 2304,
 
 
9
  "dropout": 0.0,
10
  "in_channels": 4,
11
  "interpolation_scale_h": 2.0,
12
  "interpolation_scale_t": 2.2,
13
  "interpolation_scale_w": 2.0,
 
14
  "norm_elementwise_affine": false,
15
  "norm_eps": 1e-06,
16
  "norm_type": "ada_norm_single",
17
  "num_attention_heads": 24,
 
18
  "num_layers": 32,
 
19
  "out_channels": 4,
20
  "patch_size": 2,
21
  "patch_size_t": 1,
22
+ "sample_frames": 22,
23
+ "sample_height": 90,
24
  "sample_size": [
25
  90,
26
  160
27
  ],
28
  "sample_size_t": 22,
29
+ "sample_width": 160
 
 
 
30
  }
vae/config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "_class_name": "AllegroAutoencoderKL3D",
3
- "_diffusers_version": "0.28.0",
4
  "act_fn": "silu",
5
  "block_out_channels": [
6
  128,
@@ -8,33 +8,37 @@
8
  512,
9
  512
10
  ],
11
- "blocks_tempdown_li": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  true,
13
  true,
14
  false,
15
  false
16
  ],
17
- "blocks_tempup_li": [
18
  false,
19
  true,
20
  true,
21
  false
22
  ],
23
- "chunk_len": 24,
24
- "down_block_num": 4,
25
- "force_upcast": true,
26
- "in_channels": 3,
27
- "latent_channels": 4,
28
- "layers_per_block": 2,
29
- "load_mode": "full",
30
- "norm_num_groups": 32,
31
- "out_channels": 3,
32
- "sample_size": 320,
33
- "scale_factor": 0.13,
34
- "t_over": 8,
35
- "tile_overlap": [
36
- 120,
37
- 80
38
- ],
39
- "up_block_num": 4
40
  }
 
1
  {
2
+ "_class_name": "AutoencoderKLAllegro",
3
+ "_diffusers_version": "0.31.0.dev0",
4
  "act_fn": "silu",
5
  "block_out_channels": [
6
  128,
 
8
  512,
9
  512
10
  ],
11
+ "down_block_types": [
12
+ "AllegroDownBlock3D",
13
+ "AllegroDownBlock3D",
14
+ "AllegroDownBlock3D",
15
+ "AllegroDownBlock3D"
16
+ ],
17
+ "force_upcast": true,
18
+ "in_channels": 3,
19
+ "latent_channels": 4,
20
+ "layers_per_block": 2,
21
+ "norm_num_groups": 32,
22
+ "out_channels": 3,
23
+ "sample_size": 320,
24
+ "scaling_factor": 0.13,
25
+ "temporal_compression_ratio": 4,
26
+ "temporal_downsample_blocks": [
27
  true,
28
  true,
29
  false,
30
  false
31
  ],
32
+ "temporal_upsample_blocks": [
33
  false,
34
  true,
35
  true,
36
  false
37
  ],
38
+ "up_block_types": [
39
+ "AllegroUpBlock3D",
40
+ "AllegroUpBlock3D",
41
+ "AllegroUpBlock3D",
42
+ "AllegroUpBlock3D"
43
+ ]
 
 
 
 
 
 
 
 
 
 
 
44
  }