ehristoforu committed on
Commit
2a019c3
1 Parent(s): 9c993d2

Update unet/config.json

Browse files
Files changed (1) hide show
  1. unet/config.json +21 -17
unet/config.json CHANGED
@@ -1,16 +1,19 @@
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
- "_diffusers_version": "0.19.0",
4
  "act_fn": "silu",
5
- "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
7
- "addition_time_embed_dim": null,
8
- "attention_head_dim": 8,
 
 
 
 
9
  "attention_type": "default",
10
  "block_out_channels": [
11
  320,
12
  640,
13
- 1280,
14
  1280
15
  ],
16
  "center_input_sample": false,
@@ -18,13 +21,12 @@
18
  "class_embeddings_concat": false,
19
  "conv_in_kernel": 3,
20
  "conv_out_kernel": 3,
21
- "cross_attention_dim": 768,
22
  "cross_attention_norm": null,
23
  "down_block_types": [
 
24
  "CrossAttnDownBlock2D",
25
- "CrossAttnDownBlock2D",
26
- "CrossAttnDownBlock2D",
27
- "DownBlock2D"
28
  ],
29
  "downsample_padding": 1,
30
  "dropout": 0.0,
@@ -44,24 +46,26 @@
44
  "num_class_embeds": null,
45
  "only_cross_attention": false,
46
  "out_channels": 4,
47
- "projection_class_embeddings_input_dim": null,
48
  "resnet_out_scale_factor": 1.0,
49
  "resnet_skip_time_act": false,
50
  "resnet_time_scale_shift": "default",
51
- "reverse_transformer_layers_per_block": null,
52
- "sample_size": 64,
53
  "time_cond_proj_dim": null,
54
  "time_embedding_act_fn": null,
55
  "time_embedding_dim": null,
56
  "time_embedding_type": "positional",
57
  "timestep_post_act": null,
58
- "transformer_layers_per_block": 1,
 
 
 
 
59
  "up_block_types": [
60
- "UpBlock2D",
61
  "CrossAttnUpBlock2D",
62
  "CrossAttnUpBlock2D",
63
- "CrossAttnUpBlock2D"
64
  ],
65
- "upcast_attention": false,
66
- "use_linear_projection": false
67
  }
 
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.22.0.dev0",
4
  "act_fn": "silu",
5
+ "addition_embed_type": "text_time",
6
  "addition_embed_type_num_heads": 64,
7
+ "addition_time_embed_dim": 256,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20
12
+ ],
13
  "attention_type": "default",
14
  "block_out_channels": [
15
  320,
16
  640,
 
17
  1280
18
  ],
19
  "center_input_sample": false,
 
21
  "class_embeddings_concat": false,
22
  "conv_in_kernel": 3,
23
  "conv_out_kernel": 3,
24
+ "cross_attention_dim": 2048,
25
  "cross_attention_norm": null,
26
  "down_block_types": [
27
+ "DownBlock2D",
28
  "CrossAttnDownBlock2D",
29
+ "CrossAttnDownBlock2D"
 
 
30
  ],
31
  "downsample_padding": 1,
32
  "dropout": 0.0,
 
46
  "num_class_embeds": null,
47
  "only_cross_attention": false,
48
  "out_channels": 4,
49
+ "projection_class_embeddings_input_dim": 2816,
50
  "resnet_out_scale_factor": 1.0,
51
  "resnet_skip_time_act": false,
52
  "resnet_time_scale_shift": "default",
53
+ "sample_size": 128,
 
54
  "time_cond_proj_dim": null,
55
  "time_embedding_act_fn": null,
56
  "time_embedding_dim": null,
57
  "time_embedding_type": "positional",
58
  "timestep_post_act": null,
59
+ "transformer_layers_per_block": [
60
+ 1,
61
+ 2,
62
+ 10
63
+ ],
64
  "up_block_types": [
 
65
  "CrossAttnUpBlock2D",
66
  "CrossAttnUpBlock2D",
67
+ "UpBlock2D"
68
  ],
69
+ "upcast_attention": null,
70
+ "use_linear_projection": true
71
  }