TaylorJi committed on
Commit
70c2fce
·
verified ·
1 Parent(s): 0f456d4

Upload model_config_odf.json

Browse files
Files changed (1) hide show
  1. model_config_odf.json +161 -0
model_config_odf.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 1323000,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": true,
10
+ "config": {
11
+ "encoder": {
12
+ "type": "oobleck",
13
+ "requires_grad": false,
14
+ "config": {
15
+ "in_channels": 2,
16
+ "channels": 128,
17
+ "c_mults": [1, 2, 4, 8, 16],
18
+ "strides": [2, 4, 4, 8, 8],
19
+ "latent_dim": 128,
20
+ "use_snake": true
21
+ }
22
+ },
23
+ "decoder": {
24
+ "type": "oobleck",
25
+ "config": {
26
+ "out_channels": 2,
27
+ "channels": 128,
28
+ "c_mults": [1, 2, 4, 8, 16],
29
+ "strides": [2, 4, 4, 8, 8],
30
+ "latent_dim": 64,
31
+ "use_snake": true,
32
+ "final_tanh": false
33
+ }
34
+ },
35
+ "bottleneck": {
36
+ "type": "vae"
37
+ },
38
+ "latent_dim": 64,
39
+ "downsampling_ratio": 2048,
40
+ "io_channels": 2
41
+ }
42
+ },
43
+ "conditioning": {
44
+ "configs": [
45
+ {
46
+ "id": "prompt",
47
+ "type": "t5",
48
+ "config": {
49
+ "t5_model_name": "t5-base",
50
+ "max_length": 128
51
+ }
52
+ },
53
+ {
54
+ "id": "semantic",
55
+ "type": "semantic",
56
+ "config": {
57
+ "dim": 512,
58
+ "max_length": 30,
59
+ "rhythm_type": "odf"
60
+ }
61
+ },
62
+ {
63
+ "id": "rhythm",
64
+ "type": "rhythm",
65
+ "config": {
66
+ "dim": 1,
67
+ "max_length": 30
68
+ }
69
+ },
70
+ {
71
+ "id": "color",
72
+ "type": "color",
73
+ "config": {
74
+ "dim": 3072,
75
+ "max_length": 30
76
+ }
77
+ },
78
+ {
79
+ "id": "class",
80
+ "type": "int",
81
+ "config": {
82
+ "min_val": 0,
83
+ "max_val": 2
84
+ }
85
+ },
86
+ {
87
+ "id": "seconds_start",
88
+ "type": "number",
89
+ "config": {
90
+ "min_val": 0,
91
+ "max_val": 30
92
+ }
93
+ },
94
+ {
95
+ "id": "seconds_total",
96
+ "type": "number",
97
+ "config": {
98
+ "min_val": 0,
99
+ "max_val": 30
100
+ }
101
+ }
102
+ ],
103
+ "cond_dim": 768
104
+ },
105
+ "diffusion": {
106
+ "cross_attention_cond_ids": ["semantic","rhythm","seconds_start", "seconds_total","color"],
107
+ "global_cond_ids": ["seconds_start", "seconds_total","class"],
108
+ "type": "dit",
109
+ "config": {
110
+ "io_channels": 64,
111
+ "embed_dim": 1536,
112
+ "depth": 24,
113
+ "num_heads": 24,
114
+ "cond_token_dim": 768,
115
+ "global_cond_dim": 2304,
116
+ "project_cond_tokens": false,
117
+ "transformer_type": "continuous_transformer",
118
+ "attention_weight_type": "FiLM_choose"
119
+ }
120
+ },
121
+ "io_channels": 64
122
+ },
123
+ "training": {
124
+ "use_ema": true,
125
+ "log_loss_info": false,
126
+ "optimizer_configs": {
127
+ "diffusion": {
128
+ "optimizer": {
129
+ "type": "AdamW",
130
+ "config": {
131
+ "lr": 1e-4,
132
+ "betas": [0.9, 0.999],
133
+ "weight_decay": 1e-3
134
+ }
135
+ },
136
+ "scheduler": {
137
+ "type": "InverseLR",
138
+ "config": {
139
+ "inv_gamma": 1000000,
140
+ "power": 0.5,
141
+ "warmup": 0.99
142
+ }
143
+ }
144
+ }
145
+ },
146
+ "demo": {
147
+ "demo_every": 100,
148
+ "demo_steps": 250,
149
+ "num_demos": 2,
150
+ "demo_cond": [
151
+ {"prompt": "Amen break 174 BPM", "seconds_start": 0, "seconds_total": 12},
152
+ {"prompt": "A beautiful orchestral symphony, classical music", "seconds_start": 0, "seconds_total": 160},
153
+ {"prompt": "Chill hip-hop beat, chillhop", "seconds_start": 0, "seconds_total": 190},
154
+ {"prompt": "A pop song about love and loss", "seconds_start": 0, "seconds_total": 180}
155
+ ],
156
+ "demo_cfg_scales": [1,2,3,4],
157
+ "demo_cond_from_batch": true,
158
+ "demo_save_dir": "generated_demo/"
159
+ }
160
+ }
161
+ }