lmzjms committed on
Commit
e154cd6
1 Parent(s): fda2ed9

Upload img2audio_args.yaml

Browse files
text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-05
3
+ target: ldm.models.diffusion.ddpm_audio.LatentDiffusion_audio
4
+ params:
5
+ linear_start: 0.00085
6
+ linear_end: 0.0120
7
+ num_timesteps_cond: 1
8
+ log_every_t: 200
9
+ timesteps: 1000
10
+ first_stage_key: image
11
+ cond_stage_key: caption
12
+ image_size: 32 # unused
13
+ mel_dim: 10 # 80 // 2^3
14
+ mel_length: 78 # 624 // 2^3
15
+ channels: 4
16
+ cond_stage_trainable: false
17
+ conditioning_key: crossattn
18
+ monitor: val/loss_simple_ema
19
+ scale_by_std: True
20
+ use_ema: False
21
+
22
+ scheduler_config: # 10000 warmup steps
23
+ target: ldm.lr_scheduler.LambdaLinearScheduler
24
+ params:
25
+ warm_up_steps: [10000]
26
+ cycle_lengths: [10000000000000]
27
+ f_start: [1.e-6]
28
+ f_max: [1.]
29
+ f_min: [ 1.]
30
+
31
+ unet_config:
32
+ target: ldm.modules.diffusionmodules.custom_openaimodel.UNetModel
33
+ params:
34
+ image_size: 32 # unused
35
+ in_channels: 4
36
+ out_channels: 4
37
+ model_channels: 256
38
+ attention_resolutions:
39
+ - 1
40
+ - 2
41
+ num_res_blocks: 2
42
+ channel_mult: # num_down = len(ch_mult)-1
43
+ - 1
44
+ - 2
45
+ num_head_channels: 32
46
+ use_spatial_transformer: true
47
+ transformer_depth: 1
48
+ context_dim: 1024
49
+ use_context_project: false
50
+
51
+
52
+ first_stage_config:
53
+ target: ldm.models.autoencoder.AutoencoderKL
54
+ params:
55
+ embed_dim: 4
56
+ monitor: val/rec_loss
57
+ ddconfig:
58
+ double_z: true
59
+ z_channels: 4
60
+ resolution: 848
61
+ in_channels: 1
62
+ out_ch: 1
63
+ ch: 128
64
+ ch_mult: [ 1, 2, 2, 4 ] # num_down = len(ch_mult)-1
65
+ num_res_blocks: 2
66
+ attn_resolutions: [106, 212]
67
+ dropout: 0.0
68
+ lossconfig:
69
+ target: torch.nn.Identity
70
+
71
+ cond_stage_config:
72
+ target: ldm.modules.encoders.modules.FrozenGlobalNormOpenCLIPEmbedder
73
+ params:
74
+ freeze: True
75
+ delvisual: False
76
+
77
+