Adityadn committed on
Commit
cb91380
1 Parent(s): ae289df

Delete models/configs

models/configs/anything_v3.yaml DELETED
@@ -1,73 +0,0 @@
-model:
-  base_learning_rate: 1.0e-04
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 10000 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-      params:
-        layer: "hidden"
-        layer_idx: -2
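
The last two keys are what makes this config "Anything v3"-specific: penultimate-layer text conditioning. A minimal sketch of the idea behind layer: "hidden" with layer_idx: -2 ("clip skip 2"), using Hugging Face transformers directly rather than the repo's FrozenCLIPEmbedder wrapper:

import torch
from transformers import CLIPTokenizer, CLIPTextModel

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").eval()

tokens = tokenizer(["a photo of a cat"], padding="max_length",
                   max_length=77, truncation=True, return_tensors="pt")
with torch.no_grad():
    out = encoder(**tokens, output_hidden_states=True)

# layer_idx: -2 selects the second-to-last transformer block's output
# instead of the final one; its width matches context_dim: 768 above.
penultimate = out.hidden_states[-2]   # shape (1, 77, 768)
final = out.last_hidden_state         # what layer: "last" would return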
models/configs/v1-inference.yaml DELETED
@@ -1,70 +0,0 @@
-model:
-  base_learning_rate: 1.0e-04
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 10000 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
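
Every target: key in these files is a dotted class path and params: its constructor kwargs. A hedged sketch of how such a config is typically consumed (assuming the ldm package from the CompVis stable-diffusion repo is importable; the checkpoint path is hypothetical):

import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("models/configs/v1-inference.yaml")
model = instantiate_from_config(config.model)   # recursively builds the LatentDiffusion tree

# Hypothetical checkpoint path, for illustration only.
state = torch.load("models/checkpoints/model.ckpt", map_location="cpu")
model.load_state_dict(state.get("state_dict", state), strict=False)
model.eval()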
models/configs/v1-inference_clip_skip_2.yaml DELETED
@@ -1,73 +0,0 @@
-model:
-  base_learning_rate: 1.0e-04
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 10000 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-      params:
-        layer: "hidden"
-        layer_idx: -2
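
This file's content is identical to anything_v3.yaml above; only the filename differs. One block shared by all the v1 configs is the warmup schedule. A re-derivation of the learning-rate multiplier their scheduler_config requests (a sketch from the parameter semantics, not the repo's LambdaLinearScheduler code):

def lr_multiplier(step, warm_up_steps=10_000, cycle_length=10_000_000_000_000,
                  f_start=1e-6, f_max=1.0, f_min=1.0):
    # Linear ramp from f_start to f_max over the warmup, then a linear
    # decay toward f_min stretched over the (enormous) cycle length.
    if step < warm_up_steps:
        return f_start + (f_max - f_start) * step / warm_up_steps
    return f_min + (f_max - f_min) * (cycle_length - step) / cycle_length

print(lr_multiplier(0))        # ~1e-6
print(lr_multiplier(5_000))    # ~0.5
print(lr_multiplier(50_000))   # 1.0 -- flat after warmup, since f_max == f_min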
models/configs/v1-inference_clip_skip_2_fp16.yaml DELETED
@@ -1,74 +0,0 @@
-model:
-  base_learning_rate: 1.0e-04
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 10000 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_fp16: True
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-      params:
-        layer: "hidden"
-        layer_idx: -2
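
The only delta from the previous clip-skip config is use_fp16: True in the UNet params. A sketch of what that flag amounts to (an illustration of the effect, not the repo's loader): weights held in torch.float16 take half the memory of float32.

import torch

conv = torch.nn.Conv2d(4, 320, kernel_size=3, padding=1)  # shaped like the UNet's input conv
fp32_bytes = conv.weight.nelement() * conv.weight.element_size()

conv = conv.half()  # half precision, which is what use_fp16 ultimately requests
fp16_bytes = conv.weight.nelement() * conv.weight.element_size()

print(fp32_bytes, "->", fp16_bytes)  # 46080 -> 23040 bytes for this one kernel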
models/configs/v1-inference_fp16.yaml DELETED
@@ -1,71 +0,0 @@
-model:
-  base_learning_rate: 1.0e-04
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 10000 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_fp16: True
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
models/configs/v1-inpainting-inference.yaml DELETED
@@ -1,71 +0,0 @@
-model:
-  base_learning_rate: 7.5e-05
-  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false # Note: different from the one we trained before
-    conditioning_key: hybrid # important
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    finetune_keys: null
-
-    scheduler_config: # 10000 warmup steps
-      target: ldm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        image_size: 32 # unused
-        in_channels: 9 # 4 data + 4 downscaled image + 1 mask
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_heads: 8
-        use_spatial_transformer: True
-        transformer_depth: 1
-        context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-
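
The in_channels: 9 comment is the key difference from the plain v1 configs. A sketch of how that 9-channel UNet input is assembled (an illustration of the comment, not the repo's exact code):

import torch

b, h, w = 1, 64, 64
noisy_latent  = torch.randn(b, 4, h, w)  # the latent being denoised
masked_latent = torch.randn(b, 4, h, w)  # VAE encoding of the masked source image
mask          = torch.zeros(b, 1, h, w)  # downscaled mask, 1 where content is inpainted

unet_input = torch.cat([noisy_latent, masked_latent, mask], dim=1)
print(unet_input.shape)  # torch.Size([1, 9, 64, 64])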
models/configs/v2-inference-v.yaml DELETED
@@ -1,68 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    parameterization: "v"
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False # we set this to false because this is an inference only config
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        use_fp16: True
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_head_channels: 64 # need to fix for flash-attn
-        use_spatial_transformer: True
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          #attn_type: "vanilla-xformers"
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-      params:
-        freeze: True
-        layer: "penultimate"
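
parameterization: "v" is what distinguishes this config from the plain v2-inference.yaml below. The standard v-prediction relations it refers to, as a runnable sketch (textbook math, not code from the repo):

import torch

x0, eps = torch.randn(4, 64, 64), torch.randn(4, 64, 64)
alpha_t, sigma_t = 0.8, 0.6  # sqrt(alpha_bar_t), sqrt(1 - alpha_bar_t); alpha^2 + sigma^2 = 1

x_t = alpha_t * x0 + sigma_t * eps  # forward-diffused sample
v   = alpha_t * eps - sigma_t * x0  # the target the UNet learns to predict

# Both x0 and eps are recoverable from a predicted v:
x0_hat  = alpha_t * x_t - sigma_t * v
eps_hat = sigma_t * x_t + alpha_t * v
print(torch.allclose(x0_hat, x0, atol=1e-5), torch.allclose(eps_hat, eps, atol=1e-5))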
models/configs/v2-inference-v_fp32.yaml DELETED
@@ -1,68 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    parameterization: "v"
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False # we set this to false because this is an inference only config
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        use_fp16: False
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_head_channels: 64 # need to fix for flash-attn
-        use_spatial_transformer: True
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          #attn_type: "vanilla-xformers"
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-      params:
-        freeze: True
-        layer: "penultimate"
models/configs/v2-inference.yaml DELETED
@@ -1,67 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False # we set this to false because this is an inference only config
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        use_fp16: True
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_head_channels: 64 # need to fix for flash-attn
-        use_spatial_transformer: True
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          #attn_type: "vanilla-xformers"
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-      params:
-        freeze: True
-        layer: "penultimate"
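
Relative to the v1 configs, the text encoder switches from CLIP ViT-L (768-wide) to OpenCLIP ViT-H read at its penultimate layer (1024-wide), and context_dim moves with it. A sketch of why the two must agree (illustrative shapes, not the repo's module; the 320-wide K projection is a hypothetical first-block width):

import torch

context_v1 = torch.randn(1, 77, 768)   # FrozenCLIPEmbedder output width
context_v2 = torch.randn(1, 77, 1024)  # FrozenOpenCLIPEmbedder penultimate width

to_k = torch.nn.Linear(1024, 320, bias=False)  # a v2 cross-attention K projection
print(to_k(context_v2).shape)  # torch.Size([1, 77, 320])
# to_k(context_v1) would raise a shape-mismatch error: the UNet is built
# against its text encoder's width, which is exactly what context_dim records.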
models/configs/v2-inference_fp32.yaml DELETED
@@ -1,67 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: ldm.models.diffusion.ddpm.LatentDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false
-    conditioning_key: crossattn
-    monitor: val/loss_simple_ema
-    scale_factor: 0.18215
-    use_ema: False # we set this to false because this is an inference only config
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        use_fp16: False
-        image_size: 32 # unused
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_head_channels: 64 # need to fix for flash-attn
-        use_spatial_transformer: True
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          #attn_type: "vanilla-xformers"
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-      params:
-        freeze: True
-        layer: "penultimate"
models/configs/v2-inpainting-inference.yaml DELETED
@@ -1,158 +0,0 @@
-model:
-  base_learning_rate: 5.0e-05
-  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
-  params:
-    linear_start: 0.00085
-    linear_end: 0.0120
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: "jpg"
-    cond_stage_key: "txt"
-    image_size: 64
-    channels: 4
-    cond_stage_trainable: false
-    conditioning_key: hybrid
-    scale_factor: 0.18215
-    monitor: val/loss_simple_ema
-    finetune_keys: null
-    use_ema: False
-
-    unet_config:
-      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        image_size: 32 # unused
-        in_channels: 9
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
-        num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
-        num_head_channels: 64 # need to fix for flash-attn
-        use_spatial_transformer: True
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        legacy: False
-
-    first_stage_config:
-      target: ldm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          #attn_type: "vanilla-xformers"
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: [ ]
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-
-    cond_stage_config:
-      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-      params:
-        freeze: True
-        layer: "penultimate"
-
-
-data:
-  target: ldm.data.laion.WebDataModuleFromConfig
-  params:
-    tar_base: null # for concat as in LAION-A
-    p_unsafe_threshold: 0.1
-    filter_word_list: "data/filters.yaml"
-    max_pwatermark: 0.45
-    batch_size: 8
-    num_workers: 6
-    multinode: True
-    min_size: 512
-    train:
-      shards:
-        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
-        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
-        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
-        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
-        - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
-      shuffle: 10000
-      image_key: jpg
-      image_transforms:
-      - target: torchvision.transforms.Resize
-        params:
-          size: 512
-          interpolation: 3
-      - target: torchvision.transforms.RandomCrop
-        params:
-          size: 512
-      postprocess:
-        target: ldm.data.laion.AddMask
-        params:
-          mode: "512train-large"
-          p_drop: 0.25
-    # NOTE use enough shards to avoid empty validation loops in workers
-    validation:
-      shards:
-        - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
-      shuffle: 0
-      image_key: jpg
-      image_transforms:
-      - target: torchvision.transforms.Resize
-        params:
-          size: 512
-          interpolation: 3
-      - target: torchvision.transforms.CenterCrop
-        params:
-          size: 512
-      postprocess:
-        target: ldm.data.laion.AddMask
-        params:
-          mode: "512train-large"
-          p_drop: 0.25
-
-lightning:
-  find_unused_parameters: True
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 10000
-
-    image_logger:
-      target: main.ImageLogger
-      params:
-        enable_autocast: False
-        disabled: False
-        batch_frequency: 1000
-        max_images: 4
-        increase_log_steps: False
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          inpaint: False
-          plot_progressive_rows: False
-          plot_diffusion_rows: False
-          N: 4
-          unconditional_guidance_scale: 5.0
-          unconditional_guidance_label: [""]
-          ddim_steps: 50 # todo check these out for depth2img,
-          ddim_eta: 0.0 # todo check these out for depth2img,
-
-  trainer:
-    benchmark: True
-    val_check_interval: 5000000
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
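
Unlike the other deleted files, this one carries a full training setup: a LAION webdataset pipeline, checkpoint and image-logging callbacks, and trainer flags. Its image_transforms entries map one-to-one onto torchvision; a sketch of the train-time pipeline they describe, built directly rather than via the config machinery (interpolation: 3 is the bicubic mode):

from torchvision import transforms

train_tf = transforms.Compose([
    transforms.Resize(512, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomCrop(512),  # the validation split uses CenterCrop(512) instead
])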