teticio committed
Commit 529c646
Parent: 4f552a8

get working for monochrome

audiodiffusion/utils.py CHANGED
@@ -31,27 +31,6 @@ def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
     return mapping
 
 
-def renew_attention_paths(old_list, n_shave_prefix_segments=0):
-    """
-    Updates paths inside attentions to the new naming scheme (local renaming)
-    """
-    mapping = []
-    for old_item in old_list:
-        new_item = old_item
-
-        # new_item = new_item.replace('norm.weight', 'group_norm.weight')
-        # new_item = new_item.replace('norm.bias', 'group_norm.bias')
-
-        # new_item = new_item.replace('proj_out.weight', 'proj_attn.weight')
-        # new_item = new_item.replace('proj_out.bias', 'proj_attn.bias')
-
-        # new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)
-
-        mapping.append({"old": old_item, "new": new_item})
-
-    return mapping
-
-
 def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside attentions to the new naming scheme (local renaming)
 
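For context, each renew_*_paths helper returns a list of {"old": ..., "new": ...} entries, as the deleted body above shows. A minimal sketch of how such a mapping might be applied when renaming checkpoint keys; apply_path_mapping is a hypothetical illustration, not a function in this repo:

def apply_path_mapping(mapping, old_state_dict):
    # Copy each tensor under its renamed key; keys absent from the
    # mapping are dropped. Hypothetical helper for illustration only.
    return {entry["new"]: old_state_dict[entry["old"]] for entry in mapping}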
config/ldm_autoencoder_kl.yaml CHANGED
@@ -4,22 +4,23 @@ model:
   target: ldm.models.autoencoder.AutoencoderKL
   params:
     monitor: "val/rec_loss"
-    embed_dim: 3
+    embed_dim: 1 # = in_channels
     lossconfig:
       target: ldm.modules.losses.LPIPSWithDiscriminator
       params:
         disc_start: 50001
         kl_weight: 0.000001
         disc_weight: 0.5
+        disc_in_channels: 1 # = out_ch
 
     ddconfig:
       double_z: True
-      z_channels: 4
+      z_channels: 1 # must = embed_dim due to HF limitation
       resolution: 256
-      in_channels: 3
-      out_ch: 3
+      in_channels: 1
+      out_ch: 1
       ch: 128
-      ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1
+      ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1
       num_res_blocks: 2
       attn_resolutions: [ ]
       dropout: 0.0
@@ -27,5 +28,5 @@ model:
 lightning:
   trainer:
     benchmark: True
-    accelerator: gpu
+    #accelerator: gpu
     devices: 1
 
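The comments added to the config encode consistency constraints: embed_dim must equal z_channels for the Hugging Face conversion to work, and the LPIPS discriminator must accept the same number of channels the decoder emits (out_ch). Note also that num_down = len(ch_mult) - 1 = 3, so a 256x256 spectrogram is encoded to a 32x32 latent. A quick sanity check, sketched with OmegaConf (the loader scripts/train_vae.py already uses):

from omegaconf import OmegaConf

config = OmegaConf.load("config/ldm_autoencoder_kl.yaml")
params = config.model.params
ddconfig = params.ddconfig

# embed_dim must equal z_channels, per the HF limitation noted above.
assert params.embed_dim == ddconfig.z_channels
# The discriminator's input channels must match the decoder's output channels.
assert params.lossconfig.params.disc_in_channels == ddconfig.out_ch
# Monochrome spectrograms: one channel in, one channel out.
assert ddconfig.in_channels == ddconfig.out_ch == 1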
scripts/train_vae.py CHANGED
@@ -1,10 +1,6 @@
 # pip install -e git+https://github.com/CompVis/stable-diffusion.git@master
 # pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
 
-# TODO
-# grayscale
-# docstrings
-
 import os
 import argparse
 
@@ -117,9 +113,9 @@ class HFModelCheckpoint(ModelCheckpoint):
         self.hf_checkpoint = hf_checkpoint
 
     def on_train_epoch_end(self, trainer, pl_module):
+        ldm_checkpoint = self._get_metric_interpolated_filepath_name(
+            {'epoch': trainer.current_epoch}, trainer)
         super().on_train_epoch_end(trainer, pl_module)
-        ldm_checkpoint = self.format_checkpoint_name(
-            {'epoch': trainer.current_epoch})
         convert_ldm_to_hf_vae(ldm_checkpoint, self.ldm_config,
                               self.hf_checkpoint)
 
@@ -148,6 +144,7 @@ if __name__ == "__main__":
                         default=1)
     parser.add_argument("--resolution", type=int, default=256)
     parser.add_argument("--hop_length", type=int, default=512)
+    parser.add_argument("--save_images_batches", type=int, default=1000)
     args = parser.parse_args()
 
     config = OmegaConf.load(args.ldm_config_file)
@@ -165,7 +162,8 @@ if __name__ == "__main__":
         trainer_opt,
         resume_from_checkpoint=args.resume_from_checkpoint,
         callbacks=[
-            ImageLogger(channels=config.model.params.ddconfig.out_ch,
+            ImageLogger(every=args.save_images_batches,
+                        channels=config.model.params.ddconfig.out_ch,
                         resolution=args.resolution,
                         hop_length=args.hop_length),
             HFModelCheckpoint(ldm_config=config,
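The reworked hook resolves the checkpoint path with Lightning's private _get_metric_interpolated_filepath_name helper before super().on_train_epoch_end() writes the file, so the subsequent conversion targets the checkpoint actually saved to disk; as a private API, it may change between PyTorch Lightning releases. The conversion can also be run by hand; a sketch assuming the signature visible in this diff and that convert_ldm_to_hf_vae lives in audiodiffusion/utils.py (the file edited above), with illustrative paths:

from omegaconf import OmegaConf
from audiodiffusion.utils import convert_ldm_to_hf_vae

config = OmegaConf.load("config/ldm_autoencoder_kl.yaml")
# Paths are hypothetical; point these at your own checkpoints.
convert_ldm_to_hf_vae("checkpoints/epoch=99.ckpt", config, "models/autoencoder-kl")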
 
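A hypothetical invocation exercising the new flag (arguments not visible in these hunks, such as the dataset location, are omitted; the others keep the defaults shown above):

# Log spectrogram images every 500 batches instead of the default 1000.
python scripts/train_vae.py \
    --ldm_config_file config/ldm_autoencoder_kl.yaml \
    --resolution 256 \
    --hop_length 512 \
    --save_images_batches 500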