# Pixel resolution of the model.
resolution: 512

# Train on 4-channel VAE latents (at 1/8 resolution) instead of RGB.
vae_latents: true

# Number of channels at each level of the UNet.
channels: [128, 256, 384, 512]

# Which UNet levels have attention.
# NOTE(review): presumably 0-based indices into `channels` -- confirm
# against the model code.
attention: [0, 1, 2]