Spaces:
Paused
Paused
File size: 15,607 Bytes
f1ea451 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 |
from model.unet import ScaleAt
from model.latentnet import *
from diffusion.resample import UniformSampler
from diffusion.diffusion import space_timesteps
from typing import Tuple
from torch.utils.data import DataLoader
from config_base import BaseConfig
from diffusion import *
from diffusion.base import GenerativeType, LossType, ModelMeanType, ModelVarType, get_named_beta_schedule
from model import *
from choices import *
from multiprocessing import get_context
import os
from dataset_util import *
from torch.utils.data.distributed import DistributedSampler
from dataset import LatentDataLoader
@dataclass
class PretrainConfig(BaseConfig):
name: str
path: str
@dataclass
class TrainConfig(BaseConfig):
# random seed
seed: int = 0
train_mode: TrainMode = TrainMode.diffusion
train_cond0_prob: float = 0
train_pred_xstart_detach: bool = True
train_interpolate_prob: float = 0
train_interpolate_img: bool = False
manipulate_mode: ManipulateMode = ManipulateMode.celebahq_all
manipulate_cls: str = None
manipulate_shots: int = None
manipulate_loss: ManipulateLossType = ManipulateLossType.bce
manipulate_znormalize: bool = False
manipulate_seed: int = 0
accum_batches: int = 1
autoenc_mid_attn: bool = True
batch_size: int = 16
batch_size_eval: int = None
beatgans_gen_type: GenerativeType = GenerativeType.ddim
beatgans_loss_type: LossType = LossType.mse
beatgans_model_mean_type: ModelMeanType = ModelMeanType.eps
beatgans_model_var_type: ModelVarType = ModelVarType.fixed_large
beatgans_rescale_timesteps: bool = False
latent_infer_path: str = None
latent_znormalize: bool = False
latent_gen_type: GenerativeType = GenerativeType.ddim
latent_loss_type: LossType = LossType.mse
latent_model_mean_type: ModelMeanType = ModelMeanType.eps
latent_model_var_type: ModelVarType = ModelVarType.fixed_large
latent_rescale_timesteps: bool = False
latent_T_eval: int = 1_000
latent_clip_sample: bool = False
latent_beta_scheduler: str = 'linear'
beta_scheduler: str = 'linear'
data_name: str = ''
data_val_name: str = None
diffusion_type: str = None
dropout: float = 0.1
ema_decay: float = 0.9999
eval_num_images: int = 5_000
eval_every_samples: int = 200_000
eval_ema_every_samples: int = 200_000
fid_use_torch: bool = True
fp16: bool = False
grad_clip: float = 1
img_size: int = 64
lr: float = 0.0001
optimizer: OptimizerType = OptimizerType.adam
weight_decay: float = 0
model_conf: ModelConfig = None
model_name: ModelName = None
model_type: ModelType = None
net_attn: Tuple[int] = None
net_beatgans_attn_head: int = 1
# not necessarily the same as the the number of style channels
net_beatgans_embed_channels: int = 512
net_resblock_updown: bool = True
net_enc_use_time: bool = False
net_enc_pool: str = 'adaptivenonzero'
net_beatgans_gradient_checkpoint: bool = False
net_beatgans_resnet_two_cond: bool = False
net_beatgans_resnet_use_zero_module: bool = True
net_beatgans_resnet_scale_at: ScaleAt = ScaleAt.after_norm
net_beatgans_resnet_cond_channels: int = None
net_ch_mult: Tuple[int] = None
net_ch: int = 64
net_enc_attn: Tuple[int] = None
net_enc_k: int = None
# number of resblocks for the encoder (half-unet)
net_enc_num_res_blocks: int = 2
net_enc_channel_mult: Tuple[int] = None
net_enc_grad_checkpoint: bool = False
net_autoenc_stochastic: bool = False
net_latent_activation: Activation = Activation.silu
net_latent_channel_mult: Tuple[int] = (1, 2, 4)
net_latent_condition_bias: float = 0
net_latent_dropout: float = 0
net_latent_layers: int = None
net_latent_net_last_act: Activation = Activation.none
net_latent_net_type: LatentNetType = LatentNetType.none
net_latent_num_hid_channels: int = 1024
net_latent_num_time_layers: int = 2
net_latent_skip_layers: Tuple[int] = None
net_latent_time_emb_channels: int = 64
net_latent_use_norm: bool = False
net_latent_time_last_act: bool = False
net_num_res_blocks: int = 2
# number of resblocks for the UNET
net_num_input_res_blocks: int = None
net_enc_num_cls: int = None
num_workers: int = 4
parallel: bool = False
postfix: str = ''
sample_size: int = 64
sample_every_samples: int = 20_000
save_every_samples: int = 100_000
style_ch: int = 512
T_eval: int = 1_000
T_sampler: str = 'uniform'
T: int = 1_000
total_samples: int = 10_000_000
warmup: int = 0
pretrain: PretrainConfig = None
continue_from: PretrainConfig = None
eval_programs: Tuple[str] = None
# if present load the checkpoint from this path instead
eval_path: str = None
base_dir: str = 'checkpoints'
use_cache_dataset: bool = False
data_cache_dir: str = os.path.expanduser('~/cache')
work_cache_dir: str = os.path.expanduser('~/mycache')
# to be overridden
name: str = ''
def __post_init__(self):
self.batch_size_eval = self.batch_size_eval or self.batch_size
self.data_val_name = self.data_val_name or self.data_name
def scale_up_gpus(self, num_gpus, num_nodes=1):
self.eval_ema_every_samples *= num_gpus * num_nodes
self.eval_every_samples *= num_gpus * num_nodes
self.sample_every_samples *= num_gpus * num_nodes
self.batch_size *= num_gpus * num_nodes
self.batch_size_eval *= num_gpus * num_nodes
return self
@property
def batch_size_effective(self):
return self.batch_size * self.accum_batches
@property
def fid_cache(self):
# we try to use the local dirs to reduce the load over network drives
# hopefully, this would reduce the disconnection problems with sshfs
return f'{self.work_cache_dir}/eval_images/{self.data_name}_size{self.img_size}_{self.eval_num_images}'
@property
def data_path(self):
# may use the cache dir
path = data_paths[self.data_name]
if self.use_cache_dataset and path is not None:
path = use_cached_dataset_path(
path, f'{self.data_cache_dir}/{self.data_name}')
return path
@property
def logdir(self):
return f'{self.base_dir}/{self.name}'
@property
def generate_dir(self):
# we try to use the local dirs to reduce the load over network drives
# hopefully, this would reduce the disconnection problems with sshfs
return f'{self.work_cache_dir}/gen_images/{self.name}'
def _make_diffusion_conf(self, T=None):
if self.diffusion_type == 'beatgans':
# can use T < self.T for evaluation
# follows the guided-diffusion repo conventions
# t's are evenly spaced
if self.beatgans_gen_type == GenerativeType.ddpm:
section_counts = [T]
elif self.beatgans_gen_type == GenerativeType.ddim:
section_counts = f'ddim{T}'
else:
raise NotImplementedError()
return SpacedDiffusionBeatGansConfig(
gen_type=self.beatgans_gen_type,
model_type=self.model_type,
betas=get_named_beta_schedule(self.beta_scheduler, self.T),
model_mean_type=self.beatgans_model_mean_type,
model_var_type=self.beatgans_model_var_type,
loss_type=self.beatgans_loss_type,
rescale_timesteps=self.beatgans_rescale_timesteps,
use_timesteps=space_timesteps(num_timesteps=self.T,
section_counts=section_counts),
fp16=self.fp16,
)
else:
raise NotImplementedError()
def _make_latent_diffusion_conf(self, T=None):
# can use T < self.T for evaluation
# follows the guided-diffusion repo conventions
# t's are evenly spaced
if self.latent_gen_type == GenerativeType.ddpm:
section_counts = [T]
elif self.latent_gen_type == GenerativeType.ddim:
section_counts = f'ddim{T}'
else:
raise NotImplementedError()
return SpacedDiffusionBeatGansConfig(
train_pred_xstart_detach=self.train_pred_xstart_detach,
gen_type=self.latent_gen_type,
# latent's model is always ddpm
model_type=ModelType.ddpm,
# latent shares the beta scheduler and full T
betas=get_named_beta_schedule(self.latent_beta_scheduler, self.T),
model_mean_type=self.latent_model_mean_type,
model_var_type=self.latent_model_var_type,
loss_type=self.latent_loss_type,
rescale_timesteps=self.latent_rescale_timesteps,
use_timesteps=space_timesteps(num_timesteps=self.T,
section_counts=section_counts),
fp16=self.fp16,
)
@property
def model_out_channels(self):
return 3
def make_T_sampler(self):
if self.T_sampler == 'uniform':
return UniformSampler(self.T)
else:
raise NotImplementedError()
def make_diffusion_conf(self):
return self._make_diffusion_conf(self.T)
def make_eval_diffusion_conf(self):
return self._make_diffusion_conf(T=self.T_eval)
def make_latent_diffusion_conf(self):
return self._make_latent_diffusion_conf(T=self.T)
def make_latent_eval_diffusion_conf(self):
# latent can have different eval T
return self._make_latent_diffusion_conf(T=self.latent_T_eval)
def make_dataset(self, path=None, **kwargs):
return LatentDataLoader(self.window_size,
self.frame_jpgs,
self.lmd_feats_prefix,
self.audio_prefix,
self.raw_audio_prefix,
self.motion_latents_prefix,
self.pose_prefix,
self.db_name,
audio_hz=self.audio_hz)
def make_loader(self,
dataset,
shuffle: bool,
num_worker: bool = None,
drop_last: bool = True,
batch_size: int = None,
parallel: bool = False):
if parallel and distributed.is_initialized():
# drop last to make sure that there is no added special indexes
sampler = DistributedSampler(dataset,
shuffle=shuffle,
drop_last=True)
else:
sampler = None
return DataLoader(
dataset,
batch_size=batch_size or self.batch_size,
sampler=sampler,
# with sampler, use the sample instead of this option
shuffle=False if sampler else shuffle,
num_workers=num_worker or self.num_workers,
pin_memory=True,
drop_last=drop_last,
multiprocessing_context=get_context('fork'),
)
def make_model_conf(self):
if self.model_name == ModelName.beatgans_ddpm:
self.model_type = ModelType.ddpm
self.model_conf = BeatGANsUNetConfig(
attention_resolutions=self.net_attn,
channel_mult=self.net_ch_mult,
conv_resample=True,
dims=2,
dropout=self.dropout,
embed_channels=self.net_beatgans_embed_channels,
image_size=self.img_size,
in_channels=3,
model_channels=self.net_ch,
num_classes=None,
num_head_channels=-1,
num_heads_upsample=-1,
num_heads=self.net_beatgans_attn_head,
num_res_blocks=self.net_num_res_blocks,
num_input_res_blocks=self.net_num_input_res_blocks,
out_channels=self.model_out_channels,
resblock_updown=self.net_resblock_updown,
use_checkpoint=self.net_beatgans_gradient_checkpoint,
use_new_attention_order=False,
resnet_two_cond=self.net_beatgans_resnet_two_cond,
resnet_use_zero_module=self.
net_beatgans_resnet_use_zero_module,
)
elif self.model_name in [
ModelName.beatgans_autoenc,
]:
cls = BeatGANsAutoencConfig
# supports both autoenc and vaeddpm
if self.model_name == ModelName.beatgans_autoenc:
self.model_type = ModelType.autoencoder
else:
raise NotImplementedError()
if self.net_latent_net_type == LatentNetType.none:
latent_net_conf = None
elif self.net_latent_net_type == LatentNetType.skip:
latent_net_conf = MLPSkipNetConfig(
num_channels=self.style_ch,
skip_layers=self.net_latent_skip_layers,
num_hid_channels=self.net_latent_num_hid_channels,
num_layers=self.net_latent_layers,
num_time_emb_channels=self.net_latent_time_emb_channels,
activation=self.net_latent_activation,
use_norm=self.net_latent_use_norm,
condition_bias=self.net_latent_condition_bias,
dropout=self.net_latent_dropout,
last_act=self.net_latent_net_last_act,
num_time_layers=self.net_latent_num_time_layers,
time_last_act=self.net_latent_time_last_act,
)
else:
raise NotImplementedError()
self.model_conf = cls(
attention_resolutions=self.net_attn,
channel_mult=self.net_ch_mult,
conv_resample=True,
dims=2,
dropout=self.dropout,
embed_channels=self.net_beatgans_embed_channels,
enc_out_channels=self.style_ch,
enc_pool=self.net_enc_pool,
enc_num_res_block=self.net_enc_num_res_blocks,
enc_channel_mult=self.net_enc_channel_mult,
enc_grad_checkpoint=self.net_enc_grad_checkpoint,
enc_attn_resolutions=self.net_enc_attn,
image_size=self.img_size,
in_channels=3,
model_channels=self.net_ch,
num_classes=None,
num_head_channels=-1,
num_heads_upsample=-1,
num_heads=self.net_beatgans_attn_head,
num_res_blocks=self.net_num_res_blocks,
num_input_res_blocks=self.net_num_input_res_blocks,
out_channels=self.model_out_channels,
resblock_updown=self.net_resblock_updown,
use_checkpoint=self.net_beatgans_gradient_checkpoint,
use_new_attention_order=False,
resnet_two_cond=self.net_beatgans_resnet_two_cond,
resnet_use_zero_module=self.
net_beatgans_resnet_use_zero_module,
latent_net_conf=latent_net_conf,
resnet_cond_channels=self.net_beatgans_resnet_cond_channels,
)
else:
raise NotImplementedError(self.model_name)
return self.model_conf
|