""" Main component: the trainer handles everything: * initializations * training * saving """ import inspect import warnings from copy import deepcopy from pathlib import Path from time import time import numpy as np from comet_ml import ExistingExperiment, Experiment warnings.simplefilter("ignore", UserWarning) import torch import torch.nn as nn from addict import Dict from torch import autograd, sigmoid, softmax from torch.cuda.amp import GradScaler, autocast from tqdm import tqdm from climategan.data import get_all_loaders, decode_segmap_merged_labels from climategan.discriminator import OmniDiscriminator, create_discriminator from climategan.eval_metrics import accuracy, mIOU from climategan.fid import compute_val_fid from climategan.fire import add_fire from climategan.generator import OmniGenerator, create_generator from climategan.logger import Logger from climategan.losses import get_losses from climategan.optim import get_optimizer from climategan.transforms import DiffTransforms from climategan.tutils import ( divide_pred, get_num_params, get_WGAN_gradient, lrgb2srgb, normalize, print_num_parameters, shuffle_batch_tuple, srgb2lrgb, tensor_to_uint8_numpy_image, vgg_preprocess, zero_grad, ) from climategan.utils import ( comet_kwargs, div_dict, find_target_size, flatten_opts, get_display_indices, get_existing_comet_id, get_latest_opts, merge, resolve, sum_dict, Timer, ) try: import torch_xla.core.xla_model as xm # type: ignore except ImportError: pass class Trainer: """Main trainer class""" def __init__(self, opts, comet_exp=None, verbose=0, device=None): """Trainer class to gather various model training procedures such as training evaluating saving and logging init: * creates an addict.Dict logger * creates logger.exp as a comet_exp experiment if `comet` arg is True * sets the device (1 GPU or CPU) Args: opts (addict.Dict): options to configure the trainer, the data, the models comet (bool, optional): whether to log the trainer with comet.ml. Defaults to False. verbose (int, optional): printing level to debug. Defaults to 0. """ super().__init__() self.opts = opts self.verbose = verbose self.logger = Logger(self) self.losses = None self.G = self.D = None self.real_val_fid_stats = None self.use_pl4m = False self.is_setup = False self.loaders = self.all_loaders = None self.exp = None self.current_mode = "train" self.diff_transforms = None self.kitti_pretrain = self.opts.train.kitti.pretrain self.pseudo_training_tasks = set(self.opts.train.pseudo.tasks) self.lr_names = {} self.base_display_images = {} self.kitty_display_images = {} self.domain_labels = {"s": 0, "r": 1} self.device = device or torch.device( "cuda:0" if torch.cuda.is_available() else "cpu" ) if isinstance(comet_exp, Experiment): self.exp = comet_exp if self.opts.train.amp: optimizers = [ self.opts.gen.opt.optimizer.lower(), self.opts.dis.opt.optimizer.lower(), ] if "extraadam" in optimizers: raise ValueError( "AMP does not work with ExtraAdam ({})".format(optimizers) ) self.grad_scaler_d = GradScaler() self.grad_scaler_g = GradScaler() # ------------------------------- # ----- Legacy Overwrites ----- # ------------------------------- if ( self.opts.gen.s.depth_feat_fusion is True or self.opts.gen.s.depth_dada_fusion is True ): self.opts.gen.s.use_dada = True @torch.no_grad() def paint_and_mask(self, image_batch, mask_batch=None, resolution="approx"): """ Paints a batch of images (or a single image with a batch dim of 1). If masks are not provided, they are inferred from the masker. Resolution can either be the train-time resolution or the closest multiple of 2 ** spade_n_up Operations performed without gradient If resolution == "approx" then the output image has the shape: (dim // 2 ** spade_n_up) * 2 ** spade_n_up, for dim in [height, width] eg: (1000, 1300) => (896, 1280) for spade_n_up = 7 If resolution == "exact" then the output image has the same shape: we first process in "approx" mode then upsample bilinear If resolution == "basic" image output shape is the train-time's (typically 640x640) If resolution == "upsample" image is inferred as "basic" and then upsampled to original size Args: image_batch (torch.Tensor): 4D batch of images to flood mask_batch (torch.Tensor, optional): Masks for the images. Defaults to None (infer with Masker). resolution (str, optional): "approx", "exact" or False Returns: torch.Tensor: N x C x H x W where H and W depend on `resolution` """ assert resolution in {"approx", "exact", "basic", "upsample"} previous_mode = self.current_mode if previous_mode == "train": self.eval_mode() if mask_batch is None: mask_batch = self.G.mask(x=image_batch) else: assert len(image_batch) == len(mask_batch) assert image_batch.shape[-2:] == mask_batch.shape[-2:] if resolution not in {"approx", "exact"}: painted = self.G.paint(mask_batch, image_batch) if resolution == "upsample": painted = nn.functional.interpolate( painted, size=image_batch.shape[-2:], mode="bilinear" ) else: # save latent shape zh = self.G.painter.z_h zw = self.G.painter.z_w # adapt latent shape to approximately keep the resolution self.G.painter.z_h = ( image_batch.shape[-2] // 2**self.opts.gen.p.spade_n_up ) self.G.painter.z_w = ( image_batch.shape[-1] // 2**self.opts.gen.p.spade_n_up ) painted = self.G.paint(mask_batch, image_batch) self.G.painter.z_h = zh self.G.painter.z_w = zw if resolution == "exact": painted = nn.functional.interpolate( painted, size=image_batch.shape[-2:], mode="bilinear" ) if previous_mode == "train": self.train_mode() return painted def _p(self, *args, **kwargs): """ verbose-dependant print util """ if self.verbose > 0: print(*args, **kwargs) @torch.no_grad() def infer_all( self, x, numpy=True, stores={}, bin_value=-1, half=False, xla=False, cloudy=True, auto_resize_640=False, ignore_event=set(), return_intermediates=False, ): """ Create a dictionary of events from a numpy or tensor, single or batch image data. stores is a dictionary of times for the Timer class. bin_value is used to binarize (or not) flood masks all values in the output dictionary have 4 dimensions: BxHxWxC if numpy else BxCxHxW """ assert self.is_setup assert len(x.shape) in {3, 4}, f"Unknown Data shape {x.shape}" # convert numpy to tensor if not isinstance(x, torch.Tensor): x = torch.tensor(x, device=self.device) # add batch dimension if len(x.shape) == 3: x.unsqueeze_(0) # permute channels as second dimension if x.shape[1] != 3: assert x.shape[-1] == 3, f"Unknown x shape to permute {x.shape}" x = x.permute(0, 3, 1, 2) # send to device if x.device != self.device: x = x.to(self.device) # interpolate to standard input size if auto_resize_640 and (x.shape[-1] != 640 or x.shape[-2] != 640): x = torch.nn.functional.interpolate(x, (640, 640), mode="bilinear") if half: x = x.half() # adjust painter's latent vector self.G.painter.set_latent_shape(x.shape, True) with Timer(store=stores.get("all events", [])): # encode with Timer(store=stores.get("encode", [])): z = self.G.encode(x) if xla: xm.mark_step() # predict from masker with Timer(store=stores.get("depth", [])): depth, z_depth = self.G.decoders["d"](z) if xla: xm.mark_step() with Timer(store=stores.get("segmentation", [])): segmentation = self.G.decoders["s"](z, z_depth) if xla: xm.mark_step() with Timer(store=stores.get("mask", [])): cond = self.G.make_m_cond(depth, segmentation, x) mask = self.G.mask(z=z, cond=cond, z_depth=z_depth) if xla: xm.mark_step() # apply events if "wildfire" not in ignore_event: with Timer(store=stores.get("wildfire", [])): wildfire = self.compute_fire(x, seg_preds=segmentation) if "smog" not in ignore_event: with Timer(store=stores.get("smog", [])): smog = self.compute_smog(x, d=depth, s=segmentation) if "flood" not in ignore_event: with Timer(store=stores.get("flood", [])): flood = self.compute_flood( x, m=mask, s=segmentation, cloudy=cloudy, bin_value=bin_value, ) if xla: xm.mark_step() output_data = {} if numpy: with Timer(store=stores.get("numpy", [])): if "flood" not in ignore_event: # normalize to 0-1 flood = tensor_to_uint8_numpy_image(flood) # convert to 0-255 uint8 output_data["flood"] = flood if "wildfire" not in ignore_event: wildfire = tensor_to_uint8_numpy_image(wildfire) output_data["wildfire"] = wildfire if "smog" not in ignore_event: smog = tensor_to_uint8_numpy_image(smog) output_data["smog"] = smog if return_intermediates: if numpy: output_data["mask"] = ( ((mask > bin_value) * 255).cpu().numpy().astype(np.uint8) ) output_data["depth"] = tensor_to_uint8_numpy_image(depth) output_data["segmentation"] = ( decode_segmap_merged_labels(segmentation, "r", False) .cpu() .permute(0, 2, 3, 1) .numpy() .astype(np.uint8) ) else: output_data["mask"] = mask output_data["depth"] = depth output_data["segmentation"] = segmentation return output_data @classmethod def resume_from_path( cls, path, overrides={}, setup=True, inference=False, new_exp=False, device=None, verbose=1, ): """ Resume and optionally setup a trainer from a specific path, using the latest opts and checkpoint. Requires path to contain opts.yaml (or increased), url.txt (or increased) and checkpoints/ Args: path (str | pathlib.Path): Trainer to resume overrides (dict, optional): Override loaded opts with those. Defaults to {}. setup (bool, optional): Wether or not to setup the trainer before returning it. Defaults to True. inference (bool, optional): Setup should be done in inference mode or not. Defaults to False. new_exp (bool, optional): Re-use existing comet exp in path or create a new one? Defaults to False. device (torch.device, optional): Device to use Returns: climategan.Trainer: Loaded and resumed trainer """ p = resolve(path) assert p.exists() c = p / "checkpoints" assert c.exists() and c.is_dir() opts = get_latest_opts(p) opts = Dict(merge(overrides, opts)) opts.train.resume = True if new_exp is None: exp = None elif new_exp is True: exp = Experiment(project_name="climategan", **comet_kwargs) exp.log_asset_folder( str(resolve(Path(__file__)).parent), recursive=True, log_file_name=True, ) exp.log_parameters(flatten_opts(opts)) else: comet_id = get_existing_comet_id(p) exp = ExistingExperiment(previous_experiment=comet_id, **comet_kwargs) trainer = cls(opts, comet_exp=exp, device=device, verbose=verbose) if setup: trainer.setup(inference=inference) return trainer def save(self): save_dir = Path(self.opts.output_path) / Path("checkpoints") save_dir.mkdir(exist_ok=True) save_path = save_dir / "latest_ckpt.pth" # Construct relevant state dicts / optims: # Save at least G save_dict = { "epoch": self.logger.epoch, "G": self.G.state_dict(), "g_opt": self.g_opt.state_dict(), "step": self.logger.global_step, } if self.D is not None and get_num_params(self.D) > 0: save_dict["D"] = self.D.state_dict() save_dict["d_opt"] = self.d_opt.state_dict() if ( self.logger.epoch >= self.opts.train.min_save_epoch and self.logger.epoch % self.opts.train.save_n_epochs == 0 ): torch.save(save_dict, save_dir / f"epoch_{self.logger.epoch}_ckpt.pth") torch.save(save_dict, save_path) def resume(self, inference=False): tpu = "xla" in str(self.device) if tpu: print("Resuming on TPU:", self.device) m_path = Path(self.opts.load_paths.m) p_path = Path(self.opts.load_paths.p) pm_path = Path(self.opts.load_paths.pm) output_path = Path(self.opts.output_path) map_loc = self.device if not tpu else "cpu" if "m" in self.opts.tasks and "p" in self.opts.tasks: # ---------------------------------------- # ----- Masker and Painter Loading ----- # ---------------------------------------- # want to resume a pm model but no path was provided: # resume a single pm model from output_path if all([str(p) == "none" for p in [m_path, p_path, pm_path]]): checkpoint_path = output_path / "checkpoints/latest_ckpt.pth" print("Resuming P+M model from", str(checkpoint_path)) checkpoint = torch.load(checkpoint_path, map_location=map_loc) # want to resume a pm model with a pm_path provided: # resume a single pm model from load_paths.pm # depending on whether a dir or a file is specified elif str(pm_path) != "none": assert pm_path.exists() if pm_path.is_dir(): checkpoint_path = pm_path / "checkpoints/latest_ckpt.pth" else: assert pm_path.suffix == ".pth" checkpoint_path = pm_path print("Resuming P+M model from", str(checkpoint_path)) checkpoint = torch.load(checkpoint_path, map_location=map_loc) # want to resume a pm model, pm_path not provided: # m_path and p_path must be provided as dirs or pth files elif m_path != p_path: assert m_path.exists() assert p_path.exists() if m_path.is_dir(): m_path = m_path / "checkpoints/latest_ckpt.pth" if p_path.is_dir(): p_path = p_path / "checkpoints/latest_ckpt.pth" assert m_path.suffix == ".pth" assert p_path.suffix == ".pth" print(f"Resuming P+M model from \n -{p_path} \nand \n -{m_path}") m_checkpoint = torch.load(m_path, map_location=map_loc) p_checkpoint = torch.load(p_path, map_location=map_loc) checkpoint = merge(m_checkpoint, p_checkpoint) else: raise ValueError( "Cannot resume a P+M model with provided load_paths:\n{}".format( self.opts.load_paths ) ) else: # ---------------------------------- # ----- Single Model Loading ----- # ---------------------------------- # cannot specify both paths if str(m_path) != "none" and str(p_path) != "none": raise ValueError( "Opts tasks are {} but received 2 values for the load_paths".format( self.opts.tasks ) ) # specified m elif str(m_path) != "none": assert m_path.exists() assert "m" in self.opts.tasks model = "M" if m_path.is_dir(): m_path = m_path / "checkpoints/latest_ckpt.pth" checkpoint_path = m_path # specified m elif str(p_path) != "none": assert p_path.exists() assert "p" in self.opts.tasks model = "P" if p_path.is_dir(): p_path = p_path / "checkpoints/latest_ckpt.pth" checkpoint_path = p_path # specified neither p nor m: resume from output_path else: model = "P" if "p" in self.opts.tasks else "M" checkpoint_path = output_path / "checkpoints/latest_ckpt.pth" print(f"Resuming {model} model from {checkpoint_path}") checkpoint = torch.load(checkpoint_path, map_location=map_loc) # On TPUs must send the data to the xla device as it cannot be mapped # there directly from torch.load if tpu: checkpoint = xm.send_cpu_data_to_device(checkpoint, self.device) # ----------------------- # ----- Restore G ----- # ----------------------- if inference: incompatible_keys = self.G.load_state_dict(checkpoint["G"], strict=False) if incompatible_keys.missing_keys: print("WARNING: Missing keys in self.G.load_state_dict, keeping inits") print(incompatible_keys.missing_keys) if incompatible_keys.unexpected_keys: print("WARNING: Ignoring Unexpected keys in self.G.load_state_dict") print(incompatible_keys.unexpected_keys) else: self.G.load_state_dict(checkpoint["G"]) if inference: # only G is needed to infer print("Done loading checkpoints.") return self.g_opt.load_state_dict(checkpoint["g_opt"]) # ------------------------------ # ----- Resume scheduler ----- # ------------------------------ # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822 for _ in range(self.logger.epoch + 1): self.update_learning_rates() # ----------------------- # ----- Restore D ----- # ----------------------- if self.D is not None and get_num_params(self.D) > 0: self.D.load_state_dict(checkpoint["D"]) self.d_opt.load_state_dict(checkpoint["d_opt"]) # --------------------------- # ----- Resore logger ----- # --------------------------- self.logger.epoch = checkpoint["epoch"] self.logger.global_step = checkpoint["step"] self.exp.log_text( "Resuming from epoch {} & step {}".format( checkpoint["epoch"], checkpoint["step"] ) ) # Round step to even number for extraGradient if self.logger.global_step % 2 != 0: self.logger.global_step += 1 def eval_mode(self): """ Set trainer's models in eval mode """ if self.G is not None: self.G.eval() if self.D is not None: self.D.eval() self.current_mode = "eval" def train_mode(self): """ Set trainer's models in train mode """ if self.G is not None: self.G.train() if self.D is not None: self.D.train() self.current_mode = "train" def assert_z_matches_x(self, x, z): assert x.shape[0] == ( z.shape[0] if not isinstance(z, (list, tuple)) else z[0].shape[0] ), "x-> {}, z->{}".format( x.shape, z.shape if not isinstance(z, (list, tuple)) else z[0].shape ) def batch_to_device(self, b): """sends the data in b to self.device Args: b (dict): the batch dictionnay Returns: dict: the batch dictionnary with its "data" field sent to self.device """ for task, tensor in b["data"].items(): b["data"][task] = tensor.to(self.device) return b def sample_painter_z(self, batch_size): return self.G.sample_painter_z(batch_size, self.device) @property def train_loaders(self): """Get a zip of all training loaders Returns: generator: zip generator yielding tuples: (batch_rf, batch_rn, batch_sf, batch_sn) """ return zip(*list(self.loaders["train"].values())) @property def val_loaders(self): """Get a zip of all validation loaders Returns: generator: zip generator yielding tuples: (batch_rf, batch_rn, batch_sf, batch_sn) """ return zip(*list(self.loaders["val"].values())) def compute_latent_shape(self): """Compute the latent shape, i.e. the Encoder's output shape, from a batch. Raises: ValueError: If no loader, the latent_shape cannot be inferred Returns: tuple: (c, h, w) """ x = None for mode in self.all_loaders: for domain in self.all_loaders.loaders[mode]: x = ( self.all_loaders[mode][domain] .dataset[0]["data"]["x"] .to(self.device) ) break if x is not None: break if x is None: raise ValueError("No batch found to compute_latent_shape") x = x.unsqueeze(0) z = self.G.encode(x) return z.shape[1:] if not isinstance(z, (list, tuple)) else z[0].shape[1:] def g_opt_step(self): """Run an optimizing step ; if using ExtraAdam, there needs to be an extrapolation step every other step """ if "extra" in self.opts.gen.opt.optimizer.lower() and ( self.logger.global_step % 2 == 0 ): self.g_opt.extrapolation() else: self.g_opt.step() def d_opt_step(self): """Run an optimizing step ; if using ExtraAdam, there needs to be an extrapolation step every other step """ if "extra" in self.opts.dis.opt.optimizer.lower() and ( self.logger.global_step % 2 == 0 ): self.d_opt.extrapolation() else: self.d_opt.step() def update_learning_rates(self): if self.g_scheduler is not None: self.g_scheduler.step() if self.d_scheduler is not None: self.d_scheduler.step() def setup(self, inference=False): """Prepare the trainer before it can be used to train the models: * initialize G and D * creates 2 optimizers """ self.logger.global_step = 0 start_time = time() self.logger.time.start_time = start_time verbose = self.verbose if not inference: self.all_loaders = get_all_loaders(self.opts) # ----------------------- # ----- Generator ----- # ----------------------- __t = time() print("Creating generator...") self.G: OmniGenerator = create_generator( self.opts, device=self.device, no_init=inference, verbose=verbose ) self.has_painter = get_num_params(self.G.painter) or self.G.load_val_painter() if self.has_painter: self.G.painter.set_latent_shape(find_target_size(self.opts, "x"), True) print(f"Generator OK in {time() - __t:.1f}s.") if inference: # Inference mode: no more than a Generator needed print("Inference mode: no Discriminator, no optimizers") print_num_parameters(self) self.switch_data(to="base") if self.opts.train.resume: self.resume(True) self.eval_mode() print("Trainer is in evaluation mode.") print("Setup done.") self.is_setup = True return # --------------------------- # ----- Discriminator ----- # --------------------------- self.D: OmniDiscriminator = create_discriminator( self.opts, self.device, verbose=verbose ) print("Discriminator OK.") print_num_parameters(self) # -------------------------- # ----- Optimization ----- # -------------------------- # Get different optimizers for each task (different learning rates) self.g_opt, self.g_scheduler, self.lr_names["G"] = get_optimizer( self.G, self.opts.gen.opt, self.opts.tasks ) if get_num_params(self.D) > 0: self.d_opt, self.d_scheduler, self.lr_names["D"] = get_optimizer( self.D, self.opts.dis.opt, self.opts.tasks, True ) else: self.d_opt, self.d_scheduler = None, None self.losses = get_losses(self.opts, verbose, device=self.device) if "p" in self.opts.tasks and self.opts.gen.p.diff_aug.use: self.diff_transforms = DiffTransforms(self.opts.gen.p.diff_aug) if verbose > 0: for mode, mode_dict in self.all_loaders.items(): for domain, domain_loader in mode_dict.items(): print( "Loader {} {} : {}".format( mode, domain, len(domain_loader.dataset) ) ) # ---------------------------- # ----- Display images ----- # ---------------------------- self.set_display_images() # ------------------------------- # ----- Log Architectures ----- # ------------------------------- self.logger.log_architecture() # ----------------------------- # ----- Set data source ----- # ----------------------------- if self.kitti_pretrain: self.switch_data(to="kitti") else: self.switch_data(to="base") # ------------------------- # ----- Setup Done. ----- # ------------------------- print(" " * 50, end="\r") print("Done creating display images") if self.opts.train.resume: print("Resuming Model (inference: False)") self.resume(False) else: print("Not resuming: starting a new model") print("Setup done.") self.is_setup = True def switch_data(self, to="kitti"): caller = inspect.stack()[1].function print(f"[{caller}] Switching data source to", to) self.data_source = to if to == "kitti": self.display_images = self.kitty_display_images if self.all_loaders is not None: self.loaders = { mode: {"s": self.all_loaders[mode]["kitti"]} for mode in self.all_loaders } else: self.display_images = self.base_display_images if self.all_loaders is not None: self.loaders = { mode: { domain: self.all_loaders[mode][domain] for domain in self.all_loaders[mode] if domain != "kitti" } for mode in self.all_loaders } if ( self.logger.global_step % 2 != 0 and "extra" in self.opts.dis.opt.optimizer.lower() ): print( "Warning: artificially bumping step to run an extrapolation step first." ) self.logger.global_step += 1 def set_display_images(self, use_all=False): for mode, mode_dict in self.all_loaders.items(): if self.kitti_pretrain: self.kitty_display_images[mode] = {} self.base_display_images[mode] = {} for domain in mode_dict: if self.kitti_pretrain and domain == "kitti": target_dict = self.kitty_display_images else: if domain == "kitti": continue target_dict = self.base_display_images dataset = self.all_loaders[mode][domain].dataset display_indices = ( get_display_indices(self.opts, domain, len(dataset)) if not use_all else list(range(len(dataset))) ) ldis = len(display_indices) print( f" Creating {ldis} {mode} {domain} display images...", end="\r", flush=True, ) target_dict[mode][domain] = [ Dict(dataset[i]) for i in display_indices if (print(f"({i})", end="\r") is None and i < len(dataset)) ] if self.exp is not None: for im_id, d in enumerate(target_dict[mode][domain]): self.exp.log_parameter( "display_image_{}_{}_{}".format(mode, domain, im_id), d["paths"], ) def train(self): """For each epoch: * train * eval * save """ assert self.is_setup for self.logger.epoch in range( self.logger.epoch, self.logger.epoch + self.opts.train.epochs ): # backprop painter's disc loss to masker if ( self.logger.epoch == self.opts.gen.p.pl4m_epoch and get_num_params(self.G.painter) > 0 and "p" in self.opts.tasks and self.opts.gen.m.use_pl4m ): print( "\n\n >>> Enabling pl4m at epoch {}\n\n".format(self.logger.epoch) ) self.use_pl4m = True self.run_epoch() self.run_evaluation(verbose=1) self.save() # end vkitti2 pre-training if self.logger.epoch == self.opts.train.kitti.epochs - 1: self.switch_data(to="base") self.kitti_pretrain = False # end pseudo training if self.logger.epoch == self.opts.train.pseudo.epochs - 1: self.pseudo_training_tasks = set() def run_epoch(self): """Runs an epoch: * checks trainer is setup * gets a tuple of batches per domain * sends batches to device * updates sequentially G, D """ assert self.is_setup self.train_mode() if self.exp is not None: self.exp.log_parameter("epoch", self.logger.epoch) epoch_len = min(len(loader) for loader in self.loaders["train"].values()) epoch_desc = "Epoch {}".format(self.logger.epoch) self.logger.time.epoch_start = time() for multi_batch_tuple in tqdm( self.train_loaders, desc=epoch_desc, total=epoch_len, mininterval=0.5, unit="batch", ): self.logger.time.step_start = time() multi_batch_tuple = shuffle_batch_tuple(multi_batch_tuple) # The `[0]` is because the domain is contained in a list multi_domain_batch = { batch["domain"][0]: self.batch_to_device(batch) for batch in multi_batch_tuple } # ------------------------------ # ----- Update Generator ----- # ------------------------------ # freeze params of the discriminator if self.d_opt is not None: for param in self.D.parameters(): param.requires_grad = False self.update_G(multi_domain_batch) # ---------------------------------- # ----- Update Discriminator ----- # ---------------------------------- # unfreeze params of the discriminator if self.d_opt is not None and not self.kitti_pretrain: for param in self.D.parameters(): param.requires_grad = True self.update_D(multi_domain_batch) # ------------------------- # ----- Log Metrics ----- # ------------------------- self.logger.global_step += 1 self.logger.log_step_time(time()) if not self.kitti_pretrain: self.update_learning_rates() self.logger.log_learning_rates() self.logger.log_epoch_time(time()) def update_G(self, multi_domain_batch, verbose=0): """Perform an update on g from multi_domain_batch which is a dictionary domain => batch * automatic mixed precision according to self.opts.train.amp * compute loss for each task * loss.backward() * g_opt_step() * g_opt.step() or .extrapolation() depending on self.logger.global_step * logs losses on comet.ml with self.logger.log_losses(model_to_update="G") Args: multi_domain_batch (dict): dictionnary of domain batches """ zero_grad(self.G) if self.opts.train.amp: with autocast(): g_loss = self.get_G_loss(multi_domain_batch, verbose) self.grad_scaler_g.scale(g_loss).backward() self.grad_scaler_g.step(self.g_opt) self.grad_scaler_g.update() else: g_loss = self.get_G_loss(multi_domain_batch, verbose) g_loss.backward() self.g_opt_step() self.logger.log_losses(model_to_update="G", mode="train") def update_D(self, multi_domain_batch, verbose=0): zero_grad(self.D) if self.opts.train.amp: with autocast(): d_loss = self.get_D_loss(multi_domain_batch, verbose) self.grad_scaler_d.scale(d_loss).backward() self.grad_scaler_d.step(self.d_opt) self.grad_scaler_d.update() else: d_loss = self.get_D_loss(multi_domain_batch, verbose) d_loss.backward() self.d_opt_step() self.logger.losses.disc.total_loss = d_loss.item() self.logger.log_losses(model_to_update="D", mode="train") def get_D_loss(self, multi_domain_batch, verbose=0): """Compute the discriminators' losses: * for each domain-specific batch: * encode the image * get the conditioning tensor if using spade * source domain is the data's domain, sequentially r|s then f|n * get the target domain accordingly * compute the translated image from the data * compute the source domain discriminator's loss on the data * compute the target domain discriminator's loss on the translated image # ? In this setting, each D[decoder][domain] is updated twice towards # real or fake data See readme's update d section for details Args: multi_domain_batch ([type]): [description] Returns: [type]: [description] """ disc_loss = { "m": {"Advent": 0}, "s": {"Advent": 0}, } if self.opts.dis.p.use_local_discriminator: disc_loss["p"] = {"global": 0, "local": 0} else: disc_loss["p"] = {"gan": 0} for domain, batch in multi_domain_batch.items(): x = batch["data"]["x"] # --------------------- # ----- Painter ----- # --------------------- if domain == "rf" and self.has_painter: m = batch["data"]["m"] # sample vector with torch.no_grad(): # see spade compute_discriminator_loss fake = self.G.paint(m, x) if self.opts.gen.p.diff_aug.use: fake = self.diff_transforms(fake) x = self.diff_transforms(x) fake = fake.detach() fake.requires_grad_() if self.opts.dis.p.use_local_discriminator: fake_d_global = self.D["p"]["global"](fake) real_d_global = self.D["p"]["global"](x) fake_d_local = self.D["p"]["local"](fake * m) real_d_local = self.D["p"]["local"](x * m) global_loss = self.losses["D"]["p"](fake_d_global, False, True) global_loss += self.losses["D"]["p"](real_d_global, True, True) local_loss = self.losses["D"]["p"](fake_d_local, False, True) local_loss += self.losses["D"]["p"](real_d_local, True, True) disc_loss["p"]["global"] += global_loss disc_loss["p"]["local"] += local_loss else: real_cat = torch.cat([m, x], axis=1) fake_cat = torch.cat([m, fake], axis=1) real_fake_cat = torch.cat([real_cat, fake_cat], dim=0) real_fake_d = self.D["p"](real_fake_cat) real_d, fake_d = divide_pred(real_fake_d) disc_loss["p"]["gan"] = self.losses["D"]["p"](fake_d, False, True) disc_loss["p"]["gan"] += self.losses["D"]["p"](real_d, True, True) # -------------------- # ----- Masker ----- # -------------------- else: z = self.G.encode(x) s_pred = d_pred = cond = z_depth = None if "s" in batch["data"]: if "d" in self.opts.tasks and self.opts.gen.s.use_dada: d_pred, z_depth = self.G.decoders["d"](z) step_loss, s_pred = self.masker_s_loss( x, z, d_pred, z_depth, None, domain, for_="D" ) step_loss *= self.opts.train.lambdas.advent.adv_main disc_loss["s"]["Advent"] += step_loss if "m" in batch["data"]: if "d" in self.opts.tasks: if self.opts.gen.m.use_spade: if d_pred is None: d_pred, z_depth = self.G.decoders["d"](z) cond = self.G.make_m_cond(d_pred, s_pred, x) elif self.opts.gen.m.use_dada: if d_pred is None: d_pred, z_depth = self.G.decoders["d"](z) step_loss, _ = self.masker_m_loss( x, z, None, domain, for_="D", cond=cond, z_depth=z_depth, depth_preds=d_pred, ) step_loss *= self.opts.train.lambdas.advent.adv_main disc_loss["m"]["Advent"] += step_loss self.logger.losses.disc.update( { dom: { k: v.item() if isinstance(v, torch.Tensor) else v for k, v in d.items() } for dom, d in disc_loss.items() } ) loss = sum(v for d in disc_loss.values() for k, v in d.items()) return loss def get_G_loss(self, multi_domain_batch, verbose=0): m_loss = p_loss = None # For now, always compute "representation loss" g_loss = 0 if any(t in self.opts.tasks for t in "msd"): m_loss = self.get_masker_loss(multi_domain_batch) self.logger.losses.gen.masker = m_loss.item() g_loss += m_loss if "p" in self.opts.tasks and not self.kitti_pretrain: p_loss = self.get_painter_loss(multi_domain_batch) self.logger.losses.gen.painter = p_loss.item() g_loss += p_loss assert g_loss != 0 and not isinstance(g_loss, int), "No update in get_G_loss!" self.logger.losses.gen.total_loss = g_loss.item() return g_loss def get_masker_loss(self, multi_domain_batch): # TODO update docstrings """Only update the representation part of the model, meaning everything but the translation part * for each batch in available domains: * compute task-specific losses * compute the adaptation and translation decoders' auto-encoding losses * compute the adaptation decoder's translation losses (GAN and Cycle) Args: multi_domain_batch (dict): dictionnary mapping domain names to batches from the trainer's loaders Returns: torch.Tensor: scalar loss tensor, weighted according to opts.train.lambdas """ m_loss = 0 for domain, batch in multi_domain_batch.items(): # We don't care about the flooded domain here if domain == "rf": continue x = batch["data"]["x"] z = self.G.encode(x) # -------------------------------------- # ----- task-specific losses (2) ----- # -------------------------------------- d_pred = s_pred = z_depth = None for task in ["d", "s", "m"]: if task not in batch["data"]: continue target = batch["data"][task] if task == "d": loss, d_pred, z_depth = self.masker_d_loss( x, z, target, domain, "G" ) m_loss += loss self.logger.losses.gen.task["d"][domain] = loss.item() elif task == "s": loss, s_pred = self.masker_s_loss( x, z, d_pred, z_depth, target, domain, "G" ) m_loss += loss self.logger.losses.gen.task["s"][domain] = loss.item() elif task == "m": cond = None if self.opts.gen.m.use_spade: if not self.opts.gen.m.detach: d_pred = d_pred.clone() s_pred = s_pred.clone() cond = self.G.make_m_cond(d_pred, s_pred, x) loss, _ = self.masker_m_loss( x, z, target, domain, "G", cond=cond, z_depth=z_depth, depth_preds=d_pred, ) m_loss += loss self.logger.losses.gen.task["m"][domain] = loss.item() return m_loss def get_painter_loss(self, multi_domain_batch): """Computes the translation loss when flooding/deflooding images Args: multi_domain_batch (dict): dictionnary mapping domain names to batches from the trainer's loaders Returns: torch.Tensor: scalar loss tensor, weighted according to opts.train.lambdas """ step_loss = 0 # self.g_opt.zero_grad() lambdas = self.opts.train.lambdas batch_domain = "rf" batch = multi_domain_batch[batch_domain] x = batch["data"]["x"] # ! different mask: hides water to be reconstructed # ! 1 for water, 0 otherwise m = batch["data"]["m"] fake_flooded = self.G.paint(m, x) # ---------------------- # ----- VGG Loss ----- # ---------------------- if lambdas.G.p.vgg != 0: loss = self.losses["G"]["p"]["vgg"]( vgg_preprocess(fake_flooded * m), vgg_preprocess(x * m) ) loss *= lambdas.G.p.vgg self.logger.losses.gen.p.vgg = loss.item() step_loss += loss # --------------------- # ----- TV Loss ----- # --------------------- if lambdas.G.p.tv != 0: loss = self.losses["G"]["p"]["tv"](fake_flooded * m) loss *= lambdas.G.p.tv self.logger.losses.gen.p.tv = loss.item() step_loss += loss # -------------------------- # ----- Context Loss ----- # -------------------------- if lambdas.G.p.context != 0: loss = self.losses["G"]["p"]["context"](fake_flooded, x, m) loss *= lambdas.G.p.context self.logger.losses.gen.p.context = loss.item() step_loss += loss # --------------------------------- # ----- Reconstruction Loss ----- # --------------------------------- if lambdas.G.p.reconstruction != 0: loss = self.losses["G"]["p"]["reconstruction"](fake_flooded, x, m) loss *= lambdas.G.p.reconstruction self.logger.losses.gen.p.reconstruction = loss.item() step_loss += loss # ------------------------------------- # ----- Local & Global GAN Loss ----- # ------------------------------------- if self.opts.gen.p.diff_aug.use: fake_flooded = self.diff_transforms(fake_flooded) x = self.diff_transforms(x) if self.opts.dis.p.use_local_discriminator: fake_d_global = self.D["p"]["global"](fake_flooded) fake_d_local = self.D["p"]["local"](fake_flooded * m) real_d_global = self.D["p"]["global"](x) # Note: discriminator returns [out_1,...,out_num_D] outputs # Each out_i is a list [feat1, feat2, ..., pred_i] self.logger.losses.gen.p.gan = 0 loss = self.losses["G"]["p"]["gan"](fake_d_global, True, False) loss += self.losses["G"]["p"]["gan"](fake_d_local, True, False) loss *= lambdas.G["p"]["gan"] self.logger.losses.gen.p.gan = loss.item() step_loss += loss # ----------------------------------- # ----- Feature Matching Loss ----- # ----------------------------------- # (only on global discriminator) # Order must be real, fake if self.opts.dis.p.get_intermediate_features: loss = self.losses["G"]["p"]["featmatch"](real_d_global, fake_d_global) loss *= lambdas.G["p"]["featmatch"] if isinstance(loss, float): self.logger.losses.gen.p.featmatch = loss else: self.logger.losses.gen.p.featmatch = loss.item() step_loss += loss # ------------------------------------------- # ----- Single Discriminator GAN Loss ----- # ------------------------------------------- else: real_cat = torch.cat([m, x], axis=1) fake_cat = torch.cat([m, fake_flooded], axis=1) real_fake_cat = torch.cat([real_cat, fake_cat], dim=0) real_fake_d = self.D["p"](real_fake_cat) real_d, fake_d = divide_pred(real_fake_d) loss = self.losses["G"]["p"]["gan"](fake_d, True, False) self.logger.losses.gen.p.gan = loss.item() step_loss += loss # ----------------------------------- # ----- Feature Matching Loss ----- # ----------------------------------- if self.opts.dis.p.get_intermediate_features and lambdas.G.p.featmatch != 0: loss = self.losses["G"]["p"]["featmatch"](real_d, fake_d) loss *= lambdas.G.p.featmatch if isinstance(loss, float): self.logger.losses.gen.p.featmatch = loss else: self.logger.losses.gen.p.featmatch = loss.item() step_loss += loss return step_loss def masker_d_loss(self, x, z, target, domain, for_="G"): assert for_ in {"G", "D"} self.assert_z_matches_x(x, z) assert x.shape[0] == target.shape[0] zero_loss = torch.tensor(0.0, device=self.device) weight = self.opts.train.lambdas.G.d.main prediction, z_depth = self.G.decoders["d"](z) if self.opts.gen.d.classify.enable: target.squeeze_(1) full_loss = self.losses["G"]["tasks"]["d"](prediction, target) full_loss *= weight if weight == 0 or (domain == "r" and "d" not in self.pseudo_training_tasks): return zero_loss, prediction, z_depth return full_loss, prediction, z_depth def masker_s_loss(self, x, z, depth_preds, z_depth, target, domain, for_="G"): assert for_ in {"G", "D"} assert domain in {"r", "s"} self.assert_z_matches_x(x, z) assert x.shape[0] == target.shape[0] if target is not None else True full_loss = torch.tensor(0.0, device=self.device) softmax_preds = None # -------------------------- # ----- Segmentation ----- # -------------------------- pred = None if for_ == "G" or self.opts.gen.s.use_advent: pred = self.G.decoders["s"](z, z_depth) # Supervised segmentation loss: crossent for sim domain, # crossent_pseudo for real ; loss is crossent in any case if for_ == "G": if domain == "s" or "s" in self.pseudo_training_tasks: if domain == "s": logger = self.logger.losses.gen.task["s"]["crossent"] weight = self.opts.train.lambdas.G["s"]["crossent"] else: logger = self.logger.losses.gen.task["s"]["crossent_pseudo"] weight = self.opts.train.lambdas.G["s"]["crossent_pseudo"] if weight != 0: # Cross-Entropy loss loss_func = self.losses["G"]["tasks"]["s"]["crossent"] loss = loss_func(pred, target.squeeze(1)) loss *= weight full_loss += loss logger[domain] = loss.item() if domain == "r": weight = self.opts.train.lambdas.G["s"]["minent"] if self.opts.gen.s.use_minent and weight != 0: softmax_preds = softmax(pred, dim=1) # Entropy minimization loss loss = self.losses["G"]["tasks"]["s"]["minent"](softmax_preds) loss *= weight full_loss += loss self.logger.losses.gen.task["s"]["minent"]["r"] = loss.item() # Fool ADVENT discriminator if self.opts.gen.s.use_advent: if self.opts.gen.s.use_dada and depth_preds is not None: depth_preds = depth_preds.detach() else: depth_preds = None if for_ == "D": domain_label = domain logger = {} loss_func = self.losses["D"]["advent"] pred = pred.detach() weight = self.opts.train.lambdas.advent.adv_main else: domain_label = "s" logger = self.logger.losses.gen.task["s"]["advent"] loss_func = self.losses["G"]["tasks"]["s"]["advent"] weight = self.opts.train.lambdas.G["s"]["advent"] if (for_ == "D" or domain == "r") and weight != 0: if softmax_preds is None: softmax_preds = softmax(pred, dim=1) loss = loss_func( softmax_preds, self.domain_labels[domain_label], self.D["s"]["Advent"], depth_preds, ) loss *= weight full_loss += loss logger[domain] = loss.item() if for_ == "D": # WGAN: clipping or GP if self.opts.dis.s.gan_type == "GAN" or "WGAN_norm": pass elif self.opts.dis.s.gan_type == "WGAN": for p in self.D["s"]["Advent"].parameters(): p.data.clamp_( self.opts.dis.s.wgan_clamp_lower, self.opts.dis.s.wgan_clamp_upper, ) elif self.opts.dis.s.gan_type == "WGAN_gp": prob_need_grad = autograd.Variable(pred, requires_grad=True) d_out = self.D["s"]["Advent"](prob_need_grad) gp = get_WGAN_gradient(prob_need_grad, d_out) gp_loss = gp * self.opts.train.lambdas.advent.WGAN_gp full_loss += gp_loss else: raise NotImplementedError return full_loss, pred def masker_m_loss( self, x, z, target, domain, for_="G", cond=None, z_depth=None, depth_preds=None ): assert for_ in {"G", "D"} assert domain in {"r", "s"} self.assert_z_matches_x(x, z) assert x.shape[0] == target.shape[0] if target is not None else True full_loss = torch.tensor(0.0, device=self.device) pred_logits = self.G.decoders["m"](z, cond=cond, z_depth=z_depth) pred_prob = sigmoid(pred_logits) pred_prob_complementary = 1 - pred_prob prob = torch.cat([pred_prob, pred_prob_complementary], dim=1) if for_ == "G": # TV loss weight = self.opts.train.lambdas.G.m.tv if weight != 0: loss = self.losses["G"]["tasks"]["m"]["tv"](pred_prob) loss *= weight full_loss += loss self.logger.losses.gen.task["m"]["tv"][domain] = loss.item() weight = self.opts.train.lambdas.G.m.bce if domain == "s" and weight != 0: # CrossEnt Loss loss = self.losses["G"]["tasks"]["m"]["bce"](pred_logits, target) loss *= weight full_loss += loss self.logger.losses.gen.task["m"]["bce"]["s"] = loss.item() if domain == "r": weight = self.opts.train.lambdas.G["m"]["gi"] if self.opts.gen.m.use_ground_intersection and weight != 0: # GroundIntersection loss loss = self.losses["G"]["tasks"]["m"]["gi"](pred_prob, target) loss *= weight full_loss += loss self.logger.losses.gen.task["m"]["gi"]["r"] = loss.item() weight = self.opts.train.lambdas.G.m.pl4m if self.use_pl4m and weight != 0: # Painter loss pl4m_loss = self.painter_loss_for_masker(x, pred_prob) pl4m_loss *= weight full_loss += pl4m_loss self.logger.losses.gen.task.m.pl4m.r = pl4m_loss.item() weight = self.opts.train.lambdas.advent.ent_main if self.opts.gen.m.use_minent and weight != 0: # MinEnt loss loss = self.losses["G"]["tasks"]["m"]["minent"](prob) loss *= weight full_loss += loss self.logger.losses.gen.task["m"]["minent"]["r"] = loss.item() if self.opts.gen.m.use_advent: # AdvEnt loss if self.opts.gen.m.use_dada and depth_preds is not None: depth_preds = depth_preds.detach() depth_preds = torch.nn.functional.interpolate( depth_preds, size=x.shape[-2:], mode="nearest" ) else: depth_preds = None if for_ == "D": domain_label = domain logger = {} loss_func = self.losses["D"]["advent"] prob = prob.detach() weight = self.opts.train.lambdas.advent.adv_main else: domain_label = "s" logger = self.logger.losses.gen.task["m"]["advent"] loss_func = self.losses["G"]["tasks"]["m"]["advent"] weight = self.opts.train.lambdas.advent.adv_main if (for_ == "D" or domain == "r") and weight != 0: loss = loss_func( prob.to(self.device), self.domain_labels[domain_label], self.D["m"]["Advent"], depth_preds, ) loss *= weight full_loss += loss logger[domain] = loss.item() if for_ == "D": # WGAN: clipping or GP if self.opts.dis.m.gan_type == "GAN" or "WGAN_norm": pass elif self.opts.dis.m.gan_type == "WGAN": for p in self.D["s"]["Advent"].parameters(): p.data.clamp_( self.opts.dis.m.wgan_clamp_lower, self.opts.dis.m.wgan_clamp_upper, ) elif self.opts.dis.m.gan_type == "WGAN_gp": prob_need_grad = autograd.Variable(prob, requires_grad=True) d_out = self.D["s"]["Advent"](prob_need_grad) gp = get_WGAN_gradient(prob_need_grad, d_out) gp_loss = self.opts.train.lambdas.advent.WGAN_gp * gp full_loss += gp_loss else: raise NotImplementedError return full_loss, prob def painter_loss_for_masker(self, x, m): # pl4m loss # painter should not be updated for param in self.G.painter.parameters(): param.requires_grad = False # TODO for param in self.D.painter.parameters(): # param.requires_grad = False fake_flooded = self.G.paint(m, x) if self.opts.dis.p.use_local_discriminator: fake_d_global = self.D["p"]["global"](fake_flooded) fake_d_local = self.D["p"]["local"](fake_flooded * m) # Note: discriminator returns [out_1,...,out_num_D] outputs # Each out_i is a list [feat1, feat2, ..., pred_i] pl4m_loss = self.losses["G"]["p"]["gan"](fake_d_global, True, False) pl4m_loss += self.losses["G"]["p"]["gan"](fake_d_local, True, False) else: real_cat = torch.cat([m, x], axis=1) fake_cat = torch.cat([m, fake_flooded], axis=1) real_fake_cat = torch.cat([real_cat, fake_cat], dim=0) real_fake_d = self.D["p"](real_fake_cat) _, fake_d = divide_pred(real_fake_d) pl4m_loss = self.losses["G"]["p"]["gan"](fake_d, True, False) if "p" in self.opts.tasks: for param in self.G.painter.parameters(): param.requires_grad = True return pl4m_loss @torch.no_grad() def run_evaluation(self, verbose=0): print("******************* Running Evaluation ***********************") start_time = time() self.eval_mode() val_logger = None nb_of_batches = None for i, multi_batch_tuple in enumerate(self.val_loaders): # create a dictionnary (domain => batch) from tuple # (batch_domain_0, ..., batch_domain_i) # and send it to self.device nb_of_batches = i + 1 multi_domain_batch = { batch["domain"][0]: self.batch_to_device(batch) for batch in multi_batch_tuple } self.get_G_loss(multi_domain_batch, verbose) if val_logger is None: val_logger = deepcopy(self.logger.losses.generator) else: val_logger = sum_dict(val_logger, self.logger.losses.generator) val_logger = div_dict(val_logger, nb_of_batches) self.logger.losses.generator = val_logger self.logger.log_losses(model_to_update="G", mode="val") for d in self.opts.domains: self.logger.log_comet_images("train", d) self.logger.log_comet_images("val", d) if "m" in self.opts.tasks and self.has_painter and not self.kitti_pretrain: self.logger.log_comet_combined_images("train", "r") self.logger.log_comet_combined_images("val", "r") if self.exp is not None: print() if "m" in self.opts.tasks or "s" in self.opts.tasks: self.eval_images("val", "r") self.eval_images("val", "s") if "p" in self.opts.tasks and not self.kitti_pretrain: val_fid = compute_val_fid(self) if self.exp is not None: self.exp.log_metric("val_fid", val_fid, step=self.logger.global_step) else: print("Validation FID Score", val_fid) self.train_mode() timing = int(time() - start_time) print("****************** Done in {}s *********************".format(timing)) def eval_images(self, mode, domain): if domain == "s" and self.kitti_pretrain: domain = "kitti" if domain == "rf" or domain not in self.display_images[mode]: return metric_funcs = {"accuracy": accuracy, "mIOU": mIOU} metric_avg_scores = {"m": {}} if "s" in self.opts.tasks: metric_avg_scores["s"] = {} if "d" in self.opts.tasks and domain == "s" and self.opts.gen.d.classify.enable: metric_avg_scores["d"] = {} for key in metric_funcs: for task in metric_avg_scores: metric_avg_scores[task][key] = [] for im_set in self.display_images[mode][domain]: x = im_set["data"]["x"].unsqueeze(0).to(self.device) z = self.G.encode(x) s_pred = d_pred = z_depth = None if "d" in metric_avg_scores: d_pred, z_depth = self.G.decoders["d"](z) d_pred = d_pred.detach().cpu() if domain == "s": d = im_set["data"]["d"].unsqueeze(0).detach() for metric in metric_funcs: metric_score = metric_funcs[metric](d_pred, d) metric_avg_scores["d"][metric].append(metric_score) if "s" in metric_avg_scores: if z_depth is None: if self.opts.gen.s.use_dada and "d" in self.opts.tasks: _, z_depth = self.G.decoders["d"](z) s_pred = self.G.decoders["s"](z, z_depth).detach().cpu() s = im_set["data"]["s"].unsqueeze(0).detach() for metric in metric_funcs: metric_score = metric_funcs[metric](s_pred, s) metric_avg_scores["s"][metric].append(metric_score) if "m" in self.opts: cond = None if s_pred is not None and d_pred is not None: cond = self.G.make_m_cond(d_pred, s_pred, x) if z_depth is None: if self.opts.gen.m.use_dada and "d" in self.opts.tasks: _, z_depth = self.G.decoders["d"](z) pred_mask = ( (self.G.mask(z=z, cond=cond, z_depth=z_depth)).detach().cpu() ) pred_mask = (pred_mask > 0.5).to(torch.float32) pred_prob = torch.cat([1 - pred_mask, pred_mask], dim=1) m = im_set["data"]["m"].unsqueeze(0).detach() for metric in metric_funcs: if metric != "mIOU": metric_score = metric_funcs[metric](pred_mask, m) else: metric_score = metric_funcs[metric](pred_prob, m) metric_avg_scores["m"][metric].append(metric_score) metric_avg_scores = { task: { metric: np.mean(values) if values else float("nan") for metric, values in met_dict.items() } for task, met_dict in metric_avg_scores.items() } metric_avg_scores = { task: { metric: value if not np.isnan(value) else -1 for metric, value in met_dict.items() } for task, met_dict in metric_avg_scores.items() } if self.exp is not None: self.exp.log_metrics( flatten_opts(metric_avg_scores), prefix=f"metrics_{mode}_{domain}", step=self.logger.global_step, ) else: print(f"metrics_{mode}_{domain}") print(flatten_opts(metric_avg_scores)) return 0 def functional_test_mode(self): import atexit self.opts.output_path = ( Path("~").expanduser() / "climategan" / "functional_tests" ) Path(self.opts.output_path).mkdir(parents=True, exist_ok=True) with open(Path(self.opts.output_path) / "is_functional.test", "w") as f: f.write("trainer functional test - delete this dir") if self.exp is not None: self.exp.log_parameter("is_functional_test", True) atexit.register(self.del_output_path) def del_output_path(self, force=False): import shutil if not Path(self.opts.output_path).exists(): return if (Path(self.opts.output_path) / "is_functional.test").exists() or force: shutil.rmtree(self.opts.output_path) def compute_fire(self, x, seg_preds=None, z=None, z_depth=None): """ Transforms input tensor given wildfires event Args: x (torch.Tensor): Input tensor seg_preds (torch.Tensor): Semantic segmentation predictions for input tensor z (torch.Tensor): Latent vector of encoded "x". Can be None if seg_preds is given. Returns: torch.Tensor: Wildfire version of input tensor """ if seg_preds is None: if z is None: z = self.G.encode(x) seg_preds = self.G.decoders["s"](z, z_depth) return add_fire(x, seg_preds, self.opts.events.fire) def compute_flood( self, x, z=None, z_depth=None, m=None, s=None, cloudy=None, bin_value=-1 ): """ Applies a flood (mask + paint) to an input image, with optionally pre-computed masker z or mask Args: x (torch.Tensor): B x C x H x W -1:1 input image z (torch.Tensor, optional): B x C x H x W Masker latent vector. Defaults to None. m (torch.Tensor, optional): B x 1 x H x W Mask. Defaults to None. bin_value (float, optional): Mask binarization value. Set to -1 to use smooth masks (no binarization) Returns: torch.Tensor: B x 3 x H x W -1:1 flooded image """ if m is None: if z is None: z = self.G.encode(x) if "d" in self.opts.tasks and self.opts.gen.m.use_dada and z_depth is None: _, z_depth = self.G.decoders["d"](z) m = self.G.mask(x=x, z=z, z_depth=z_depth) if bin_value >= 0: m = (m > bin_value).to(m.dtype) if cloudy: assert s is not None return self.G.paint_cloudy(m, x, s) return self.G.paint(m, x) def compute_smog(self, x, z=None, d=None, s=None, use_sky_seg=False): # implementation from the paper: # HazeRD: An outdoor scene dataset and benchmark for single image dehazing sky_mask = None if d is None or (use_sky_seg and s is None): if z is None: z = self.G.encode(x) if d is None: d, _ = self.G.decoders["d"](z) if use_sky_seg and s is None: if "s" not in self.opts.tasks: raise ValueError( "Cannot have " + "(use_sky_seg is True and s is None and 's' not in tasks)" ) s = self.G.decoders["s"](z) # TODO: s to sky mask # TODO: interpolate to d's size params = self.opts.events.smog airlight = params.airlight * torch.ones(3) airlight = airlight.view(1, -1, 1, 1).to(self.device) irradiance = srgb2lrgb(x) beta = torch.tensor([params.beta / params.vr] * 3) beta = beta.view(1, -1, 1, 1).to(self.device) d = normalize(d, mini=0.3, maxi=1.0) d = 1.0 / d d = normalize(d, mini=0.1, maxi=1) if sky_mask is not None: d[sky_mask] = 1 d = torch.nn.functional.interpolate( d, size=x.shape[-2:], mode="bilinear", align_corners=True ) d = d.repeat(1, 3, 1, 1) transmission = torch.exp(d * -beta) smogged = transmission * irradiance + (1 - transmission) * airlight smogged = lrgb2srgb(smogged) # add yellow filter alpha = params.alpha / 255 yellow_mask = torch.Tensor([params.yellow_color]) / 255 yellow_filter = ( yellow_mask.unsqueeze(2) .unsqueeze(2) .repeat(1, 1, smogged.shape[-2], smogged.shape[-1]) .to(self.device) ) smogged = smogged * (1 - alpha) + yellow_filter * alpha return smogged