Spaces: Running on Zero
daidedou committed · e321b92
Parent(s): 458efe2
forgot a few things lol
Browse files
- config/diffusion/dfaust_fmap.yaml +65 -0
- config/matching/diff_mask.yaml +28 -0
- config/matching/lap_mask.yaml +23 -0
- config/matching/resol_mask.yaml +23 -0
- config/matching/sds.yaml +35 -0
- config/matching/sds_dt4d.yaml +35 -0
- config/matching/sds_slow.yaml +40 -0
- config/matching/sds_smal.yaml +35 -0
- config/matching/snk.yaml +27 -0
- diffu_models/basis_dataset.py +314 -0
- diffu_models/dit_models.py +383 -0
- diffu_models/losses.py +96 -0
- diffu_models/precond.py +152 -0
- diffu_models/sds.py +77 -0
- shape_data/__init__.py +74 -0
- shape_data/data_utils.py +270 -0
- shape_data/dt4dinter.py +50 -0
- shape_data/dt4dintra.py +57 -0
- shape_data/faust.py +408 -0
- shape_data/scape.py +17 -0
- shape_data/shrec19.py +42 -0
- shape_data/smalr.py +31 -0
- shape_data/tosca.py +46 -0
- snk/__init__.py +0 -0
- snk/loss.py +119 -0
- snk/prism_decoder.py +86 -0
config/diffusion/dfaust_fmap.yaml
ADDED
@@ -0,0 +1,65 @@
# misc
misc:
  cuda: True
  device: 0
  checkpoint_interval: 1
  log_interval: 812
  desc: null
  precond: False
  dry_run: False

data:
  root_dir: "data_cache"
  name: DFAUST_fmap_30
  n_fmap: 30
  out: "fmap_exps"
  cond: False
  template_path: "data/template.ply"
  normalize: True
  pairs: False
  abs: True

add_name:
  do: False
  name: "bis"

architecture:
  model: "DiT"
  name_arch: "DiT-S/4"
  input_type: "img"
  cond: False  # Conditioning with 3D-CODED

## loss params
#loss:
#  w_gt: False   # set w_gt: True to train as a supervised method
#  w_ortho: 1    # orthogonality loss for the functional map (default: 1)
#  w_Qortho: 0   # orthogonality loss for the complex functional map (default: 1)
#  w_bij: 1
#  w_res: 1      # residual loss for the functional map (default: 1)
#  w_rank: -0.1
#  w_srnf: 1
#  min_alpha: 1
#  max_alpha: 100
#

hyper_params:
  iterations: 200
  batch_size: 256
  lr: 0.001
  lr_rampup_kimg: 10000  # learning-rate ramp-up duration
  ema_halflife_nshape: 500  # half-life (in shapes seen) of the exponential moving average (EMA) of model weights
  ema_rampup_ratio: 0.05  # EMA ramp-up coefficient, None = no ramp-up
  dropout: 0
  loss_name: 'VPLoss'
  ls: 1  # loss scaling

perfs:
  fp16: False
  workers: 1

resume:
  pkl: null
  transfer: null
kimg_per_tick: 5
snapshot_ticks: 50
state_dump_ticks: 50
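As a minimal sketch (assuming PyYAML and the repo layout above; the actual training entry point may load configs differently), this file parses into plain nested dictionaries:

# Minimal config-loading sketch; paths and keys follow the YAML above.
import yaml

with open("config/diffusion/dfaust_fmap.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["architecture"]["name_arch"])   # -> "DiT-S/4"
print(cfg["hyper_params"]["loss_name"])   # -> "VPLoss"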
config/matching/diff_mask.yaml
ADDED
@@ -0,0 +1,28 @@
gpu: 0
cache: "cache/fmaps"

sds: True
optimize: False

sds_conf:
  train_dir: pretrained
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 2
    use_diff: True
    diffusion:
      abs: True
      normalize: False
      time: 1
      batch_sds: 32
      batch_mask: 200


zo_shot: 150
config/matching/lap_mask.yaml
ADDED
@@ -0,0 +1,23 @@
gpu: 0
cache: "cache/fmaps"

sds: True
optimize: False

sds_conf:
  train_dir: pretrained
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 1e-3
    use_resolvent: False  ## Don't forget to change lambda_ (to around 100) if you enable the resolvent mask
    resolvent_gamma: 0.5


zo_shot: 150
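For context, a hedged sketch of the two eigenvalue masks that lap_mask.yaml and resol_mask.yaml toggle between: the plain Laplacian-commutativity mask and the resolvent mask (Ren et al. 2019), in the form commonly used in functional-map codebases. The repo's own implementation may differ in detail.

import torch

def commutativity_mask(evals1, evals2):
    # Penalizes ||C diag(evals1) - diag(evals2) C||: M[i, j] = (evals2[i] - evals1[j])^2.
    return (evals2[:, None] - evals1[None, :]) ** 2

def resolvent_mask(evals1, evals2, gamma=0.5):
    # Resolvent variant: eigenvalues are rescaled, raised to gamma, and
    # compared through the complex resolvent's real/imaginary parts.
    scale = max(evals1.max(), evals2.max())
    e1 = (evals1 / scale) ** gamma
    e2 = (evals2 / scale) ** gamma
    re = e2[:, None] / (e2[:, None] ** 2 + 1) - e1[None, :] / (e1[None, :] ** 2 + 1)
    im = 1 / (e2[:, None] ** 2 + 1) - 1 / (e1[None, :] ** 2 + 1)
    return re ** 2 + im ** 2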
config/matching/resol_mask.yaml
ADDED
@@ -0,0 +1,23 @@
gpu: 0
cache: "cache/fmaps"

sds: True
optimize: False

sds_conf:
  train_dir: pretrained
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 100
    use_resolvent: True  ## Don't forget to change lambda_ (to around 100) if you enable the resolvent mask
    resolvent_gamma: 0.5


zo_shot: 150
config/matching/sds.yaml
ADDED
@@ -0,0 +1,35 @@
gpu: 0
cache: "cache/fmaps"

sds: True
refine: True
optimize: True
oriented: True

sds_conf:
  train_dir: pretrained
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 1e-1
    use_diff: True
    diffusion:
      abs: True
      normalize: False
      time: 1
      batch_sds: 32
      batch_mask: 200

opt:
  n_loop: 300
  soft_p2p: False

loss:
  sds: 1.
  proper: 1.
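For context, a hedged sketch of the score-distillation (SDS) step that the `sds` weight above controls, in the DreamFusion spirit; diffu_models/sds.py in this commit is the authoritative version. Here `denoiser` stands in for a preconditioned network (precond.py) and `loss_fn` for one of the losses in diffu_models/losses.py.

import torch

def sds_grad(denoiser, loss_fn, C):
    # Sample a noise level restricted to the SDS range, perturb the current
    # functional map C, and pull it toward the denoiser's prediction.
    sigma, weight = loss_fn.noise_and_weight(C.shape[0], C.device, sds=True)
    n = torch.randn_like(C) * sigma
    with torch.no_grad():
        D_Cn = denoiser(C + n, sigma)
    # Gradient of the SDS objective w.r.t. C: weight * (C - denoised estimate).
    return weight * (C - D_Cn)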
config/matching/sds_dt4d.yaml
ADDED
@@ -0,0 +1,35 @@
gpu: 0
cache: "cache/fmaps"

sds: True
refine: True
optimize: True
rotate: True

sds_conf:
  train_dir: fmap_exps
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 1e-3
    use_diff: True
    diffusion:
      abs: True
      normalize: False
      time: 1
      batch_sds: 32
      batch_mask: 200

opt:
  n_loop: 1000
  soft_p2p: False

loss:
  sds: 0.1
  proper: 1.
config/matching/sds_slow.yaml
ADDED
@@ -0,0 +1,40 @@
gpu: 0
cache: "cache/fmaps"

sds: True
refine: True
optimize: True
oriented: True

diff_model:
  train_dir: pretrained

# diff_model:
#   train_dir: fmap_exps
#   diff_num_exp: 53

sds_conf:
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 1e-3
    use_diff: True
    diffusion:
      abs: True
      normalize: False
      time: 1
      batch_sds: 32
      batch_mask: 200

opt:
  n_loop: 1000
  soft_p2p: False

loss:
  sds: 0.1
  proper: 1.
config/matching/sds_smal.yaml
ADDED
@@ -0,0 +1,35 @@
gpu: 0
cache: "cache/fmaps"

sds: True
refine: True
optimize: True
oriented: True

sds_conf:
  train_dir: pretrained
  diff_num_exp: 53
  zoomout: 40

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 1e-1
    use_diff: True
    diffusion:
      abs: True
      normalize: False
      time: 1
      batch_sds: 32
      batch_mask: 200

opt:
  n_loop: 1000
  soft_p2p: False

loss:
  sds: 0.1
  proper: 1.
config/matching/snk.yaml
ADDED
@@ -0,0 +1,27 @@
gpu: 0
cache: "cache/fmaps"

snk: True
refine: True
optimize: True

deepfeat_conf:
  fmap:
    feat: "xyz"
    n_fmap: 30
    C_in: 3
    n_feat: 128  ## Doesn't change
    lambda_: 100
    use_resolvent: True  ## Don't forget to change lambda_ (to around 100) if you enable the resolvent mask
    resolvent_gamma: 0.5

opt:
  n_loop: 1000
  soft_p2p: True

loss:
  bij: 1.
  ortho: 1.
  cycle: 1
  mse_rec: 1
  prism_rec: 1
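For context, a hedged sketch of the standard bijectivity and orthogonality energies that the `bij` and `ortho` weights above refer to; snk/loss.py in this commit is the authoritative version.

import torch

def bij_loss(C12, C21):
    # Composing the two functional maps should approximate the identity.
    I = torch.eye(C12.shape[-1], device=C12.device)
    return ((C12 @ C21 - I) ** 2).sum() + ((C21 @ C12 - I) ** 2).sum()

def ortho_loss(C12):
    # Area-preserving maps yield orthonormal functional maps: C^T C = I.
    I = torch.eye(C12.shape[-1], device=C12.device)
    return ((C12.transpose(-1, -2) @ C12 - I) ** 2).sum()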
diffu_models/basis_dataset.py
ADDED
@@ -0,0 +1,314 @@
import os

import numpy as np
import torch
from torch.utils.data import DataLoader

N_POSES = 21


class AMASSDataset(torch.utils.data.Dataset):
    def __init__(self, root_path, version='version0', subset='train', basis_path='base_amass.npy',
                 sample_interval=None, num_coeffs=100, return_shape=False,
                 normalize=True, min_max=False):

        self.root_path = root_path
        self.version = version
        assert subset in ['train', 'valid', 'test']
        self.subset = subset
        self.sample_interval = sample_interval
        self.return_shape = return_shape
        self.normalize = normalize
        self.min_max = min_max
        self.num_coeffs = num_coeffs
        self.poses, self.shapes = self.read_data()

        if self.sample_interval:
            self._sample(sample_interval)
        if self.normalize:
            if self.min_max:
                self.min_poses, self.max_poses, self.min_shapes, self.max_shapes = self.Normalize()
            else:
                self.mean_poses, self.std_poses, self.mean_shapes, self.std_shapes = self.Normalize()

        self.real_data_len = len(self.poses)

    def __getitem__(self, idx):
        """
        Return:
            [21, 3] or [21, 6] for poses including body and root orient
            [10] for shapes (betas) [optional]
        """
        data_poses = self.poses[idx % self.real_data_len]
        if self.return_shape:
            return data_poses, self.shapes[idx % self.real_data_len]
        return data_poses

    def __len__(self):
        return len(self.poses)

    def _sample(self, sample_interval):
        print(f'Class AMASSDataset({self.subset}): sample dataset every {sample_interval} frames')
        self.poses = self.poses[::sample_interval]

    def read_data(self):
        data_path = os.path.join(self.root_path, self.subset)
        # root_orient = torch.load(os.path.join(data_path, 'root_orient.pt'))
        coeffs = torch.load(os.path.join(data_path, 'train_coeffs.pt'))
        shapes = torch.load(os.path.join(data_path, 'betas.pt')) if self.return_shape else None
        # poses = torch.cat([root_orient, pose_body], dim=1)
        if self.num_coeffs < 300:
            coeffs = coeffs[:, -self.num_coeffs:]

        return coeffs, shapes

    def Normalize(self):
        # Normalization statistics are computed on the train split (z-score or min-max).
        if self.min_max:
            normalize_path = os.path.join(self.root_path, 'train', 'coeffs_' + str(self.num_coeffs) + '_normalize1.pt')
        else:
            normalize_path = os.path.join(self.root_path, 'train', 'coeffs_' + str(self.num_coeffs) + '_normalize2.pt')

        if os.path.exists(normalize_path):
            normalize_params = torch.load(normalize_path)
            if self.min_max:
                min_poses, max_poses, min_shapes, max_shapes = (
                    normalize_params['min_poses'],
                    normalize_params['max_poses'],
                    normalize_params['min_shapes'],
                    normalize_params['max_shapes']
                )
            else:
                mean_poses, std_poses, mean_shapes, std_shapes = (
                    normalize_params['mean_poses'],
                    normalize_params['std_poses'],
                    normalize_params['mean_shapes'],
                    normalize_params['std_shapes']
                )
        else:
            if self.min_max:
                min_poses = torch.min(self.poses, dim=0)[0]
                max_poses = torch.max(self.poses, dim=0)[0]

                min_shapes = torch.min(self.shapes, dim=0)[0] if self.return_shape else None
                max_shapes = torch.max(self.shapes, dim=0)[0] if self.return_shape else None

                torch.save({
                    'min_poses': min_poses,
                    'max_poses': max_poses,
                    'min_shapes': min_shapes,
                    'max_shapes': max_shapes
                }, normalize_path)
            else:
                mean_poses = torch.mean(self.poses, dim=0)
                std_poses = torch.std(self.poses, dim=0)

                mean_shapes = torch.mean(self.shapes, dim=0) if self.return_shape else None
                std_shapes = torch.std(self.shapes, dim=0) if self.return_shape else None

                torch.save({
                    'mean_poses': mean_poses,
                    'std_poses': std_poses,
                    'mean_shapes': mean_shapes,
                    'std_shapes': std_shapes
                }, normalize_path)

        if self.min_max:
            self.poses = 2 * (self.poses - min_poses) / (max_poses - min_poses) - 1
            if self.return_shape:
                self.shapes = 2 * (self.shapes - min_shapes) / (max_shapes - min_shapes) - 1
            return min_poses, max_poses, min_shapes, max_shapes
        else:
            self.poses = (self.poses - mean_poses) / std_poses
            if self.return_shape:
                self.shapes = (self.shapes - mean_shapes) / std_shapes
            return mean_poses, std_poses, mean_shapes, std_shapes

    def Denormalize(self, poses, shapes=None):
        assert len(poses.shape) == 2 or len(poses.shape) == 3  # [b, data_dim] or [t, b, data_dim]

        if self.min_max:
            min_poses = self.min_poses.view(1, -1).to(poses.device)
            max_poses = self.max_poses.view(1, -1).to(poses.device)

            if len(poses.shape) == 3:  # [t, b, data_dim]
                min_poses = min_poses.unsqueeze(0)
                max_poses = max_poses.unsqueeze(0)

            denormalized_poses = 0.5 * ((poses + 1) * (max_poses - min_poses) + 2 * min_poses)

            if shapes is not None and self.min_shapes is not None:
                min_shapes = self.min_shapes.view(1, -1).to(shapes.device)
                max_shapes = self.max_shapes.view(1, -1).to(shapes.device)

                if len(shapes.shape) == 3:
                    min_shapes = min_shapes.unsqueeze(0)
                    max_shapes = max_shapes.unsqueeze(0)

                denormalized_shapes = 0.5 * ((shapes + 1) * (max_shapes - min_shapes) + 2 * min_shapes)
                return denormalized_poses, denormalized_shapes
            else:
                return denormalized_poses
        else:
            mean_poses = self.mean_poses.view(1, -1).to(poses.device)
            std_poses = self.std_poses.view(1, -1).to(poses.device)

            if len(poses.shape) == 3:  # [t, b, data_dim]
                mean_poses = mean_poses.unsqueeze(0)
                std_poses = std_poses.unsqueeze(0)

            denormalized_poses = poses * std_poses + mean_poses

            if shapes is not None and self.mean_shapes is not None:
                mean_shapes = self.mean_shapes.view(1, -1).to(shapes.device)
                std_shapes = self.std_shapes.view(1, -1).to(shapes.device)

                if len(shapes.shape) == 3:
                    mean_shapes = mean_shapes.unsqueeze(0)
                    std_shapes = std_shapes.unsqueeze(0)

                denormalized_shapes = shapes * std_shapes + mean_shapes
                return denormalized_poses, denormalized_shapes
            else:
                return denormalized_poses

    def eval(self, preds):
        pass


class Posenormalizer:
    def __init__(self, data_path, device='cuda:0', normalize=True, min_max=True, rot_rep=None):
        assert rot_rep in ['rot6d', 'axis']
        self.normalize = normalize
        self.min_max = min_max
        self.rot_rep = rot_rep
        normalize_params = torch.load(os.path.join(data_path, '{}_normalize1.pt'.format(rot_rep)))
        self.min_poses, self.max_poses = normalize_params['min_poses'].to(device), normalize_params['max_poses'].to(device)
        normalize_params = torch.load(os.path.join(data_path, '{}_normalize2.pt'.format(rot_rep)))
        self.mean_poses, self.std_poses = normalize_params['mean_poses'].to(device), normalize_params['std_poses'].to(device)

    def offline_normalize(self, poses, from_axis=False):
        assert len(poses.shape) == 2 or len(poses.shape) == 3  # [b, data_dim] or [t, b, data_dim]

        if not self.normalize:
            return poses

        if self.min_max:
            min_poses = self.min_poses.view(1, -1)
            max_poses = self.max_poses.view(1, -1)

            if len(poses.shape) == 3:  # [t, b, data_dim]
                min_poses = min_poses.unsqueeze(0)
                max_poses = max_poses.unsqueeze(0)

            normalized_poses = 2 * (poses - min_poses) / (max_poses - min_poses) - 1

        else:
            mean_poses = self.mean_poses.view(1, -1)
            std_poses = self.std_poses.view(1, -1)

            if len(poses.shape) == 3:  # [t, b, data_dim]
                mean_poses = mean_poses.unsqueeze(0)
                std_poses = std_poses.unsqueeze(0)

            normalized_poses = (poses - mean_poses) / std_poses

        return normalized_poses

    def offline_denormalize(self, poses, to_axis=False):
        assert len(poses.shape) == 2 or len(poses.shape) == 3  # [b, data_dim] or [t, b, data_dim]

        if not self.normalize:
            denormalized_poses = poses
        else:
            if self.min_max:
                min_poses = self.min_poses.view(1, -1)
                max_poses = self.max_poses.view(1, -1)

                if len(poses.shape) == 3:  # [t, b, data_dim]
                    min_poses = min_poses.unsqueeze(0)
                    max_poses = max_poses.unsqueeze(0)

                denormalized_poses = 0.5 * ((poses + 1) * (max_poses - min_poses) + 2 * min_poses)

            else:
                mean_poses = self.mean_poses.view(1, -1)
                std_poses = self.std_poses.view(1, -1)

                if len(poses.shape) == 3:  # [t, b, data_dim]
                    mean_poses = mean_poses.unsqueeze(0)
                    std_poses = std_poses.unsqueeze(0)

                denormalized_poses = poses * std_poses + mean_poses

        return denormalized_poses


# a simple eval process for the completion task
class Evaler:
    # NOTE: BodyPartIndices and BodySegIndices are not imported in this file;
    # they are assumed to come from the body-model utilities used elsewhere.
    def __init__(self, body_model, part=None):
        self.body_model = body_model
        self.part = part

        if self.part is not None:
            self.joint_idx = np.array(getattr(BodyPartIndices, self.part)) + 1  # skip pelvis
            self.vert_idx = np.array(getattr(BodySegIndices, self.part))
        else:
            self.joint_idx = slice(None)
            self.vert_idx = slice(None)

    def eval_bodys(self, outs, gts):
        '''
        :param outs: [b, j*3] axis-angle results of body poses
        :param gts: [b, j*3] axis-angle ground truth of body poses
        :return: result dict for every sample
        '''
        sample_num = len(outs)
        eval_result = {'mpvpe_all': [], 'mpjpe_body': []}
        body_gt = self.body_model(pose_body=gts)
        body_out = self.body_model(pose_body=outs)

        for n in range(sample_num):
            # MPVPE over all vertices
            mesh_gt = body_gt.v.detach().cpu().numpy()[n, self.vert_idx]
            mesh_out = body_out.v.detach().cpu().numpy()[n, self.vert_idx]
            eval_result['mpvpe_all'].append(np.sqrt(np.sum((mesh_out - mesh_gt) ** 2, 1)).mean() * 1000)

            joint_gt_body = body_gt.Jtr.detach().cpu().numpy()[n, self.joint_idx]
            joint_out_body = body_out.Jtr.detach().cpu().numpy()[n, self.joint_idx]

            eval_result['mpjpe_body'].append(
                np.sqrt(np.sum((joint_out_body - joint_gt_body) ** 2, 1)).mean() * 1000)

        return eval_result

    def multi_eval_bodys(self, outs, gts):
        '''
        :param outs: [b, hypo, j*3] axis-angle results of body poses, multiple hypotheses
        :param gts: [b, j*3] axis-angle ground truth of body poses
        :return: result dict
        '''
        hypo_num = outs.shape[1]
        eval_result = {'mpvpe_all': [], 'mpjpe_body': []}
        for hypo in range(hypo_num):
            result = self.eval_bodys(outs[:, hypo], gts)
            eval_result['mpvpe_all'].append(result['mpvpe_all'])
            eval_result['mpjpe_body'].append(result['mpjpe_body'])

        eval_result['mpvpe_all'] = np.min(eval_result['mpvpe_all'], axis=0)
        eval_result['mpjpe_body'] = np.min(eval_result['mpjpe_body'], axis=0)

        return eval_result

    def print_eval_result(self, eval_result):
        print('MPVPE (All): %.2f mm' % np.mean(eval_result['mpvpe_all']))
        print('MPJPE (Body): %.2f mm' % np.mean(eval_result['mpjpe_body']))

    def print_multi_eval_result(self, eval_result, hypo_num):
        print(f'multihypo {hypo_num} MPVPE (All): %.2f mm' % np.mean(eval_result['mpvpe_all']))
        print(f'multihypo {hypo_num} MPJPE (Body): %.2f mm' % np.mean(eval_result['mpjpe_body']))
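A usage sketch (hypothetical root path; assumes the preprocessed train_coeffs.pt / betas.pt files exist under the split directories):

from torch.utils.data import DataLoader

# z-score-normalized basis coefficients from the train split
dataset = AMASSDataset(root_path="data_cache/amass", subset="train",
                       num_coeffs=100, normalize=True, min_max=False)
loader = DataLoader(dataset, batch_size=256, shuffle=True)
coeffs = next(iter(loader))               # [256, num_coeffs]
recovered = dataset.Denormalize(coeffs)   # back to the original coefficient scale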
diffu_models/dit_models.py
ADDED
@@ -0,0 +1,383 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# GLIDE: https://github.com/openai/glide-text2im
# MAE: https://github.com/facebookresearch/mae/blob/main/models_mae.py
# --------------------------------------------------------

import torch
import torch.nn as nn
import numpy as np
import math
from timm.models.vision_transformer import PatchEmbed, Attention, Mlp


def modulate(x, shift, scale):
    return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)


#################################################################################
#               Embedding Layers for Timesteps and Class Labels                 #
#################################################################################

class TimestepEmbedder(nn.Module):
    """
    Embeds scalar timesteps into vector representations.
    """
    def __init__(self, hidden_size, frequency_embedding_size=256):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(frequency_embedding_size, hidden_size, bias=True),
            nn.SiLU(),
            nn.Linear(hidden_size, hidden_size, bias=True),
        )
        self.frequency_embedding_size = frequency_embedding_size

    @staticmethod
    def timestep_embedding(t, dim, max_period=10000):
        """
        Create sinusoidal timestep embeddings.
        :param t: a 1-D Tensor of N indices, one per batch element.
                  These may be fractional.
        :param dim: the dimension of the output.
        :param max_period: controls the minimum frequency of the embeddings.
        :return: an (N, D) Tensor of positional embeddings.
        """
        # https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py
        half = dim // 2
        freqs = torch.exp(
            -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
        ).to(device=t.device)
        args = t[:, None].float() * freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if dim % 2:
            embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
        return embedding

    def forward(self, t):
        t_freq = self.timestep_embedding(t, self.frequency_embedding_size)
        t_emb = self.mlp(t_freq)
        return t_emb


class LabelEmbedder(nn.Module):
    """
    Embeds class labels into vector representations. Also handles label dropout for classifier-free guidance.
    """
    def __init__(self, num_classes, hidden_size, dropout_prob):
        super().__init__()
        use_cfg_embedding = dropout_prob > 0
        self.embedding_table = nn.Embedding(num_classes + use_cfg_embedding, hidden_size)
        self.num_classes = num_classes
        self.dropout_prob = dropout_prob

    def token_drop(self, labels, force_drop_ids=None):
        """
        Drops labels to enable classifier-free guidance.
        """
        if force_drop_ids is None:
            drop_ids = torch.rand(labels.shape[0], device=labels.device) < self.dropout_prob
        else:
            drop_ids = force_drop_ids == 1
        labels = torch.where(drop_ids, self.num_classes, labels)
        return labels

    def forward(self, labels, train, force_drop_ids=None):
        use_dropout = self.dropout_prob > 0
        if (train and use_dropout) or (force_drop_ids is not None):
            labels = self.token_drop(labels, force_drop_ids)
        embeddings = self.embedding_table(labels)
        return embeddings


#################################################################################
#                                 Core DiT Model                                #
#################################################################################

class DiTBlock(nn.Module):
    """
    A DiT block with adaptive layer norm zero (adaLN-Zero) conditioning.
    """
    def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, **block_kwargs):
        super().__init__()
        self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        self.attn = Attention(hidden_size, num_heads=num_heads, qkv_bias=True, **block_kwargs)
        self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        approx_gelu = lambda: nn.GELU(approximate="tanh")
        self.mlp = Mlp(in_features=hidden_size, hidden_features=mlp_hidden_dim, act_layer=approx_gelu, drop=0)
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            nn.Linear(hidden_size, 6 * hidden_size, bias=True)
        )

    def forward(self, x, c):
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(c).chunk(6, dim=1)
        x = x + gate_msa.unsqueeze(1) * self.attn(modulate(self.norm1(x), shift_msa, scale_msa))
        x = x + gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp))
        return x


class FinalLayer(nn.Module):
    """
    The final layer of DiT.
    """
    def __init__(self, hidden_size, patch_size, out_channels):
        super().__init__()
        self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True)
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(),
            nn.Linear(hidden_size, 2 * hidden_size, bias=True)
        )

    def forward(self, x, c):
        shift, scale = self.adaLN_modulation(c).chunk(2, dim=1)
        x = modulate(self.norm_final(x), shift, scale)
        x = self.linear(x)
        return x


class DiT(nn.Module):
    """
    Diffusion model with a Transformer backbone.
    """
    def __init__(
        self,
        input_size=32,
        patch_size=2,
        in_channels=4,
        hidden_size=1152,
        depth=28,
        num_heads=16,
        mlp_ratio=4.0,
        class_dropout_prob=0.1,
        num_classes=1000,
        learn_sigma=True,
        conditioning=False
    ):
        super().__init__()
        self.learn_sigma = learn_sigma
        self.in_channels = in_channels
        self.out_channels = in_channels * 2 if learn_sigma else in_channels
        self.patch_size = patch_size
        self.num_heads = num_heads

        self.x_embedder = PatchEmbed(input_size, patch_size, in_channels, hidden_size, bias=True)
        self.t_embedder = TimestepEmbedder(hidden_size)
        self.conditioning = conditioning
        if conditioning:
            self.y_embedder = LabelEmbedder(num_classes, hidden_size, class_dropout_prob)
        num_patches = self.x_embedder.num_patches
        # Will use fixed sin-cos embedding:
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, hidden_size), requires_grad=False)

        self.blocks = nn.ModuleList([
            DiTBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio) for _ in range(depth)
        ])
        self.final_layer = FinalLayer(hidden_size, patch_size, self.out_channels)
        self.initialize_weights()

    def initialize_weights(self):
        # Initialize transformer layers:
        def _basic_init(module):
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
        self.apply(_basic_init)

        # Initialize (and freeze) pos_embed by sin-cos embedding:
        pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.x_embedder.num_patches ** 0.5))
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))

        # Initialize patch_embed like nn.Linear (instead of nn.Conv2d):
        w = self.x_embedder.proj.weight.data
        nn.init.xavier_uniform_(w.view([w.shape[0], -1]))
        nn.init.constant_(self.x_embedder.proj.bias, 0)

        # Initialize label embedding table:
        if self.conditioning:
            nn.init.normal_(self.y_embedder.embedding_table.weight, std=0.02)

        # Initialize timestep embedding MLP:
        nn.init.normal_(self.t_embedder.mlp[0].weight, std=0.02)
        nn.init.normal_(self.t_embedder.mlp[2].weight, std=0.02)

        # Zero-out adaLN modulation layers in DiT blocks:
        for block in self.blocks:
            nn.init.constant_(block.adaLN_modulation[-1].weight, 0)
            nn.init.constant_(block.adaLN_modulation[-1].bias, 0)

        # Zero-out output layers:
        nn.init.constant_(self.final_layer.adaLN_modulation[-1].weight, 0)
        nn.init.constant_(self.final_layer.adaLN_modulation[-1].bias, 0)
        nn.init.constant_(self.final_layer.linear.weight, 0)
        nn.init.constant_(self.final_layer.linear.bias, 0)

    def unpatchify(self, x):
        """
        x: (N, T, patch_size**2 * C)
        imgs: (N, H, W, C)
        """
        c = self.out_channels
        p = self.x_embedder.patch_size[0]
        h = w = int(x.shape[1] ** 0.5)
        assert h * w == x.shape[1]

        x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
        x = torch.einsum('nhwpqc->nchpwq', x)
        imgs = x.reshape(shape=(x.shape[0], c, h * p, h * p))
        return imgs

    def forward(self, x, t, y=None):
        """
        Forward pass of DiT.
        x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
        t: (N,) tensor of diffusion timesteps
        y: (N,) tensor of class labels
        """
        x = self.x_embedder(x) + self.pos_embed  # (N, T, D), where T = H * W / patch_size ** 2
        t = self.t_embedder(t)                   # (N, D)

        c = t                                    # (N, D)
        if self.conditioning:
            y = self.y_embedder(y, self.training)  # (N, D)
            c = c + y
        for block in self.blocks:
            x = block(x, c)                      # (N, T, D)
        x = self.final_layer(x, c)               # (N, T, patch_size ** 2 * out_channels)
        x = self.unpatchify(x)                   # (N, out_channels, H, W)
        return x

    def forward_with_cfg(self, x, t, y, cfg_scale):
        """
        Forward pass of DiT, but also batches the unconditional forward pass for classifier-free guidance.
        """
        # https://github.com/openai/glide-text2im/blob/main/notebooks/text2im.ipynb
        half = x[: len(x) // 2]
        combined = torch.cat([half, half], dim=0)
        model_out = self.forward(combined, t, y)
        # For exact reproducibility reasons, we apply classifier-free guidance on only
        # three channels by default. The standard approach to cfg applies it to all channels.
        # This can be done by uncommenting the following line and commenting-out the line following that.
        # eps, rest = model_out[:, :self.in_channels], model_out[:, self.in_channels:]
        eps, rest = model_out[:, :3], model_out[:, 3:]
        cond_eps, uncond_eps = torch.split(eps, len(eps) // 2, dim=0)
        half_eps = uncond_eps + cfg_scale * (cond_eps - uncond_eps)
        eps = torch.cat([half_eps, half_eps], dim=0)
        return torch.cat([eps, rest], dim=1)


#################################################################################
#                   Sine/Cosine Positional Embedding Functions                  #
#################################################################################
# https://github.com/facebookresearch/mae/blob/main/util/pos_embed.py

def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0):
    """
    grid_size: int of the grid height and width
    return:
    pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
    """
    grid_h = np.arange(grid_size, dtype=np.float32)
    grid_w = np.arange(grid_size, dtype=np.float32)
    grid = np.meshgrid(grid_w, grid_h)  # here w goes first
    grid = np.stack(grid, axis=0)

    grid = grid.reshape([2, 1, grid_size, grid_size])
    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if cls_token and extra_tokens > 0:
        pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0)
    return pos_embed


def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    assert embed_dim % 2 == 0

    # use half of dimensions to encode grid_h
    emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
    emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)

    emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
    return emb


def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    embed_dim: output dimension for each position
    pos: a list of positions to be encoded: size (M,)
    out: (M, D)
    """
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    pos = pos.reshape(-1)  # (M,)
    out = np.einsum('m,d->md', pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)

    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
    return emb


#################################################################################
#                                   DiT Configs                                 #
#################################################################################

def DiT_XL_2(**kwargs):
    return DiT(depth=28, hidden_size=1152, patch_size=2, num_heads=16, **kwargs)

def DiT_XL_4(**kwargs):
    return DiT(depth=28, hidden_size=1152, patch_size=4, num_heads=16, **kwargs)

def DiT_XL_8(**kwargs):
    return DiT(depth=28, hidden_size=1152, patch_size=8, num_heads=16, **kwargs)

def DiT_L_2(**kwargs):
    return DiT(depth=24, hidden_size=1024, patch_size=2, num_heads=16, **kwargs)

def DiT_L_4(**kwargs):
    return DiT(depth=24, hidden_size=1024, patch_size=4, num_heads=16, **kwargs)

def DiT_L_5(**kwargs):
    return DiT(depth=24, hidden_size=1024, patch_size=5, num_heads=16, **kwargs)

def DiT_L_8(**kwargs):
    return DiT(depth=24, hidden_size=1024, patch_size=8, num_heads=16, **kwargs)

def DiT_B_2(**kwargs):
    return DiT(depth=12, hidden_size=768, patch_size=2, num_heads=12, **kwargs)

def DiT_B_4(**kwargs):
    return DiT(depth=12, hidden_size=768, patch_size=4, num_heads=12, **kwargs)

def DiT_B_5(**kwargs):
    return DiT(depth=12, hidden_size=768, patch_size=5, num_heads=12, **kwargs)

def DiT_B_8(**kwargs):
    return DiT(depth=12, hidden_size=768, patch_size=8, num_heads=12, **kwargs)

def DiT_S_2(**kwargs):
    return DiT(depth=12, hidden_size=384, patch_size=2, num_heads=6, **kwargs)

def DiT_S_4(**kwargs):
    # NB: this entry uses patch_size=5 despite the "/4" name, presumably so
    # that 30x30 functional-map inputs tile evenly into 6x6 patches.
    return DiT(depth=12, hidden_size=384, patch_size=5, num_heads=6, **kwargs)

def DiT_S_8(**kwargs):
    return DiT(depth=12, hidden_size=384, patch_size=8, num_heads=6, **kwargs)


DiT_models = {
    'DiT-XL/2': DiT_XL_2, 'DiT-XL/4': DiT_XL_4, 'DiT-XL/8': DiT_XL_8,
    'DiT-L/2': DiT_L_2, 'DiT-L/4': DiT_L_4, 'DiT-L/8': DiT_L_8, 'DiT-L/5': DiT_L_5,
    'DiT-B/2': DiT_B_2, 'DiT-B/4': DiT_B_4, 'DiT-B/8': DiT_B_8, 'DiT-B/5': DiT_B_5,
    'DiT-S/2': DiT_S_2, 'DiT-S/4': DiT_S_4, 'DiT-S/8': DiT_S_8,
}
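A smoke-test sketch of the 'DiT-S/4' entry that dfaust_fmap.yaml selects; in_channels=1 and learn_sigma=False are illustrative assumptions for 30x30 functional-map inputs:

import torch

model = DiT_models['DiT-S/4'](input_size=30, in_channels=1,
                              learn_sigma=False, conditioning=False)
x = torch.randn(8, 1, 30, 30)   # batch of 30x30 fmap "images"
t = torch.rand(8)               # diffusion timesteps
out = model(x, t)
assert out.shape == (8, 1, 30, 30)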
diffu_models/losses.py
ADDED
@@ -0,0 +1,96 @@
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# You should have received a copy of the license along with this
# work. If not, see http://creativecommons.org/licenses/by-nc-sa/4.0/

"""Loss functions used in the paper
"Elucidating the Design Space of Diffusion-Based Generative Models"."""

import torch
from edm.torch_utils import persistence

#----------------------------------------------------------------------------
# Loss function corresponding to the variance preserving (VP) formulation
# from the paper "Score-Based Generative Modeling through Stochastic
# Differential Equations".

@persistence.persistent_class
class VPLoss:
    def __init__(self, beta_d=19.9, beta_min=0.1, epsilon_t=1e-5):
        self.beta_d = beta_d
        self.beta_min = beta_min
        self.epsilon_t = epsilon_t

    def noise_and_weight(self, shape, device, sds=False):
        rnd_uniform = torch.rand([shape, 1, 1, 1], device=device)
        if sds:
            # restrict to [0.02, 0.98], see https://github.com/ashawkey/stable-dreamfusion/blob/5550b91862a3af7842bb04875b7f1211e5095a63/guidance/sd_utils.py#L180
            rnd_uniform = 0.02 + rnd_uniform * 0.96
        sigma = self.sigma(1 + rnd_uniform * (self.epsilon_t - 1))
        weight = 1 / sigma ** 2
        return sigma, weight

    def __call__(self, net, x, latents, augment_pipe=None):
        sigma, weight = self.noise_and_weight(x.shape[0], x.device)
        n = torch.randn_like(x) * sigma
        D_xn = net(x + n, sigma, latents)
        loss = weight * ((D_xn - x) ** 2)
        return loss

    def sigma(self, t):
        t = torch.as_tensor(t)
        return ((0.5 * self.beta_d * (t ** 2) + self.beta_min * t).exp() - 1).sqrt()

#----------------------------------------------------------------------------
# Loss function corresponding to the variance exploding (VE) formulation
# from the paper "Score-Based Generative Modeling through Stochastic
# Differential Equations".

@persistence.persistent_class
class VELoss:
    def __init__(self, sigma_min=0.02, sigma_max=100):
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max

    def noise_and_weight(self, shape, device, sds=False):
        rnd_uniform = torch.rand([shape, 1, 1, 1], device=device)
        sigma = self.sigma_min * ((self.sigma_max / self.sigma_min) ** rnd_uniform)
        weight = 1 / sigma ** 2
        return sigma, weight

    def __call__(self, net, x, latents, augment_pipe=None):
        sigma, weight = self.noise_and_weight(x.shape[0], x.device)
        n = torch.randn_like(x) * sigma
        D_xn = net(x + n, sigma, latents)
        loss = weight * ((D_xn - x) ** 2)
        return loss

#----------------------------------------------------------------------------
# Improved loss function proposed in the paper "Elucidating the Design Space
# of Diffusion-Based Generative Models" (EDM).

@persistence.persistent_class
class EDMLoss:
    def __init__(self, P_mean=-1.2, P_std=1.2, sigma_data=0.5):
        self.P_mean = P_mean
        self.P_std = P_std
        self.sigma_data = sigma_data
        self.sigma_min = 0.4
        self.sigma_max = 10
        self.rho = 3

    def noise_and_weight(self, shape, device, sds=False):
        rnd_normal = torch.randn([shape, 1, 1, 1], device=device)
        sigma = (rnd_normal * self.P_std + self.P_mean).exp()
        weight = (sigma ** 2 + self.sigma_data ** 2) / (sigma * self.sigma_data) ** 2
        return sigma.float(), weight.float()

    def __call__(self, net, x, latents, augment_pipe=None):
        sigma, weight = self.noise_and_weight(x.shape[0], x.device)
        n = torch.randn_like(x) * sigma
        D_xn = net(x + n, sigma, latents)
        loss = weight * ((D_xn - x) ** 2)
        return loss

#----------------------------------------------------------------------------
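A hedged sketch of how VPLoss is consumed: `net` is expected to take (noisy_x, sigma, latents) and return a denoised estimate; the identity lambda below is a stand-in for a preconditioned denoiser such as those in precond.py.

import torch

loss_fn = VPLoss()
net = lambda x_noisy, sigma, latents: x_noisy   # toy stand-in denoiser
x = torch.randn(4, 1, 30, 30)
loss = loss_fn(net, x, latents=None)   # weighted per-element squared error
print(loss.mean())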
diffu_models/precond.py
ADDED
@@ -0,0 +1,152 @@
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
#----------------------------------------------------------------------------
|
| 5 |
+
# Preconditioning corresponding to the variance exploding (VE) formulation
|
| 6 |
+
# from the paper "Score-Based Generative Modeling through Stochastic
|
| 7 |
+
# Differential Equations".
|
| 8 |
+
|
| 9 |
+
class VEPrecond(torch.nn.Module):
|
| 10 |
+
def __init__(self,
|
| 11 |
+
model,
|
| 12 |
+
label_dim = 0, # Number of class labels, 0 = unconditional.
|
| 13 |
+
        use_fp16  = False,         # Execute the underlying model at FP16 precision?
        sigma_min = 0.02,          # Minimum supported noise level.
        sigma_max = 100,           # Maximum supported noise level.
    ):
        super().__init__()
        self.label_dim = label_dim
        self.use_fp16 = use_fp16
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.model = model

    def forward(self, x, sigma, class_labels=None, force_fp32=False, **model_kwargs):
        sigma = sigma.to(torch.float32).reshape(-1, 1, 1, 1)
        x = x.to(torch.float32)
        class_labels = None if self.label_dim == 0 else torch.zeros([1, self.label_dim], device=x.device) if class_labels is None else class_labels.to(torch.float32).reshape(-1, self.label_dim)
        dtype = torch.float16 if (self.use_fp16 and not force_fp32 and x.device.type == 'cuda') else torch.float32

        c_skip = 1
        c_out = sigma
        c_in = 1
        c_noise = (0.5 * sigma).log()

        if class_labels is not None:
            F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), class_labels=class_labels, **model_kwargs)
        else:
            F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), **model_kwargs)
        assert F_x.dtype == dtype
        D_x = c_skip * x + c_out * F_x.to(torch.float32)
        return D_x

    def round_sigma(self, sigma):
        return torch.as_tensor(sigma)

#----------------------------------------------------------------------------
# Preconditioning corresponding to improved DDPM (iDDPM) formulation from
# the paper "Improved Denoising Diffusion Probabilistic Models".

class iDDPMPrecond(torch.nn.Module):
    def __init__(self,
        model,
        label_dim = 0,             # Number of class labels, 0 = unconditional.
        use_fp16  = False,         # Execute the underlying model at FP16 precision?
        C_1       = 0.001,         # Timestep adjustment at low noise levels.
        C_2       = 0.008,         # Timestep adjustment at high noise levels.
        M         = 1000,          # Original number of timesteps in the DDPM formulation.
    ):
        super().__init__()
        self.label_dim = label_dim
        self.use_fp16 = use_fp16
        self.C_1 = C_1
        self.C_2 = C_2
        self.M = M
        self.model = model
        u = torch.zeros(M + 1)
        for j in range(M, 0, -1):  # M, ..., 1
            u[j - 1] = ((u[j] ** 2 + 1) / (self.alpha_bar(j - 1) / self.alpha_bar(j)).clip(min=C_1) - 1).sqrt()
        self.register_buffer('u', u)
        self.sigma_min = float(u[M - 1])
        self.sigma_max = float(u[0])

    def forward(self, x, sigma, class_labels=None, lamb=None, force_fp32=False, **model_kwargs):
        sigma = sigma.to(torch.float32).reshape(-1, 1, 1, 1)
        x = x.to(torch.float32)
        class_labels = None if self.label_dim == 0 else torch.zeros([1, self.label_dim], device=x.device) if class_labels is None else class_labels.to(torch.float32).reshape(-1, self.label_dim)
        dtype = torch.float16 if (self.use_fp16 and not force_fp32 and x.device.type == 'cuda') else torch.float32

        c_skip = 1
        c_out = -sigma
        c_in = 1 / (sigma ** 2 + 1).sqrt()
        c_noise = self.M - 1 - self.round_sigma(sigma, return_index=True).to(torch.float32)
        # if class_labels is not None:
        #     F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), class_labels=class_labels, **model_kwargs)
        # else:
        if lamb is not None:
            F_x = self.model((c_in * x).to(dtype), lamb, c_noise.flatten(), **model_kwargs)
        else:
            F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), **model_kwargs)
        assert F_x.dtype == dtype
        D_x = c_skip * x + c_out * F_x.to(torch.float32)
        return D_x

    def alpha_bar(self, j):
        j = torch.as_tensor(j)
        return (0.5 * np.pi * j / self.M / (self.C_2 + 1)).sin() ** 2

    def round_sigma(self, sigma, return_index=False):
        sigma = torch.as_tensor(sigma)
        index = torch.cdist(sigma.to(self.u.device).to(torch.float32).reshape(1, -1, 1), self.u.reshape(1, -1, 1)).argmin(2)
        result = index if return_index else self.u[index.flatten()].to(sigma.dtype)
        return result.reshape(sigma.shape).to(sigma.device)

#----------------------------------------------------------------------------
# Improved preconditioning proposed in the paper "Elucidating the Design
# Space of Diffusion-Based Generative Models" (EDM).

class EDMPrecond(torch.nn.Module):
    def __init__(self,
        model,
        label_dim  = 0,             # Number of class labels, 0 = unconditional.
        use_fp16   = False,         # Execute the underlying model at FP16 precision?
        sigma_min  = 0,             # Minimum supported noise level.
        sigma_max  = float('inf'),  # Maximum supported noise level.
        sigma_data = 0.5,           # Expected standard deviation of the training data.
    ):
        super().__init__()
        self.label_dim = label_dim
        self.use_fp16 = use_fp16
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.sigma_data = sigma_data
        self.model = model

    def forward(self, x, sigma, class_labels=None, force_fp32=False, **model_kwargs):
        x = x.to(torch.float32)
        sigma = sigma.to(torch.float32).reshape(-1, 1, 1, 1)
        if class_labels is not None:
            if self.label_dim == 0:
                class_labels = None
            else:
                class_labels = class_labels.to(torch.float32).reshape(-1, self.label_dim)
        dtype = torch.float16 if (self.use_fp16 and not force_fp32 and x.device.type == 'cuda') else torch.float32

        c_skip = self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2)
        c_out = sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2).sqrt()
        c_in = 1 / (self.sigma_data ** 2 + sigma ** 2).sqrt()
        c_in = c_in.to(x.device)
        c_noise = sigma.log() / 4
        if class_labels is not None:
            F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), c_latent=class_labels, **model_kwargs)
        else:
            F_x = self.model((c_in * x).to(dtype), c_noise.flatten(), **model_kwargs)
        assert F_x.dtype == dtype
        D_x = c_skip * x + c_out * F_x.to(torch.float32)
        return D_x

    def round_sigma(self, sigma):
        return torch.as_tensor(sigma)

#----------------------------------------------------------------------------
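For orientation, a minimal usage sketch (not part of the commit): each preconditioner wraps a raw network and exposes the same denoiser interface D(x, sigma). The toy network below is hypothetical; any module taking (x, noise_labels) would work in its place.

import torch

class ToyNet(torch.nn.Module):  # hypothetical stand-in for the actual backbone
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(1, 1, 3, padding=1)

    def forward(self, x, noise_labels):
        return self.conv(x)  # noise_labels ignored in this toy example

denoiser = EDMPrecond(ToyNet())   # unconditional: label_dim=0
x = torch.randn(4, 1, 32, 32)     # a batch of noisy inputs
sigma = torch.full((4,), 0.7)     # one noise level per sample
D_x = denoiser(x, sigma)          # denoised estimate, same shape as x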
diffu_models/sds.py
ADDED
@@ -0,0 +1,77 @@
import numpy as np  # was missing in the original; needed by the cosine decay below
import torch
from torch.autograd import grad
from torch.optim.lr_scheduler import _LRScheduler
import torch.nn.functional as F


class WarmupCosineDecayScheduler(_LRScheduler):
    def __init__(self, optimizer, warmup_steps, total_steps, warmup_start_lr=1e-9, max_lr=1e-4, min_lr=1e-6, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.warmup_start_lr = warmup_start_lr
        self.max_lr = max_lr
        self.min_lr = min_lr
        super(WarmupCosineDecayScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_steps:
            # Linear warmup from warmup_start_lr to max_lr
            lr = self.max_lr * self.last_epoch / self.warmup_steps + (1 - self.last_epoch / self.warmup_steps) * self.warmup_start_lr
        else:
            # Cosine decay from max_lr down to min_lr
            cosine_decay = 0.5 * (1 + np.cos(torch.pi * (self.last_epoch - self.warmup_steps) / (self.total_steps - self.warmup_steps)))
            decayed = (1 - self.min_lr / self.max_lr) * cosine_decay + self.min_lr / self.max_lr
            lr = self.max_lr * decayed
        return [lr for _ in self.base_lrs]


def guidance_grad(pred_shape, net, scale_noise, grad_scale=1, batch_size=32, device="cpu", save_guidance_path=None):
    # noise level sigma ~ U(0.01, 0.01 + scale_noise) to avoid degenerate noise levels
    sigma = 0.01 + torch.rand([batch_size, 1, 1, 1], device=device) * scale_noise
    # predict the denoised shape with the network, NO grad!
    with torch.no_grad():
        # sample noise
        noise = torch.randn_like(pred_shape) * sigma
        # denoise the perturbed input
        x = pred_shape + noise
        denoised = net(x, sigma)
    # w(t), sigma_t^2
    grad = torch.mean(grad_scale * (pred_shape - denoised), dim=0)  # / sigma**2
    #print(sigma.item()**2, weight.item(), torch.norm(pred_shape-denoised).item())
    #print(grad)
    grad = torch.nan_to_num(grad)

    # if save_guidance_path:
    #     with torch.no_grad():
    #         if as_latent:
    #             pred_rgb_512 = self.decode_latents(latents)

    #         # visualize predicted denoised image
    #         # The following block of code is equivalent to `predict_start_from_noise`...
    #         # see zero123_utils.py's version for a simpler implementation.
    #         alphas = self.scheduler.alphas.to(latents)
    #         total_timesteps = self.max_step - self.min_step + 1
    #         index = total_timesteps - t.to(latents.device) - 1
    #         b = len(noise_pred)
    #         a_t = alphas[index].reshape(b,1,1,1).to(self.device)
    #         sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
    #         sqrt_one_minus_at = sqrt_one_minus_alphas[index].reshape((b,1,1,1)).to(self.device)
    #         pred_x0 = (latents_noisy - sqrt_one_minus_at * noise_pred) / a_t.sqrt()  # current prediction for x_0
    #         result_hopefully_less_noisy_image = self.decode_latents(pred_x0.to(latents.type(self.precision_t)))

    #         # visualize noisier image
    #         result_noisier_image = self.decode_latents(latents_noisy.to(pred_x0).type(self.precision_t))

    #         # TODO: also denoise all-the-way

    #         # all 3 input images are [1, 3, H, W], e.g. [1, 3, 512, 512]
    #         viz_images = torch.cat([pred_rgb_512, result_noisier_image, result_hopefully_less_noisy_image], dim=0)
    #         save_image(viz_images, save_guidance_path)

    return grad, denoised


def guidance_loss(pred_shape, loss_sde, net, grad_scale=1, device="cpu", save_guidance_path=None):
    # Fixed from the original, which passed these arguments positionally in an
    # order that did not match guidance_grad's signature and ignored the returned
    # (grad, denoised) tuple. Following the original positional order, `loss_sde`
    # is treated as the denoiser network and `net` as the noise scale.
    grad, _ = guidance_grad(pred_shape, loss_sde, net, grad_scale=grad_scale, device=device, save_guidance_path=save_guidance_path)
    targets = (pred_shape - grad).detach()
    loss = 0.5 * F.mse_loss(pred_shape.float(), targets, reduction='sum') / pred_shape.shape[0]
    return loss
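A hedged sketch (not part of the commit) of how these pieces compose into a score-distillation loop. Here `denoiser` is an assumption: any preconditioned network with the net(x, sigma) interface from diffu_models/precond.py; shapes are illustrative.

import torch
import torch.nn.functional as F

pred_shape = torch.randn(1, 1, 32, 32, requires_grad=True)   # optimizable variable
optimizer = torch.optim.Adam([pred_shape], lr=1e-3)
scheduler = WarmupCosineDecayScheduler(optimizer, warmup_steps=100, total_steps=1000)

for step in range(1000):
    g, _ = guidance_grad(pred_shape, denoiser, scale_noise=0.5, batch_size=8)
    targets = (pred_shape - g).detach()                       # same trick as guidance_loss:
    loss = 0.5 * F.mse_loss(pred_shape, targets, reduction='sum')  # d(loss)/d(pred_shape) == g
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()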
shape_data/__init__.py
ADDED
@@ -0,0 +1,74 @@
import sys
import os.path as osp
import numpy as np
import torch
from collections import defaultdict

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

DATA_DIRS = {
    'faust': 'FAUST_r',
    'faust_ori': 'FAUST_r_ori',
    'scape': 'SCAPE_r',
    'scape_ori': 'SCAPE_r_ori',
    'smalr': 'SMAL_r',
    'smalr_ori': 'SMAL_r_ori',
    'shrec19': 'SHREC_r',
    'shrec19_ori': 'SHREC_r_ori',
    'dt4d': 'DT4D_r',
    'dt4dintra': 'DT4D_r',
    'dt4dintra_ori': 'DT4D_r_ori',
    'dt4dinter': 'DT4D_r',
    'dt4dinter_ori': 'DT4D_r_ori',
    'tosca': 'TOSCA_r',
    'tosca_ori': 'TOSCA_r',
}


def get_data_dirs(root, name, mode):
    prefix = osp.join(root, DATA_DIRS[name])
    shape_dir = osp.join(prefix, 'shapes')
    corr_dir = osp.join(prefix, 'correspondences')
    return shape_dir, DATA_DIRS[name], corr_dir


# def collate_default(data_list):
#     data_dict = defaultdict(list)
#     for pair_dict in data_list:
#         for k, v in pair_dict.items():
#             data_dict[k].append(v)
#     for k in data_dict.keys():
#         if k.startswith('fmap') or k.startswith('evals') or k.endswith('_sub'):
#             data_dict[k] = np.stack(data_dict[k], axis=0)
#     batch_size = len(data_list)
#     for k, v in data_dict.items():
#         assert len(v) == batch_size
#     return data_dict


def prepare_batch(data_dict, device):
    for k in data_dict.keys():
        if isinstance(data_dict[k], np.ndarray):
            data_dict[k] = torch.from_numpy(data_dict[k]).to(device)
        else:
            if k.startswith('gradX') or \
               k.startswith('gradY') or \
               k.startswith('L'):
                from diffusion_net.utils import sparse_np_to_torch
                tmp_list = [sparse_np_to_torch(st).to(device) for st in data_dict[k]]
                if len(data_dict[k]) == 1:
                    data_dict[k] = torch.stack(tmp_list, dim=0)
                else:
                    data_dict[k] = tmp_list
            else:
                if isinstance(data_dict[k][0], np.ndarray):
                    tmp_list = [torch.from_numpy(st).to(device) for st in data_dict[k]]
                    if len(data_dict[k]) == 1:
                        data_dict[k] = torch.stack(tmp_list, dim=0).to(device)
                    else:
                        data_dict[k] = tmp_list

    return data_dict
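A small illustration (not part of the commit) of what prepare_batch does with a plain array versus a one-element list of arrays:

import numpy as np
import torch

batch = {
    'vertices': np.zeros((100, 3), dtype=np.float32),   # array -> tensor on device
    'evals': [np.zeros(30, dtype=np.float32)],          # 1-element list -> stacked tensor
}
batch = prepare_batch(batch, torch.device('cpu'))
print(batch['vertices'].shape, batch['evals'].shape)    # torch.Size([100, 3]) torch.Size([1, 30])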
shape_data/data_utils.py
ADDED
@@ -0,0 +1,270 @@
import scipy
import scipy.linalg  # imported explicitly for lstsq() below; relied on an implicit import in the original
import scipy.sparse
import scipy.sparse.linalg
from scipy.io import loadmat
import sys
import os
import os.path as osp
import math
import numpy as np
import open3d as o3d
import potpourri3d as pp3d
import torch
from pathlib import Path


class CorrLoader(object):

    def __init__(self, root_dir, data_type='mat'):
        self.root_dir = root_dir
        self.data_type = data_type

    def get_by_names(self, sname0, sname1):
        if self.data_type.endswith('mat'):
            pmap10 = self._load_mat(osp.join(self.root_dir, f'{sname0}-{sname1}.mat'))
            return np.stack((pmap10, np.arange(len(pmap10))), axis=1)
        else:
            raise RuntimeError(f'Data type {self.data_type} is not supported.')

    def _load_mat(self, filepath):
        data = loadmat(filepath)
        pmap10 = np.squeeze(np.asarray(data['pmap10'], dtype=np.int32))
        return pmap10


# https://github.com/RobinMagnet/pyFM/blob/master/pyFM/signatures/HKS_functions.py
def HKS(evals, evects, time_list, scaled=False):
    evals_s = np.asarray(evals).flatten()
    t_list = np.asarray(time_list).flatten()

    coefs = np.exp(-np.outer(t_list, evals_s))
    weighted_evects = evects[None, :, :] * coefs[:, None, :]
    natural_HKS = np.einsum('tnk,nk->nt', weighted_evects, evects)

    if scaled:
        inv_scaling = coefs.sum(1)
        return (1 / inv_scaling)[None, :] * natural_HKS
    else:
        return natural_HKS


def lm_HKS(evals, evects, landmarks, time_list, scaled=False):
    evals_s = np.asarray(evals).flatten()
    t_list = np.asarray(time_list).flatten()

    coefs = np.exp(-np.outer(t_list, evals_s))
    weighted_evects = evects[None, landmarks, :] * coefs[:, None, :]

    landmarks_HKS = np.einsum('tpk,nk->ptn', weighted_evects, evects)

    if scaled:
        inv_scaling = coefs.sum(1)
        landmarks_HKS = (1 / inv_scaling)[None, :, None] * landmarks_HKS

    return landmarks_HKS.reshape(-1, evects.shape[0]).T


def auto_HKS(evals, evects, num_T, landmarks=None, scaled=True):
    abs_ev = sorted(np.abs(evals))
    t_list = np.geomspace(4 * np.log(10) / abs_ev[-1], 4 * np.log(10) / abs_ev[1], num_T)

    if landmarks is None:
        return HKS(abs_ev, evects, t_list, scaled=scaled)
    else:
        return lm_HKS(abs_ev, evects, landmarks, t_list, scaled=scaled)


# https://github.com/RobinMagnet/pyFM/blob/master/pyFM/signatures/WKS_functions.py
def WKS(evals, evects, energy_list, sigma, scaled=False):
    assert sigma > 0, f"Sigma should be positive ! Given value : {sigma}"

    evals = np.asarray(evals).flatten()
    indices = np.where(evals > 1e-5)[0].flatten()
    evals = evals[indices]
    evects = evects[:, indices]

    e_list = np.asarray(energy_list)
    coefs = np.exp(-np.square(e_list[:, None] - np.log(np.abs(evals))[None, :]) / (2 * sigma**2))

    weighted_evects = evects[None, :, :] * coefs[:, None, :]

    natural_WKS = np.einsum('tnk,nk->nt', weighted_evects, evects)

    if scaled:
        inv_scaling = coefs.sum(1)
        return (1 / inv_scaling)[None, :] * natural_WKS
    else:
        return natural_WKS


def lm_WKS(evals, evects, landmarks, energy_list, sigma, scaled=False):
    assert sigma > 0, f"Sigma should be positive ! Given value : {sigma}"

    evals = np.asarray(evals).flatten()
    indices = np.where(evals > 1e-2)[0].flatten()
    evals = evals[indices]
    evects = evects[:, indices]

    e_list = np.asarray(energy_list)
    coefs = np.exp(-np.square(e_list[:, None] - np.log(np.abs(evals))[None, :]) / (2 * sigma**2))
    weighted_evects = evects[None, landmarks, :] * coefs[:, None, :]

    landmarks_WKS = np.einsum('tpk,nk->ptn', weighted_evects, evects)

    if scaled:
        inv_scaling = coefs.sum(1)
        landmarks_WKS = ((1 / inv_scaling)[None, :, None] * landmarks_WKS)

    return landmarks_WKS.reshape(-1, evects.shape[0]).T


def auto_WKS(evals, evects, num_E, landmarks=None, scaled=True):
    abs_ev = sorted(np.abs(evals))

    e_min, e_max = np.log(abs_ev[1]), np.log(abs_ev[-1])
    sigma = 7 * (e_max - e_min) / num_E

    e_min += 2 * sigma
    e_max -= 2 * sigma

    energy_list = np.linspace(e_min, e_max, num_E)

    if landmarks is None:
        return WKS(abs_ev, evects, energy_list, sigma, scaled=scaled)
    else:
        return lm_WKS(abs_ev, evects, landmarks, energy_list, sigma, scaled=scaled)


def compute_hks(evecs, evals, mass, n_descr=100, subsample_step=5, n_eig=35):
    feats = auto_HKS(evals[:n_eig], evecs[:, :n_eig], n_descr, scaled=True)
    feats = feats[:, np.arange(0, feats.shape[1], subsample_step)]
    feats_norm2 = np.einsum('np,np->p', feats, np.expand_dims(mass, 1) * feats).flatten()
    feats /= np.expand_dims(np.sqrt(feats_norm2), 0)
    return feats.astype(np.float32)


def compute_wks(evecs, evals, mass, n_descr=100, subsample_step=5, n_eig=35):
    feats = auto_WKS(evals[:n_eig], evecs[:, :n_eig], n_descr, scaled=True)
    feats = feats[:, np.arange(0, feats.shape[1], subsample_step)]
    feats_norm2 = np.einsum('np,np->p', feats, np.expand_dims(mass, 1) * feats).flatten()
    feats /= np.expand_dims(np.sqrt(feats_norm2), 0)
    return feats.astype(np.float32)


def compute_geodesic_distance(V, F, vindices):
    solver = pp3d.MeshHeatMethodDistanceSolver(np.asarray(V, dtype=np.float32), np.asarray(F, dtype=np.int32))
    dists = [solver.compute_distance(vid)[vindices] for vid in vindices]
    dists = np.stack(dists, axis=0)
    assert dists.ndim == 2
    return dists.astype(np.float32)


def compute_vertex_normals(vertices, faces):
    mesh = o3d.geometry.TriangleMesh(o3d.utility.Vector3dVector(vertices), o3d.utility.Vector3iVector(faces))
    mesh.compute_vertex_normals()
    return np.asarray(mesh.vertex_normals, dtype=np.float32)


def compute_surface_area(vertices, faces):
    mesh = o3d.geometry.TriangleMesh(o3d.utility.Vector3dVector(vertices), o3d.utility.Vector3iVector(faces))
    return mesh.get_surface_area()


def numpy_to_open3d_mesh(V, F):
    # Create an empty TriangleMesh object
    mesh = o3d.geometry.TriangleMesh()
    # Set vertices
    mesh.vertices = o3d.utility.Vector3dVector(V)
    # Set triangles
    mesh.triangles = o3d.utility.Vector3iVector(F)
    return mesh


def load_mesh(filepath, scale=True, return_vnormals=False):
    if os.path.splitext(filepath)[1] == ".obj":  # avoid Open3D's preprocessing of OBJ files
        V, F = pp3d.read_mesh(filepath)
        mesh = numpy_to_open3d_mesh(V, F)
    else:
        mesh = o3d.io.read_triangle_mesh(filepath)

    tmat = np.identity(4, dtype=np.float32)
    center = mesh.get_center()
    tmat[:3, 3] = -center
    if scale:
        smat = np.identity(4, dtype=np.float32)
        area = mesh.get_surface_area()
        smat[:3, :3] = np.identity(3, dtype=np.float32) / math.sqrt(area)
        tmat = smat @ tmat
    mesh.transform(tmat)

    vertices = np.asarray(mesh.vertices, dtype=np.float32)
    faces = np.asarray(mesh.triangles, dtype=np.int32)
    if return_vnormals:
        mesh.compute_vertex_normals()
        vnormals = np.asarray(mesh.vertex_normals, dtype=np.float32)
        return vertices, faces, vnormals
    else:
        return vertices, faces


def save_mesh(filepath, vertices, faces):
    mesh = o3d.geometry.TriangleMesh(o3d.utility.Vector3dVector(vertices), o3d.utility.Vector3iVector(faces))
    o3d.io.write_triangle_mesh(filepath, mesh)


def load_geodist(filepath):
    data = loadmat(filepath)
    if 'geodist' in data and 'sqrt_area' in data:
        geodist = np.asarray(data['geodist'], dtype=np.float32)
        sqrt_area = data['sqrt_area'].toarray().flatten()[0]
    elif 'G' in data and 'SQRarea' in data:
        geodist = np.asarray(data['G'], dtype=np.float32)
        sqrt_area = data['SQRarea'].flatten()[0]
    else:
        raise RuntimeError(f'File {filepath} does not have geodesics data.')
    return geodist, sqrt_area


def farthest_point_sampling(points, max_points, random_start=True):
    import torch_cluster

    if torch.is_tensor(points):
        device = points.device
        is_batch = points.dim() == 3
        if not is_batch:
            points = torch.unsqueeze(points, dim=0)
        assert points.dim() == 3

        B, N, D = points.size()
        assert N >= max_points
        bindices = torch.flatten(torch.unsqueeze(torch.arange(B), 1).repeat(1, N)).long().to(device)
        points = torch.reshape(points, (B * N, D)).float()
        sindices = torch_cluster.fps(points, bindices, ratio=float(max_points) / N, random_start=random_start)
        if is_batch:
            sindices = torch.reshape(sindices, (B, max_points)) - torch.unsqueeze(torch.arange(B), 1).long().to(device) * N
    elif isinstance(points, np.ndarray):
        device = torch.device('cpu')
        is_batch = points.ndim == 3
        if not is_batch:
            points = np.expand_dims(points, axis=0)
        assert points.ndim == 3

        B, N, D = points.shape
        assert N >= max_points
        bindices = np.tile(np.expand_dims(np.arange(B), 1), (1, N)).flatten()
        bindices = torch.as_tensor(bindices, device=device).long()
        points = torch.as_tensor(np.reshape(points, (B * N, D)), device=device).float()
        sindices = torch_cluster.fps(points, bindices, ratio=float(max_points) / N, random_start=random_start)
        sindices = sindices.cpu().numpy()
        if is_batch:
            sindices = np.reshape(sindices, (B, max_points)) - np.expand_dims(np.arange(B), 1) * N
    else:
        raise NotImplementedError
    return sindices


def lstsq(A, B):
    assert A.ndim == B.ndim == 2
    sols = scipy.linalg.lstsq(A, B)[0]
    return sols
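To make the descriptor pipeline concrete, a sketch (not part of the commit) that runs compute_wks on a made-up spectrum; in actual use, evals/evecs/mass come from the cached Laplacian operators of the datasets below.

import numpy as np

n_verts, n_eig = 500, 35
evals = np.linspace(1e-3, 10.0, n_eig)     # fake ascending spectrum
evecs = np.random.randn(n_verts, n_eig)    # fake eigenvectors
mass = np.full(n_verts, 1.0 / n_verts)     # fake lumped vertex areas

feats = compute_wks(evecs, evals, mass, n_descr=100, subsample_step=5, n_eig=n_eig)
print(feats.shape)                         # (500, 20): 100 energies, every 5th kept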
shape_data/dt4dinter.py
ADDED
@@ -0,0 +1,50 @@
import os.path as osp
import sys
import numpy as np
import itertools
from pathlib import Path
from collections import defaultdict

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from .dt4dintra import IGNORED_CATEGORIES
from .dt4dintra import ShapeDataset
from .faust import ShapePairDataset as FaustShapePairDataset
from utils.mesh import list_files

#IGNORED_CATEGORIES = ["drake", "mannequin", "ninja", "prisoner", "zlorp", "pumpkinhulk"]
IGNORED_CATEGORIES = ["pumpkinhulk"]


class ShapePairDataset(FaustShapePairDataset):

    def _init(self):
        self.name_id_map = self.shape_data.get_name_id_map()
        categories = defaultdict(list)
        for sname in self.name_id_map.keys():
            categories[sname.split('/')[0]].append(sname)
        self.pair_indices = list()
        for filename in list_files(osp.join(self.corr_dir, 'cross_category_corres'), '*.vts', alphanum_sort=False):
            cname0, cname1 = filename[:-4].split('_')
            if cname0 in IGNORED_CATEGORIES or cname1 in IGNORED_CATEGORIES:
                continue
            for sname0 in categories[cname0]:
                for sname1 in categories[cname1]:
                    self.pair_indices.append((self.name_id_map[sname0], self.name_id_map[sname1]))

    def _load_corr_gt(self, sdict0, sdict1):
        sname0 = sdict0['name']
        sname1 = sdict1['name']
        cname0 = sname0.split('/')[0]
        cname1 = sname1.split('/')[0]
        assert cname0 != cname1
        lmk01 = self._load_corr_file(f'cross_category_corres/{cname0}_{cname1}')
        corr0 = self._load_corr_file(sname0)
        corr1 = self._load_corr_file(sname1)
        corr_gt = np.stack((corr0, corr1[lmk01]), axis=1)
        return corr_gt

    def _load_corr_file(self, sname):
        corr_path = osp.join(self.corr_dir, f'{sname}.vts')
        corr = np.loadtxt(corr_path, dtype=np.int32)
        return corr - 1
shape_data/dt4dintra.py
ADDED
@@ -0,0 +1,57 @@
import os.path as osp
import sys
import numpy as np
import itertools
from pathlib import Path
from collections import defaultdict

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from .faust import ShapeDataset as FaustShapeDataset
from .faust import ShapePairDataset as FaustShapePairDataset
from utils.utils_legacy import read_lines

IGNORED_CATEGORIES = ['pumpkinhulk']


class ShapeDataset(FaustShapeDataset):
    TRAIN_IDX = None
    TEST_IDX = None
    NAME = "DT4D"

    def _get_file_list(self):
        if self.mode.startswith('train'):
            file_list = read_lines(osp.join(self.shape_dir, '..', 'train.txt'))
        elif self.mode.startswith('test'):
            file_list = read_lines(osp.join(self.shape_dir, '..', 'test.txt'))
        else:
            raise RuntimeError(f'Mode {self.mode} is not supported.')
        shape_list = [fn + '.ply' for fn in file_list]
        return shape_list


class ShapePairDataset(FaustShapePairDataset):

    def _init(self):
        self.name_id_map = self.shape_data.get_name_id_map()
        categories = defaultdict(list)
        for sname in self.name_id_map.keys():
            categories[sname.split('/')[0]].append(sname)
        self.pair_indices = list()
        for cname, clist in categories.items():
            if cname in IGNORED_CATEGORIES:
                continue
            for pname in itertools.combinations(clist, 2):
                self.pair_indices.append((self.name_id_map[pname[0]], self.name_id_map[pname[1]]))

    def _load_corr_gt(self, sdict0, sdict1):
        corr0 = self._load_corr_file(sdict0['name'])
        corr1 = self._load_corr_file(sdict1['name'])
        corr_gt = np.stack((corr0, corr1), axis=1)
        return corr_gt

    def _load_corr_file(self, sname):
        corr_path = osp.join(self.corr_dir, f'{sname}.vts')
        corr = np.loadtxt(corr_path, dtype=np.int32)
        return corr - 1
shape_data/faust.py
ADDED
@@ -0,0 +1,408 @@
import os.path as osp
import sys
import itertools
import math
import numpy as np
import torch
from torch.utils.data import Dataset
from pathlib import Path
import potpourri3d as pp3d
import open3d as o3d
from utils.geometry import get_operators, load_operators
from utils.surfaces import Surface
from utils.utils_func import may_create_folder
from utils.mesh import find_mesh_files

# def opt_rot_points(pts_1, pts_2, device="cuda:0"):
#     center_1 = pts_1.mean(dim=0)
#     pts_c1 = pts_1 - center_1
#     center_2 = pts_2.mean(dim=0)
#     pts_c2 = pts_2 - center_2
#     to_sum = pts_c1[:, :, None] * pts_c2[:, None, :]
#     A = pts_c1.T @ pts_c2
#     #A = to_sum.sum(axis=0)
#     u, _, v = torch.linalg.svd(A)
#     a = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, torch.sign(torch.linalg.det(A))]]).float().to(device)
#     O = u @ a @ v
#     return O.T


def opt_rot_points(pts_1, pts_2):
    center_1 = pts_1.mean(axis=0)
    pts_c1 = pts_1 - center_1
    center_2 = pts_2.mean(axis=0)
    pts_c2 = pts_2 - center_2

    A = np.dot(pts_c1.T, pts_c2)
    u, _, v = np.linalg.svd(A)
    a = np.array([[1, 0, 0], [0, 1, 0], [0, 0, np.sign(np.linalg.det(A))]])
    O = u @ a @ v
    return O.T


def compute_vertex_normals(vertices, faces):
    mesh = o3d.geometry.TriangleMesh(o3d.utility.Vector3dVector(vertices), o3d.utility.Vector3iVector(faces))
    mesh.compute_vertex_normals()
    return np.asarray(mesh.vertex_normals, dtype=np.float32)


def numpy_to_open3d_mesh(V, F):
    # Create an empty TriangleMesh object
    mesh = o3d.geometry.TriangleMesh()
    # Set vertices
    mesh.vertices = o3d.utility.Vector3dVector(V)
    # Set triangles
    mesh.triangles = o3d.utility.Vector3iVector(F)
    return mesh


def open_mesh(path):
    """
    Tries to open a mesh.
    If that fails, tries the .ply, .obj, and .off alternatives.

    Parameters
    ----------
    path : str
        Path of the mesh
    Returns
    -------
    mesh or None
        Loaded mesh (V, F format) if successful, else None
    """
    p = Path(path)
    base, ext = p.with_suffix(""), p.suffix
    tried_exts = [ext, ".ply", ".obj", ".off"]
    for e in tried_exts:
        path = base.with_suffix(e)
        if Path.exists(path):
            try:
                temp = pp3d.read_mesh(str(path))
                return temp
            except Exception as err:
                print(f"Failed loading {path}: {err}")
    return None


KEYS = ['vertices', 'faces', 'frames', 'mass', 'L', 'evals', 'evecs', 'gradX', 'gradY', 'hks', 'wks', 'idx', 'name']


class ShapeDataset(Dataset):
    TRAIN_IDX = np.arange(0, 80)
    TEST_IDX = np.arange(80, 100)
    NAME = "FAUST"

    def __init__(self,
                 shape_dir,
                 cache_dir,
                 mode,
                 oriented=False,
                 rot_auto=False,
                 num_eigenbasis=256,
                 laplacian_type='mesh',
                 feature_type=None,
                 **kwargs):
        super().__init__()

        self.shape_dir = shape_dir
        self.cache_dir = cache_dir
        self.mode = mode
        self.oriented = oriented
        if self.oriented:
            self.NAME = self.NAME + "_ori"
        self.num_eigenbasis = num_eigenbasis
        self.laplacian_type = laplacian_type
        self.feature_type = feature_type
        for k, w in kwargs.items():
            setattr(self, k, w)

        print(f'Loading {mode} data from {shape_dir}')
        self.shape_list = self._get_file_list()
        self._prepare()

        self.randg = np.random.RandomState(0)

    def _get_file_list(self):
        path_list = find_mesh_files(Path(self.shape_dir), alphanum_sort=True)
        file_list = [f.name for f in path_list]
        if self.mode.startswith('train'):
            assert self.TRAIN_IDX is not None
            shape_list = [file_list[idx] for idx in self.TRAIN_IDX]
        elif self.mode.startswith('test'):
            assert self.TEST_IDX is not None
            shape_list = [file_list[idx] for idx in self.TEST_IDX]
        else:
            raise RuntimeError(f'Mode {self.mode} is not supported.')
        return shape_list

    def _load_mesh(self, filepath, scale=True, return_vnormals=False):
        V, F = open_mesh(filepath)
        mesh = numpy_to_open3d_mesh(V, F)

        tmat = np.identity(4, dtype=np.float32)
        center = mesh.get_center()
        tmat[:3, 3] = -center
        if scale:
            smat = np.identity(4, dtype=np.float32)
            area = mesh.get_surface_area()
            smat[:3, :3] = np.identity(3, dtype=np.float32) / math.sqrt(area)
            tmat = smat @ tmat
        mesh.transform(tmat)

        vertices = np.asarray(mesh.vertices, dtype=np.float32)
        faces = np.asarray(mesh.triangles, dtype=np.int32)
        if return_vnormals:
            mesh.compute_vertex_normals()
            vnormals = np.asarray(mesh.vertex_normals, dtype=np.float32)
            return vertices, faces, vnormals
        else:
            return vertices, faces

    def _prepare(self):
        may_create_folder(self.cache_dir)
        for sid, sname in enumerate(self.shape_list):
            cache_prefix = osp.join(self.cache_dir, self.NAME, f'{sname[:-4]}_{self.laplacian_type}_{self.num_eigenbasis}k')
            cache_path = cache_prefix + '_0n.npz'
            if not Path(cache_path).is_file():
                vertices_np, faces_np, vertex_normals_np = self._load_mesh(osp.join(self.shape_dir, sname),
                                                                           scale=True,
                                                                           return_vnormals=True)

                if self.laplacian_type == 'mesh':
                    _ = get_operators(torch.from_numpy(vertices_np).float(), torch.from_numpy(faces_np).long(), self.num_eigenbasis, cache_path=cache_path)
                # elif self.laplacian_type == 'pcd':
                #     compute_operators(vertices_np, np.asarray([], dtype=np.int32), vertex_normals_np, self.num_eigenbasis,
                #                       cache_path)
                else:
                    raise RuntimeError(f'Basis type {self.laplacian_type} is not supported')

            # if self.aug_noise_type is not None and self.aug_noise_type != 'naive':
            #     max_magnitude, max_levels = self.aug_noise_args[:2]
            #     randg = np.random.RandomState(sid)
            #     for nlevel in range(1, max_levels + 1):
            #         cache_path = cache_prefix + f'_{nlevel}n.npz'
            #         if Path(cache_path).is_file():
            #             continue
            #         noise_mag = max_magnitude * nlevel / max_levels
            #         noise_mat = np.clip(noise_mag * randg.randn(vertices_np.shape[0], vertices_np.shape[1]), -noise_mag,
            #                             noise_mag)
            #         vertices_noise_np = vertices_np + noise_mat.astype(vertices_np.dtype)
            #         vertex_normals_noise_np = compute_vertex_normals(vertices_noise_np, faces_np)

            #         if self.laplacian_type == 'mesh':
            #             compute_operators(vertices_noise_np, faces_np, vertex_normals_noise_np, self.num_eigenbasis, cache_path)
            #         elif self.laplacian_type == 'pcd':
            #             compute_operators(vertices_noise_np, np.asarray([], dtype=np.int32), vertex_normals_noise_np,
            #                               self.num_eigenbasis, cache_path)
            #         else:
            #             raise RuntimeError(f'Basis type {self.laplacian_type} is not supported')

    def __getitem__(self, idx):
        sname = self.shape_list[idx]

        cache_prefix = osp.join(self.cache_dir, self.NAME, f'{sname[:-4]}_{self.laplacian_type}_{self.num_eigenbasis}k')
        cache_path = cache_prefix + '_0n.npz'

        assert Path(cache_path).is_file()

        sdict = load_operators(cache_path)
        sdict['idx'] = idx
        sdict['name'] = sname[:-4]

        if self.feature_type is not None:
            sdict['feats'] = np.concatenate([sdict[ft] for ft in self.feature_type.split('_')], axis=-1)
        vertices_np, _, _ = self._load_mesh(osp.join(self.shape_dir, sname), scale=True, return_vnormals=True)
        sdict['vertices'] = vertices_np
        sdict = self._centering(sdict)
        return sdict

    def __len__(self):
        return len(self.shape_list)

    def _centering(self, sdict):
        vertices, areas = sdict['vertices'], sdict["mass"]
        # area-weighted centroid; the original summed over all entries (a scalar),
        # fixed here to sum per coordinate
        center = (vertices * areas[:, None]).sum(axis=0) / areas.sum()
        sdict['vertices'] = vertices - center
        return sdict

    def _random_noise_naive(self, sdict, randg, args):
        vertices = sdict['vertices']
        dtype = vertices.dtype
        shape = vertices.shape
        std, clip = args

        noise = np.clip(std * randg.randn(*shape), -clip, clip)
        sdict['vertices'] = vertices + noise.astype(dtype)
        return sdict

    def _random_rotation(self, sdict, randg, axes, args):
        vertices = sdict['vertices']
        dtype = vertices.dtype

        max_x, max_y, max_z = args
        if 'x' in axes:
            anglex = randg.rand() * max_x * np.pi / 180.0
            cosx = np.cos(anglex)
            sinx = np.sin(anglex)
            Rx = np.asarray([[1, 0, 0], [0, cosx, -sinx], [0, sinx, cosx]], dtype=dtype)
        else:
            Rx = np.eye(3, dtype=dtype)

        if 'y' in axes:
            angley = randg.rand() * max_y * np.pi / 180.0
            cosy = np.cos(angley)
            siny = np.sin(angley)
            Ry = np.asarray([[cosy, 0, siny], [0, 1, 0], [-siny, 0, cosy]], dtype=dtype)
        else:
            Ry = np.eye(3, dtype=dtype)

        if 'z' in axes:
            anglez = randg.rand() * max_z * np.pi / 180.0
            cosz = np.cos(anglez)
            sinz = np.sin(anglez)
            Rz = np.asarray([[cosz, -sinz, 0], [sinz, cosz, 0], [0, 0, 1]], dtype=dtype)
        else:
            Rz = np.eye(3, dtype=dtype)

        Rxyz = randg.permutation(np.stack((Rx, Ry, Rz), axis=0))
        R = Rxyz[2] @ Rxyz[1] @ Rxyz[0]
        sdict['vertices'] = vertices @ R.T

        return sdict

    def _random_scaling(self, sdict, randg, args):
        scale_min, scale_max = args
        vertices = sdict['vertices']
        scale = scale_min + randg.rand(1, 3) * (scale_max - scale_min)
        sdict['vertices'] = vertices * scale
        return sdict

    def get_name_id_map(self):
        return {sname[:-4]: sid for sid, sname in enumerate(self.shape_list)}


class ShapePairDataset(Dataset):

    def __init__(self, corr_dir, mode, shape_data, rotate=False, **kwargs):
        super().__init__()
        self.corr_dir = corr_dir
        self.mode = mode
        self.shape_data = shape_data
        self.rotate = rotate
        if self.shape_data.oriented and self.rotate:
            self.rotate = False
        for k, w in kwargs.items():
            setattr(self, k, w)

        self._init()

        self.randg = np.random.RandomState(0)

    def _init(self):
        self.name_id_map = self.shape_data.get_name_id_map()
        self.pair_indices = list(itertools.combinations(range(len(self.shape_data)), 2))

    def __getitem__(self, idx):
        pidx = self.pair_indices[idx]
        sdict0 = self.shape_data[pidx[0]]
        sdict1 = self.shape_data[pidx[1]]
        return self._prepare_pair(sdict0, sdict1)

    def get_by_names(self, sname0, sname1):
        sdict0 = self.shape_data[self.name_id_map[sname0]]
        sdict1 = self.shape_data[self.name_id_map[sname1]]
        return self._prepare_pair(sdict0, sdict1)

    def _prepare_pair(self, sdict0, sdict1):
        corr_gt = self._load_corr_gt(sdict0, sdict1)
        # for fmap_size in self.fmap_sizes:
        #     fmap01_gt = pmap_to_fmap(sdict0['evecs'][:, :fmap_size], sdict1['evecs'][:, :fmap_size], corr_gt)
        #     pdict[f'fmap01_{fmap_size}_gt'] = fmap01_gt

        # for idx in range(2):
        #     indices_sel = farthest_point_sampling(pdict[f'vertices{idx}'], self.num_corrs, random_start=is_train)
        #     for k in ['vertices', 'evecs', 'feats']:
        #         kid = f'{k}{idx}'
        #         if kid in pdict:
        #             pdict[kid + '_sub'] = pdict[kid][indices_sel, :]
        #     if self.use_geodists:
        #         geodists = compute_geodesic_distance(pdict[f'vertices{idx}'], pdict[f'faces{idx}'], indices_sel)
        #         pdict[f'geodists{idx}_sub'] = geodists
        #     pdict[f'vindices{idx}_sub'] = indices_sel

        # fmap_size = self.fmap_sizes[-1]
        # corr_gt_sub = fmap_to_pmap(pdict['evecs0_sub'][:, :fmap_size], pdict['evecs1_sub'][:, :fmap_size],
        #                            pdict[f'fmap01_{fmap_size}_gt'])
        # pdict['corr_gt_sub'] = corr_gt_sub

        # if is_train:
        #     fmap_size = self.fmap_sizes[0]
        #     axis = self.randg.choice([0, 1]).item()
        #     max_bases = fmap_size // 2
        #     noise_ratio = 0.5
        #     if self.randg.rand() > 0.5:
        #         pdict[f'fmap01_{fmap_size}'] = self._random_scale(pdict[f'fmap01_{fmap_size}_gt'], self.randg, axis, max_bases)
        #     else:
        #         pdict[f'fmap01_{fmap_size}'] = self._random_noise(pdict[f'fmap01_{fmap_size}_gt'], self.randg, axis, max_bases,
        #                                                           noise_ratio)
        # else:
        #     if self.corr_loader is not None:
        #         corr_init = self.corr_loader.get_by_names(sdict0['name'], sdict1['name'])
        #         assert corr_init.ndim == 2 and len(corr_init) == len(sdict1['vertices'])
        #         fmap_size = self.fmap_sizes[0]
        #         fmap01_init = pmap_to_fmap(sdict0['evecs'][:, :fmap_size], sdict1['evecs'][:, :fmap_size], corr_init)
        #         pdict[f'fmap01_{fmap_size}'] = fmap01_init
        #         pdict['pmap10'] = corr_init[:, 0]

        vts_1, vts_2 = corr_gt[:, 0], corr_gt[:, 1]
        shape_dict, target_dict = sdict0, sdict1

        if self.rotate:
            pts_1, pts_2 = shape_dict['vertices'][vts_1], target_dict['vertices'][vts_2]
            rot = opt_rot_points(pts_1, pts_2).astype(np.float32)
            target_dict['vertices'] = target_dict['vertices'] @ rot
        target_surf = Surface(FV=[target_dict['faces'], target_dict['vertices']])
        target_normals = torch.from_numpy(target_surf.surfel / np.linalg.norm(target_surf.surfel, axis=-1, keepdims=True)).float().cuda()

        shape_surf = Surface(FV=[shape_dict['faces'], shape_dict['vertices']])
        map_info = (shape_dict['name'], vts_1, vts_2)
        return shape_dict, shape_surf, target_dict, target_surf, target_normals, map_info

    def _random_scale(self, fmap, randg, axis, max_bases):
        assert max_bases > 1
        assert axis in [0, 1]
        num_bases = randg.randint(1, max_bases)
        ids = randg.choice(fmap.shape[axis], num_bases, replace=False)
        fmap_out = np.copy(fmap)
        if axis == 0:
            fmap_out[ids, :] *= (randg.rand(num_bases, 1) * 2 - 1)
        else:
            fmap_out[:, ids] *= (randg.rand(1, num_bases) * 2 - 1)
        return fmap_out

    def _random_noise(self, fmap, randg, axis, max_bases, max_ratio):
        assert max_bases > 1
        assert axis in [0, 1]
        num_bases = randg.randint(1, max_bases)
        ids = randg.choice(fmap.shape[axis], num_bases, replace=False)
        fmap_out = np.copy(fmap)
        ratio = randg.rand() * max_ratio
        if axis == 0:
            maxvals = np.amax(np.abs(fmap_out[ids, :]), axis=1 - axis, keepdims=True)
            noise = ratio * maxvals * randg.randn(num_bases, fmap.shape[1 - axis])
            fmap_out[ids, :] += noise
        else:
            maxvals = np.amax(np.abs(fmap_out[:, ids]), axis=1 - axis, keepdims=True)
            noise = ratio * maxvals * randg.randn(fmap.shape[1 - axis], num_bases)
            fmap_out[:, ids] += noise
        return fmap_out

    def _load_corr_gt(self, sdict0, sdict1):
        corr0 = self._load_corr_file(sdict0['name'])
        corr1 = self._load_corr_file(sdict1['name'])
        corr_gt = np.stack((corr0, corr1), axis=1)
        return corr_gt

    def _load_corr_file(self, sname):
        corr_path = osp.join(self.corr_dir, f'{sname}.vts')
        corr = np.loadtxt(corr_path, dtype=np.int32)
        return corr - 1

    def __len__(self):
        return len(self.pair_indices)
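How the two dataset classes compose, as a hedged sketch (not part of the commit): the paths below are placeholders for a local data layout, and since _prepare_pair moves the target normals to CUDA, a GPU is assumed.

shapes = ShapeDataset(shape_dir='data/FAUST_r/shapes', cache_dir='data_cache',
                      mode='train', num_eigenbasis=256)
pairs = ShapePairDataset(corr_dir='data/FAUST_r/correspondences',
                         mode='train', shape_data=shapes)
sdict0, surf0, sdict1, surf1, target_normals, map_info = pairs[0]
print(map_info[0], sdict0['vertices'].shape)   # source shape name and its (n, 3) vertices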
shape_data/scape.py
ADDED
@@ -0,0 +1,17 @@
import os.path as osp
import sys
import numpy as np
from pathlib import Path

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from shape_data.faust import ShapeDataset as FaustShapeDataset
from shape_data.faust import ShapePairDataset


class ShapeDataset(FaustShapeDataset):
    TRAIN_IDX = np.arange(0, 51)
    TEST_IDX = np.arange(51, 71)
    NAME = "SCAPE"
shape_data/shrec19.py
ADDED
@@ -0,0 +1,42 @@
import os.path as osp
import sys
import numpy as np
from pathlib import Path

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from shape_data.faust import ShapeDataset as FaustShapeDataset
from shape_data.faust import ShapePairDataset as FaustShapePairDataset
from utils.io import list_files


class ShapeDataset(FaustShapeDataset):
    TRAIN_IDX = None
    TEST_IDX = np.arange(44)
    NAME = "SHREC"


class ShapePairDataset(FaustShapePairDataset):

    def _init(self):
        assert self.mode.startswith('test')

        self.name_id_map = self.shape_data.get_name_id_map()
        self.pair_indices = list()
        for corr_filename in list_files(self.corr_dir, '*.map', alphanum_sort=True):
            sname0, sname1 = corr_filename[:-4].split('_')
            if sname0 == '40' or sname1 == '40':
                continue
            self.pair_indices.append((self.name_id_map[sname1], self.name_id_map[sname0]))

    def _load_corr_gt(self, sdict0, sdict1):
        pmap10 = self._load_corr_file(sdict1['name'], sdict0['name'])
        corr_gt = np.stack((pmap10, np.arange(len(pmap10))), axis=1)
        return corr_gt

    def _load_corr_file(self, sname0, sname1):
        corr_path = osp.join(self.corr_dir, f'{sname0}_{sname1}.map')
        corr = np.loadtxt(corr_path, dtype=np.int32)
        return corr - 1
shape_data/smalr.py
ADDED
@@ -0,0 +1,31 @@
import os.path as osp
import sys
import numpy as np
from pathlib import Path

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from shape_data.faust import ShapeDataset as FaustShapeDataset
from shape_data.faust import ShapePairDataset
from utils.mesh import find_mesh_files


class ShapeDataset(FaustShapeDataset):
    TRAIN_IDX = None
    TEST_IDX = None
    NAME = "SMAL"

    def _get_file_list(self):
        if self.mode.startswith('train'):
            categories = ['cow', 'dog', 'fox', 'lion', 'wolf']
        elif self.mode.startswith('test'):
            categories = ['cougar', 'hippo', 'horse']
        else:
            raise RuntimeError(f'Mode {self.mode} is not supported.')

        path_list = find_mesh_files(Path(self.shape_dir), alphanum_sort=True)
        file_list = [f.name for f in path_list]
        shape_list = [fn for fn in file_list if fn.split('_')[0] in categories]
        return shape_list
shape_data/tosca.py
ADDED
@@ -0,0 +1,46 @@
import os.path as osp
import sys
import numpy as np
import re
from pathlib import Path
from itertools import permutations as pmt

ROOT_DIR = osp.join(osp.abspath(osp.dirname(__file__)), '../')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from shape_data.faust import ShapeDataset as FaustShapeDataset
from shape_data.faust import ShapePairDataset as FaustShapePairDataset
from utils.io import list_files


def contains_any_regex(substrings, ext, texts):
    pattern = re.compile('|'.join(map(re.escape, substrings)))  # compile the regex once
    return [text for text in texts if bool(pattern.search(text)) and (ext in text)]  # apply to all texts efficiently


class ShapeDataset(FaustShapeDataset):
    TRAIN_IDX = None
    TEST_IDX = None

    def _get_file_list(self):
        if self.mode.startswith('train'):
            categories = None
        elif self.mode.startswith('test'):
            categories = ['cat', 'dog', 'horse', 'wolf']
        else:
            raise RuntimeError(f'Mode {self.mode} is not supported.')
        file_list = list_files(self.shape_dir, '*.off', alphanum_sort=True)
        if categories is None:
            # guard added: the original passed None into contains_any_regex, which
            # would crash in re.escape; train mode keeps every .off file instead
            return file_list
        shape_list = contains_any_regex(categories, ".off", file_list)
        return shape_list


class ShapePairDataset(FaustShapePairDataset):
    categories = ['cat', 'dog', 'horse', 'wolf']

    def _init(self):
        assert self.mode.startswith('test')
        self.name_id_map = self.shape_data.get_name_id_map()
        self.pair_indices = list()
        for cat in self.categories:
            shape_list_temp = [self.name_id_map[fn] for fn in self.name_id_map if cat in fn]
            self.pair_indices += list(pmt(shape_list_temp, 2))
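For reference (not part of the commit), contains_any_regex keeps the entries that match any category substring and carry the extension:

names = ['cat0.off', 'dog3.off', 'horse.ply', 'centaur1.off']
print(contains_any_regex(['cat', 'dog'], '.off', names))   # ['cat0.off', 'dog3.off']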
snk/__init__.py
ADDED
File without changes
snk/loss.py
ADDED
|
@@ -0,0 +1,119 @@
from trimesh.graph import face_adjacency
import torch
import torch.nn as nn


class PrismRegularizationLoss(nn.Module):
    """
    Calculate the loss based on the PriMo energy, as described in the paper:
    PriMo: Coupled Prisms for Intuitive Surface Modeling
    """
    def __init__(self, primo_h):
        super().__init__()
        self.h = primo_h

        # precompute the coefficients 2^{-|i - k| - |j - l|} of the energy
        indices = torch.tensor([(i, j) for i in range(2) for j in range(2)])
        indices_A = indices.repeat_interleave(4, dim=0)
        indices_B = indices.repeat(4, 1)
        self.coeff = (torch.ones(1) * 2).pow(((indices_A - indices_B).abs() * -1).sum(dim=1))[None, :]

    def forward(self, transformed_prism, rotations, verts, faces, normals):
        # transformed_prism is (n_faces, 3, 3)
        # verts and faces are from the template (shape 2)
        # * for now assumes there is only one batch
        # todo: add batch support
        bs = 1
        verts = verts.reshape(-1, 3)
        normals = normals.reshape(-1, 3)

        # get the area of each face
        face_areas = self.get_face_areas(verts, faces)  # (n_faces,)

        # get the list of edges and the pair of faces sharing each edge
        face_ids, edges = face_adjacency(faces.cpu().numpy(), return_edges=True)  # (n_edges, 2), (n_edges, 2)
        face_ids, edges = torch.from_numpy(face_ids).to(verts.device), torch.from_numpy(edges).to(verts.device)

        # normals and rotations of the faces that share each edge
        normals1, normals2 = normals[edges[:, 0]], normals[edges[:, 1]]  # (n_edges, 3), normals are per vertex
        rotations1, rotations2 = rotations[face_ids[:, 0]], rotations[face_ids[:, 1]]  # (n_edges, 3, 3), rotations are per face

        # locate the edge endpoints inside each adjacent face
        face_id1, face_id2 = face_ids[:, 0], face_ids[:, 1]  # (n_edges,)
        faces_to_verts = self.get_verts_id_face(faces, edges, face_ids)  # (n_edges, 4)
        verts1_p1, verts2_p1 = transformed_prism[face_id1, faces_to_verts[:, 0]], transformed_prism[face_id1, faces_to_verts[:, 1]]  # (n_edges, 3)
        verts1_p2, verts2_p2 = transformed_prism[face_id2, faces_to_verts[:, 2]], transformed_prism[face_id2, faces_to_verts[:, 3]]  # (n_edges, 3)

        # rotate the per-vertex normals into the frame of each prism
        prism1_n1, prism1_n2 = (normals1[:, None] @ rotations1).squeeze(1), (normals2[:, None] @ rotations1).squeeze(1)  # todo: check if this is correct
        prism2_n1, prism2_n2 = (normals1[:, None] @ rotations2).squeeze(1), (normals2[:, None] @ rotations2).squeeze(1)

        # get the coordinates of the shared face of each prism
        # prism1 (1 -> 2)
        f_p1_00, f_p1_01 = verts1_p1 + prism1_n1 * self.h, verts2_p1 + prism1_n2 * self.h  # (n_edges, 3)
        f_p1_10, f_p1_11 = verts1_p1 - prism1_n1 * self.h, verts2_p1 - prism1_n2 * self.h  # (n_edges, 3)
        # prism2 (2 -> 1)
        f_p2_00, f_p2_01 = verts1_p2 + prism2_n1 * self.h, verts2_p2 + prism2_n2 * self.h  # (n_edges, 3)
        f_p2_10, f_p2_11 = verts1_p2 - prism2_n1 * self.h, verts2_p2 - prism2_n2 * self.h  # (n_edges, 3)

        # compute the energy between the two coupled prism faces
        A = torch.stack((f_p1_00, f_p1_01, f_p1_10, f_p1_11), dim=1)  # (n_edges, 4, 3)
        B = torch.stack((f_p2_00, f_p2_01, f_p2_10, f_p2_11), dim=1)  # (n_edges, 4, 3)
        energy = self.compute_energy(A - B, A - B)  # (n_edges,)

        # weight each edge by its squared length over the areas of the two adjacent faces
        area1, area2 = face_areas[face_id1], face_areas[face_id2]  # (n_edges,)
        weight = torch.norm(verts[edges[:, 0]] - verts[edges[:, 1]], dim=1).square() / (area1 + area2)  # (n_edges,)
        energy = energy * weight  # (n_edges,)

        loss = energy.sum() / bs  # todo: once batching is enabled, divide by the batch size
        return loss

    def compute_energy(self, A, B):
        """
        Computes the formula sum_{i,j,k,l=0}^{1} a_{ij} b_{kl} 2^{-|i - k| - |j - l|}.
        Assumes that A and B are tensors of size bs x 4 x 3, where bs is the batch size.
        """
        self.coeff = self.coeff.to(A.device)

        A_repeated = A.repeat_interleave(4, dim=1)
        B_repeated = B.repeat(1, 4, 1)

        energy = (A_repeated * B_repeated).sum(dim=-1)
        energy = (energy * self.coeff).sum(dim=1)
        energy = energy / 9

        return energy

    def get_face_areas(self, verts, faces):
        # area of each face from the cross product of two edge vectors
        v1, v2, v3 = verts[faces[:, 0]], verts[faces[:, 1]], verts[faces[:, 2]]
        area = 0.5 * torch.cross(v2 - v1, v3 - v1, dim=-1).norm(dim=1)

        return area

    def get_verts_id_face(self, F, E, Q):
        # for each edge (E) and its two adjacent faces (Q), return the local index
        # (0, 1 or 2) of both edge endpoints within each face
        e = E.shape[0]
        Z = torch.zeros((e, 4), dtype=torch.long)

        v1 = F[:, 0][Q[:, 0]]
        v2 = F[:, 1][Q[:, 0]]
        v3 = F[:, 2][Q[:, 0]]
        v4 = F[:, 0][Q[:, 1]]
        v5 = F[:, 1][Q[:, 1]]
        v6 = F[:, 2][Q[:, 1]]

        idx1 = torch.where(v1 == E[:, 0], 0, torch.where(v2 == E[:, 0], 1, torch.where(v3 == E[:, 0], 2, -1)))
        idx2 = torch.where(v1 == E[:, 1], 0, torch.where(v2 == E[:, 1], 1, torch.where(v3 == E[:, 1], 2, -1)))
        idx3 = torch.where(v4 == E[:, 0], 0, torch.where(v5 == E[:, 0], 1, torch.where(v6 == E[:, 0], 2, -1)))
        idx4 = torch.where(v4 == E[:, 1], 0, torch.where(v5 == E[:, 1], 1, torch.where(v6 == E[:, 1], 2, -1)))

        Z[:, 0:2] = torch.stack((idx1, idx2), dim=1)
        Z[:, 2:4] = torch.stack((idx3, idx4), dim=1)
        Z = Z.to(F.device)

        return Z
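The coefficient table built in __init__ encodes the 2^(-|i - k| - |j - l|) weights from the compute_energy docstring, with the four prism-face corners ordered (0,0), (0,1), (1,0), (1,1). A quick check of the vectorized version against a naive quadruple loop, assuming snk/loss.py is importable (the tensors are random, purely for testing):

    import torch
    from snk.loss import PrismRegularizationLoss

    indices = [(i, j) for i in range(2) for j in range(2)]  # corner order: 00, 01, 10, 11
    A, B = torch.randn(5, 4, 3), torch.randn(5, 4, 3)
    naive = torch.zeros(5)
    for a, (i, j) in enumerate(indices):
        for b, (k, l) in enumerate(indices):
            naive += (A[:, a] * B[:, b]).sum(dim=-1) * 2.0 ** (-abs(i - k) - abs(j - l))
    naive /= 9

    loss_fn = PrismRegularizationLoss(primo_h=0.05)  # the primo_h value is arbitrary here
    assert torch.allclose(loss_fn.compute_energy(A, B), naive, atol=1e-6)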
snk/prism_decoder.py
ADDED
@@ -0,0 +1,86 @@
import torch
import torch.nn as nn
import roma
from shape_models.layers import DiffusionNet


class PrismDecoder(torch.nn.Module):
    def __init__(self, dim_in=1024, dim_out=512, n_width=256, n_block=4, pairwise_dot=True, dropout=False, dot_linear_complex=True, neig=128):
        super().__init__()

        self.diffusion_net = DiffusionNet(
            C_in=dim_in,
            C_out=dim_out,
            C_width=n_width,
            N_block=n_block,
            dropout=dropout,
            with_gradient_features=pairwise_dot,
            with_gradient_rotations=dot_linear_complex,
        )

        self.mlp_refine = nn.Sequential(
            nn.Linear(dim_out, dim_out),
            nn.ReLU(),
            nn.Linear(dim_out, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 12),
        )

    def forward(self, batch_dict, latent):
        # original prism: one triangle per face of the template
        verts = batch_dict["vertices"] if "vertices" in batch_dict else batch_dict["verts"]
        faces = batch_dict["faces"]
        prism_base = verts[faces]  # (n_faces, 3, 3)
        bs = 1

        # forward through diffusion net
        features = self.diffusion_net(latent, batch_dict["mass"], batch_dict["L"], evals=batch_dict["evals"],
                                      evecs=batch_dict["evecs"], gradX=batch_dict["gradX"], gradY=batch_dict["gradY"], faces=batch_dict["faces"])  # (n_verts, dim)

        # average the per-vertex features over each face
        x_gather = features.unsqueeze(-1).expand(-1, -1, 3)
        faces_gather = faces.unsqueeze(1).expand(-1, features.shape[-1], -1)
        xf = torch.gather(x_gather, 0, faces_gather)
        features = torch.mean(xf, dim=-1)  # (n_faces, dim)

        # refine features with an MLP
        features = self.mlp_refine(features)  # (n_faces, 12)

        # split into a rotation (projected onto SO(3)) and a translation
        rotations = features[:, :9].reshape(-1, 3, 3)
        rotations = roma.special_procrustes(rotations)  # (n_faces, 3, 3)
        translations = features[:, 9:].reshape(-1, 3)  # (n_faces, 3)

        # rigidly transform each prism
        transformed_prism = (prism_base @ rotations) + translations[:, None]

        # average the prism corners back into per-vertex positions
        features = self.prism_to_vertices(transformed_prism, faces, verts)

        out_features = features.reshape(bs, -1, 3)
        return out_features, transformed_prism, rotations

    def prism_to_vertices(self, prism, faces, verts):
        # initialize the per-vertex features tensor
        N = verts.shape[0]
        d = prism.shape[-1]
        device = prism.device
        features = torch.zeros((N, d), device=device)

        # scatter-add the per-corner features onto the vertices they belong to
        features.scatter_add_(0, faces[:, :, None].repeat(1, 1, d).reshape(-1, d), prism.reshape(-1, d))

        # divide each row by the number of faces the corresponding vertex appears in
        num_faces_per_vertex = torch.zeros(N, dtype=torch.float32, device=device)
        num_faces_per_vertex.index_add_(0, faces.reshape(-1), torch.ones(faces.shape[0] * 3, device=device))
        features /= num_faces_per_vertex.unsqueeze(1).clamp(min=1)

        return features
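prism_to_vertices is a scatter-mean: each vertex ends up with the average of its copies over all incident transformed faces. A tiny check against an explicit loop, assuming snk/prism_decoder.py is importable; the method never reads self, so it can be called unbound, and all values below are illustrative:

    import torch
    from snk.prism_decoder import PrismDecoder

    faces = torch.tensor([[0, 1, 2], [0, 2, 3]])  # two triangles sharing the edge (0, 2)
    prism = torch.arange(18, dtype=torch.float32).reshape(2, 3, 3)
    verts = torch.zeros(4, 3)  # only the vertex count is read

    expected = torch.zeros(4, 3)
    counts = torch.zeros(4)
    for f in range(faces.shape[0]):  # naive reference implementation
        for c in range(3):
            expected[faces[f, c]] += prism[f, c]
            counts[faces[f, c]] += 1
    expected /= counts.clamp(min=1).unsqueeze(1)

    out = PrismDecoder.prism_to_vertices(None, prism, faces, verts)  # self is unused
    assert torch.allclose(out, expected)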