taesiri committed on
Commit
8390f90
•
1 Parent(s): ec6b92a

Initial Commit

README.md CHANGED
@@ -1,37 +1,13 @@
 ---
 title: ConvolutionalHoughMatchingNetworks
 emoji: 📚
- colorFrom: green
+ colorFrom: red
 colorTo: yellow
 sdk: gradio
 app_file: app.py
 pinned: false
 ---

- # Configuration
-
- `title`: _string_
- Display title for the Space
-
- `emoji`: _string_
- Space emoji (emoji-only character allowed)
-
- `colorFrom`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `colorTo`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-
- `sdk`: _string_
- Can be either `gradio` or `streamlit`
-
- `sdk_version` : _string_
- Only applicable for `streamlit` SDK.
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
-
- `app_file`: _string_
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
- Path is relative to the root of the repository.
-
- `pinned`: _boolean_
- Whether the Space stays on top of your list.
+ # Convolutional Hough Matching Networks
+
+ A demo for Convolutional Hough Matching Networks. [[Paper](https://arxiv.org/abs/2109.05221)] [[Official Github Repo](https://github.com/juhongm999/chm.git)]
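For orientation before the code: the Space exposes a single function, `generate_correspondences`, through a Gradio UI. A minimal sketch of calling it outside the UI (illustrative only; `source.jpg`/`target.jpg` are placeholder paths, and importing `app` also executes the weight download and the `iface.launch()` at the bottom of app.py):

```python
# Illustrative sketch, not part of the commit: exercising the demo
# function directly. Importing app runs the whole script, including
# the model download and iface.launch().
from PIL import Image
from app import generate_correspondences

src = Image.open('source.jpg').convert('RGB')   # placeholder inputs
trg = Image.open('target.jpg').convert('RGB')
fig = generate_correspondences(src, trg, min_x=15, max_x=215, min_y=15, max_y=215)
fig.savefig('correspondences.png')
```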
app.py ADDED
@@ -0,0 +1,149 @@
+ from itertools import product
+
+ from torch.utils.data import DataLoader
+ import torch
+ from model.base.geometry import Geometry
+ from common.evaluation import Evaluator
+ from common.logger import AverageMeter
+ from common.logger import Logger
+ from data import download
+ from model import chmnet
+ import matplotlib
+ from matplotlib import pyplot as plt
+ from matplotlib.patches import ConnectionPatch
+ from PIL import Image
+ import numpy as np
+ import os
+ import torchvision
+ import torchvision.transforms as transforms
+ import torchvision.transforms.functional as TF
+ import torchvision.models as models
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import random
+ import gradio as gr
+
+ # Downloading the Model
+ torchvision.datasets.utils.download_file_from_google_drive('1zsJRlAsoOn5F0GTCprSFYwDDfV85xDy6', '.', 'pas_psi.pt')
+
+ # Model Initialization
+ args = dict({
+     'alpha': [0.05, 0.1],
+     'benchmark': 'pfpascal',
+     'bsz': 90,
+     'datapath': '../Datasets_CHM',
+     'img_size': 240,
+     'ktype': 'psi',
+     'load': 'pas_psi.pt',
+     'thres': 'img'
+ })
+
+ model = chmnet.CHMNet(args['ktype'])
+ model.load_state_dict(torch.load(args['load'], map_location=torch.device('cpu')))
+ Evaluator.initialize(args['alpha'])
+ Geometry.initialize(img_size=args['img_size'])
+ model.eval()
+
+ # Transforms
+ chm_transform = transforms.Compose(
+     [transforms.Resize(args['img_size']),
+      transforms.CenterCrop((args['img_size'], args['img_size'])),
+      transforms.ToTensor(),
+      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
+
+ chm_transform_plot = transforms.Compose(
+     [transforms.Resize(args['img_size']),
+      transforms.CenterCrop((args['img_size'], args['img_size']))])
+
+ # A Helper Function
+ to_np = lambda x: x.data.to('cpu').numpy()
+
+ # Colors for Plotting
+ cmap = matplotlib.cm.get_cmap('Spectral')
+ rgba = cmap(0.5)
+ colors = []
+ for k in range(49):
+     colors.append(cmap(k / 49.0))
+
+
+ # CHM MODEL
+ def run_chm(source_image, target_image, selected_points, number_src_points, chm_transform, display_transform):
+     # Convert to Tensor
+     src_img_tnsr = chm_transform(source_image).unsqueeze(0)
+     tgt_img_tnsr = chm_transform(target_image).unsqueeze(0)
+
+     keypoints = torch.tensor(selected_points).unsqueeze(0)
+     n_pts = torch.tensor(np.asarray([number_src_points]))
+
+     # RUN CHM ------------------------------------------------------------------------
+     with torch.no_grad():
+         corr_matrix = model(src_img_tnsr, tgt_img_tnsr)
+         prd_kps = Geometry.transfer_kps(corr_matrix, keypoints, n_pts, normalized=False)
+
+     # VISUALIZATION
+     src_points = keypoints[0].squeeze(0).squeeze(0).numpy()
+     tgt_points = prd_kps[0].squeeze(0).squeeze(0).cpu().numpy()
+
+     src_points_converted = []
+     w, h = display_transform(source_image).size
+
+     for x, y in zip(src_points[0], src_points[1]):
+         src_points_converted.append([int(x * w / args['img_size']), int(y * h / args['img_size'])])
+
+     src_points_converted = np.asarray(src_points_converted[:number_src_points])
+     tgt_points_converted = []
+
+     w, h = display_transform(target_image).size
+     for x, y in zip(tgt_points[0], tgt_points[1]):
+         tgt_points_converted.append([int(((x + 1) / 2.0) * w), int(((y + 1) / 2.0) * h)])
+
+     tgt_points_converted = np.asarray(tgt_points_converted[:number_src_points])
+
+     tgt_grid = []
+     for x, y in zip(tgt_points[0], tgt_points[1]):
+         tgt_grid.append([int(((x + 1) / 2.0) * 7), int(((y + 1) / 2.0) * 7)])
+
+     # PLOT
+     fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8))
+
+     ax[0].imshow(display_transform(source_image))
+     ax[0].scatter(src_points_converted[:, 0], src_points_converted[:, 1], c=colors[:number_src_points])
+     ax[0].set_title('Source')
+     ax[0].set_xticks([])
+     ax[0].set_yticks([])
+
+     ax[1].imshow(display_transform(target_image))
+     ax[1].scatter(tgt_points_converted[:, 0], tgt_points_converted[:, 1], c=colors[:number_src_points])
+     ax[1].set_title('Target')
+     ax[1].set_xticks([])
+     ax[1].set_yticks([])
+
+     for TL in range(49):
+         ax[0].text(x=src_points_converted[TL][0], y=src_points_converted[TL][1], s=str(TL), fontdict=dict(color='red', size=10))
+
+     for TL in range(49):
+         ax[1].text(x=tgt_points_converted[TL][0], y=tgt_points_converted[TL][1], s=str(TL), fontdict=dict(color='orange', size=8))
+
+     plt.tight_layout()
+     fig.suptitle('CHM Correspondences\nUsing $\\it{pas\\_psi.pt}$ Weights', fontsize=16)
+     return fig
+
+
+ # Wrapper
+ def generate_correspondences(source_image, target_image, min_x=1, max_x=100, min_y=1, max_y=100):
+     A = np.linspace(min_x, max_x, 7)
+     B = np.linspace(min_y, max_y, 7)
+     point_list = list(product(A, B))
+     new_points = np.asarray(point_list, dtype=np.float64).T
+     return run_chm(source_image, target_image, selected_points=new_points, number_src_points=49, chm_transform=chm_transform, display_transform=chm_transform_plot)
+
+
+ # GRADIO APP
+ iface = gr.Interface(fn=generate_correspondences,
+                      inputs=[gr.inputs.Image(shape=(240, 240), type='pil'),
+                              gr.inputs.Image(shape=(240, 240), type='pil'),
+                              gr.inputs.Slider(minimum=1, maximum=240, step=1, default=15, label='MinX'),
+                              gr.inputs.Slider(minimum=1, maximum=240, step=1, default=215, label='MaxX'),
+                              gr.inputs.Slider(minimum=1, maximum=240, step=1, default=15, label='MinY'),
+                              gr.inputs.Slider(minimum=1, maximum=240, step=1, default=215, label='MaxY')],
+                      outputs='plot')
+ iface.launch()
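A detail worth flagging in `run_chm` above: source keypoints enter in pixel coordinates (`normalized=False`), but `Geometry.transfer_kps` returns predictions in normalized `[-1, 1]` coordinates, which the plotting code maps back to pixels with `((x + 1) / 2) * w`. A toy check of that mapping:

```python
# Toy check of the normalized-to-pixel mapping used in run_chm.
w = 240                               # display width after CenterCrop
x_norm = 0.0                          # image center in [-1, 1] coordinates
x_px = int(((x_norm + 1) / 2.0) * w)
assert x_px == 120                    # normalized 0.0 lands mid-image
```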
common/__pycache__/evaluation.cpython-38.pyc ADDED
Binary file (1.3 kB)
common/__pycache__/logger.cpython-38.pyc ADDED
Binary file (4.23 kB)
common/evaluation.py ADDED
@@ -0,0 +1,32 @@
+ r""" Evaluates CHMNet with PCK """
+
+ import torch
+
+
+ class Evaluator:
+     r""" Computes evaluation metrics of PCK """
+     @classmethod
+     def initialize(cls, alpha):
+         cls.alpha = torch.tensor(alpha).unsqueeze(1)
+
+     @classmethod
+     def evaluate(cls, prd_kps, batch):
+         r""" Computes percentage of correct keypoints (PCK) with multiple alpha {0.05, 0.1, 0.15} """
+         pcks = []
+         for idx, (pk, tk) in enumerate(zip(prd_kps, batch['trg_kps'])):
+             pckthres = batch['pckthres'][idx]
+             npt = batch['n_pts'][idx]
+             prd_pts = pk[:, :npt]
+             trg_pts = tk[:, :npt]
+
+             l2dist = (prd_pts - trg_pts).pow(2).sum(dim=0).pow(0.5).unsqueeze(0).repeat(len(cls.alpha), 1)
+             thres = pckthres.expand_as(l2dist).float() * cls.alpha
+             pck = torch.le(l2dist, thres).sum(dim=1) / float(npt)
+             if len(pck) == 1: pck = pck[0]
+             pcks.append(pck)
+
+         eval_result = {'pck': pcks}
+
+         return eval_result
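`Evaluator.evaluate` implements PCK: a predicted keypoint counts as correct when its L2 distance to the ground truth is at most `alpha` times the threshold (image side or bounding-box size, depending on the `thres` setting). A toy run with made-up numbers:

```python
import torch

# Made-up example: 3 keypoints (x row / y row), threshold 240, alpha = 0.1
prd = torch.tensor([[10., 100., 50.],
                    [10., 100., 50.]])
trg = torch.tensor([[12., 100., 90.],
                    [10., 104., 50.]])
l2dist = (prd - trg).pow(2).sum(dim=0).pow(0.5)  # per-keypoint distances: 2, 4, 40
pck = torch.le(l2dist, 0.1 * 240).sum() / 3.0    # 40 > 24, so 2 of 3 count
print(pck)                                       # tensor(0.6667)
```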
common/logger.py ADDED
@@ -0,0 +1,117 @@
+ r""" Logging """
+
+ import datetime
+ import logging
+ import os
+
+ from tensorboardX import SummaryWriter
+ import torch
+
+
+ class Logger:
+     r""" Writes results of training/testing """
+     @classmethod
+     def initialize(cls, args, training):
+         logtime = datetime.datetime.now().__format__('_%m%d_%H%M%S')
+         logpath = args.logpath if training else '_TEST_' + args.load.split('/')[-1].split('.')[0] + logtime
+         if logpath == '': logpath = logtime
+
+         cls.logpath = os.path.join('logs', logpath + '.log')
+         cls.benchmark = args.benchmark
+         os.makedirs(cls.logpath)
+
+         logging.basicConfig(filemode='w',
+                             filename=os.path.join(cls.logpath, 'log.txt'),
+                             level=logging.INFO,
+                             format='%(message)s',
+                             datefmt='%m-%d %H:%M:%S')
+
+         # Console log config
+         console = logging.StreamHandler()
+         console.setLevel(logging.INFO)
+         formatter = logging.Formatter('%(message)s')
+         console.setFormatter(formatter)
+         logging.getLogger('').addHandler(console)
+
+         # Tensorboard writer
+         cls.tbd_writer = SummaryWriter(os.path.join(cls.logpath, 'tbd/runs'))
+
+         # Log arguments
+         if training:
+             logging.info(':======== Convolutional Hough Matching Networks =========')
+             for arg_key in args.__dict__:
+                 logging.info('| %20s: %-24s' % (arg_key, str(args.__dict__[arg_key])))
+             logging.info(':========================================================\n')
+
+     @classmethod
+     def info(cls, msg):
+         r""" Writes message to .txt """
+         logging.info(msg)
+
+     @classmethod
+     def save_model(cls, model, epoch, val_pck):
+         torch.save(model.state_dict(), os.path.join(cls.logpath, 'pck_best_model.pt'))
+         cls.info('Model saved @%d w/ val. PCK: %5.2f.\n' % (epoch, val_pck))
+
+
+ class AverageMeter:
+     r""" Stores loss, evaluation results, selected layers """
+     def __init__(self, benchmark):
+         r""" Constructor of AverageMeter """
+         self.buffer_keys = ['pck']
+         self.buffer = {}
+         for key in self.buffer_keys:
+             self.buffer[key] = []
+
+         self.loss_buffer = []
+
+     def update(self, eval_result, loss=None):
+         for key in self.buffer_keys:
+             self.buffer[key] += eval_result[key]
+
+         if loss is not None:
+             self.loss_buffer.append(loss)
+
+     def write_result(self, split, epoch):
+         msg = '\n*** %s ' % split
+         msg += '[@Epoch %02d] ' % epoch
+
+         if len(self.loss_buffer) > 0:
+             msg += 'Loss: %5.2f ' % (sum(self.loss_buffer) / len(self.loss_buffer))
+
+         for key in self.buffer_keys:
+             msg += '%s: %6.2f ' % (key.upper(), sum(self.buffer[key]) / len(self.buffer[key]))
+         msg += '***\n'
+         Logger.info(msg)
+
+     def write_process(self, batch_idx, datalen, epoch):
+         msg = '[Epoch: %02d] ' % epoch
+         msg += '[Batch: %04d/%04d] ' % (batch_idx + 1, datalen)
+         if len(self.loss_buffer) > 0:
+             msg += 'Loss: %5.2f ' % self.loss_buffer[-1]
+             msg += 'Avg Loss: %5.5f ' % (sum(self.loss_buffer) / len(self.loss_buffer))
+
+         for key in self.buffer_keys:
+             msg += 'Avg %s: %5.2f ' % (key.upper(), sum(self.buffer[key]) / len(self.buffer[key]) * 100)
+         Logger.info(msg)
+
+     def write_test_process(self, batch_idx, datalen):
+         msg = '[Batch: %04d/%04d] ' % (batch_idx + 1, datalen)
+
+         for key in self.buffer_keys:
+             if key == 'pck':
+                 pcks = torch.stack(self.buffer[key]).mean(dim=0) * 100
+                 val = ''
+                 for p in pcks:
+                     val += '%5.2f ' % p.item()
+                 msg += 'Avg %s: %s ' % (key.upper(), val)
+             else:
+                 msg += 'Avg %s: %5.2f ' % (key.upper(), sum(self.buffer[key]) / len(self.buffer[key]))
+         Logger.info(msg)
+
+     def get_test_result(self):
+         result = {}
+         for key in self.buffer_keys:
+             result[key] = torch.stack(self.buffer[key]).mean(dim=0) * 100
+
+         return result
data/__pycache__/dataset.cpython-38.pyc ADDED
Binary file (3.95 kB)
data/__pycache__/download.cpython-38.pyc ADDED
Binary file (2.56 kB)
data/__pycache__/pfpascal.cpython-38.pyc ADDED
Binary file (3.91 kB)
data/__pycache__/pfwillow.cpython-38.pyc ADDED
Binary file (2.85 kB)
data/__pycache__/spair.cpython-38.pyc ADDED
Binary file (5.51 kB)
data/dataset.py ADDED
@@ -0,0 +1,140 @@
+ r""" Superclass for semantic correspondence datasets """
+
+ import os
+
+ from torch.utils.data import Dataset
+ from torchvision import transforms
+ from PIL import Image
+ import torch
+
+ from model.base.geometry import Geometry
+
+
+ class CorrespondenceDataset(Dataset):
+     r""" Parent class of PFPascal, PFWillow, and SPair """
+     def __init__(self, benchmark, datapath, thres, split):
+         r""" CorrespondenceDataset constructor """
+         super(CorrespondenceDataset, self).__init__()
+
+         # {Directory name, Layout path, Image path, Annotation path, PCK threshold}
+         self.metadata = {
+             'pfwillow': ('PF-WILLOW',
+                          'test_pairs.csv',
+                          '',
+                          '',
+                          'bbox'),
+             'pfpascal': ('PF-PASCAL',
+                          '_pairs.csv',
+                          'JPEGImages',
+                          'Annotations',
+                          'img'),
+             'spair': ('SPair-71k',
+                       'Layout/large',
+                       'JPEGImages',
+                       'PairAnnotation',
+                       'bbox')
+         }
+
+         # Directory path for train, val, or test splits
+         base_path = os.path.join(os.path.abspath(datapath), self.metadata[benchmark][0])
+         if benchmark == 'pfpascal':
+             self.spt_path = os.path.join(base_path, split + '_pairs.csv')
+         elif benchmark == 'spair':
+             self.spt_path = os.path.join(base_path, self.metadata[benchmark][1], split + '.txt')
+         else:
+             self.spt_path = os.path.join(base_path, self.metadata[benchmark][1])
+
+         # Directory path for images
+         self.img_path = os.path.join(base_path, self.metadata[benchmark][2])
+
+         # Directory path for annotations
+         if benchmark == 'spair':
+             self.ann_path = os.path.join(base_path, self.metadata[benchmark][3], split)
+         else:
+             self.ann_path = os.path.join(base_path, self.metadata[benchmark][3])
+
+         # Miscellaneous
+         self.max_pts = 40
+         self.split = split
+         self.img_size = Geometry.img_size
+         self.benchmark = benchmark
+         self.range_ts = torch.arange(self.max_pts)
+         self.thres = self.metadata[benchmark][4] if thres == 'auto' else thres
+         self.transform = transforms.Compose([transforms.Resize((self.img_size, self.img_size)),
+                                              transforms.ToTensor(),
+                                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                                                   std=[0.229, 0.224, 0.225])])
+
+         # To get initialized in subclass constructors
+         self.train_data = []
+         self.src_imnames = []
+         self.trg_imnames = []
+         self.cls = []
+         self.cls_ids = []
+         self.src_kps = []
+         self.trg_kps = []
+
+     def __len__(self):
+         r""" Returns the number of pairs """
+         return len(self.train_data)
+
+     def __getitem__(self, idx):
+         r""" Constructs and returns a batch """
+
+         # Image names
+         batch = dict()
+         batch['src_imname'] = self.src_imnames[idx]
+         batch['trg_imname'] = self.trg_imnames[idx]
+
+         # Object category
+         batch['category_id'] = self.cls_ids[idx]
+         batch['category'] = self.cls[batch['category_id']]
+
+         # Image as PIL (original width, original height)
+         src_pil = self.get_image(self.src_imnames, idx)
+         trg_pil = self.get_image(self.trg_imnames, idx)
+         batch['src_imsize'] = src_pil.size
+         batch['trg_imsize'] = trg_pil.size
+
+         # Image as tensor
+         batch['src_img'] = self.transform(src_pil)
+         batch['trg_img'] = self.transform(trg_pil)
+
+         # Keypoints (re-scaled)
+         batch['src_kps'], num_pts = self.get_points(self.src_kps, idx, src_pil.size)
+         batch['trg_kps'], _ = self.get_points(self.trg_kps, idx, trg_pil.size)
+         batch['n_pts'] = torch.tensor(num_pts)
+
+         # Total number of pairs in training split
+         batch['datalen'] = len(self.train_data)
+
+         return batch
+
+     def get_image(self, imnames, idx):
+         r""" Reads a PIL image from path """
+         path = os.path.join(self.img_path, imnames[idx])
+         return Image.open(path).convert('RGB')
+
+     def get_pckthres(self, batch, imsize):
+         r""" Computes the PCK threshold """
+         if self.thres == 'bbox':
+             bbox = batch['trg_bbox'].clone()
+             bbox_w = (bbox[2] - bbox[0])
+             bbox_h = (bbox[3] - bbox[1])
+             pckthres = torch.max(bbox_w, bbox_h)
+         elif self.thres == 'img':
+             imsize_t = batch['trg_img'].size()
+             pckthres = torch.tensor(max(imsize_t[1], imsize_t[2]))
+         else:
+             raise Exception('Invalid pck threshold type: %s' % self.thres)
+         return pckthres.float()
+
+     def get_points(self, pts_list, idx, org_imsize):
+         r""" Returns keypoints of an image """
+         xy, n_pts = pts_list[idx].size()
+         pad_pts = torch.zeros((xy, self.max_pts - n_pts)) - 2
+         x_crds = pts_list[idx][0] * (self.img_size / org_imsize[0])
+         y_crds = pts_list[idx][1] * (self.img_size / org_imsize[1])
+         kps = torch.cat([torch.stack([x_crds, y_crds]), pad_pts], dim=1)
+
+         return kps, n_pts
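One convention from `get_points` worth remembering while reading the rest of the data code: keypoints are rescaled to the square model input and padded out to `max_pts` columns with the sentinel `-2`, the same value `Geometry.normalize_kps` treats as "no keypoint". A toy check with assumed numbers:

```python
import torch

# Assumed: 2 keypoints in a 480x360 image, model input 240, max_pts = 4
kps = torch.tensor([[100., 400.],   # x coordinates
                    [90., 180.]])   # y coordinates
img_size, max_pts, n_pts = 240, 4, 2
x_crds = kps[0] * (img_size / 480)               # -> [50., 200.]
y_crds = kps[1] * (img_size / 360)               # -> [60., 120.]
pad_pts = torch.zeros((2, max_pts - n_pts)) - 2  # sentinel columns
out = torch.cat([torch.stack([x_crds, y_crds]), pad_pts], dim=1)
print(out.shape, out[:, 2:])                     # torch.Size([2, 4]), all -2
```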
data/download.py ADDED
@@ -0,0 +1,91 @@
+ r""" Functions to download semantic correspondence datasets """
+
+ import tarfile
+ import os
+
+ import requests
+
+ from . import pfpascal
+ from . import pfwillow
+ from . import spair
+
+
+ def load_dataset(benchmark, datapath, thres, split='test'):
+     r""" Instantiate a correspondence dataset """
+     correspondence_benchmark = {
+         'spair': spair.SPairDataset,
+         'pfpascal': pfpascal.PFPascalDataset,
+         'pfwillow': pfwillow.PFWillowDataset
+     }
+
+     dataset = correspondence_benchmark.get(benchmark)
+     if dataset is None:
+         raise Exception('Invalid benchmark dataset %s.' % benchmark)
+
+     return dataset(benchmark, datapath, thres, split)
+
+
+ def download_from_google(token_id, filename):
+     r""" Download desired filename from Google drive """
+
+     print('Downloading %s ...' % os.path.basename(filename))
+
+     url = 'https://docs.google.com/uc?export=download'
+     destination = filename + '.tar.gz'
+     session = requests.Session()
+
+     response = session.get(url, params={'id': token_id}, stream=True)
+     token = get_confirm_token(response)
+
+     if token:
+         params = {'id': token_id, 'confirm': token}
+         response = session.get(url, params=params, stream=True)
+     save_response_content(response, destination)
+     file = tarfile.open(destination, 'r:gz')
+
+     print("Extracting %s ..." % destination)
+     file.extractall(filename)
+     file.close()
+
+     os.remove(destination)
+     os.rename(filename, filename + '_tmp')
+     os.rename(os.path.join(filename + '_tmp', os.path.basename(filename)), filename)
+     os.rmdir(filename + '_tmp')
+
+
+ def get_confirm_token(response):
+     r""" Retrieves confirm token """
+     for key, value in response.cookies.items():
+         if key.startswith('download_warning'):
+             return value
+
+     return None
+
+
+ def save_response_content(response, destination):
+     r""" Saves the response to the destination """
+     chunk_size = 32768
+
+     with open(destination, "wb") as file:
+         for chunk in response.iter_content(chunk_size):
+             if chunk:
+                 file.write(chunk)
+
+
+ def download_dataset(datapath, benchmark):
+     r""" Downloads semantic correspondence benchmark dataset from Google drive """
+     if not os.path.isdir(datapath):
+         os.mkdir(datapath)
+
+     file_data = {
+         # 'spair': ('1s73NVEFPro260H1tXxCh1ain7oApR8of', 'SPair-71k')  # old version
+         'spair': ('1KSvB0k2zXA06ojWNvFjBv0Ake426Y76k', 'SPair-71k'),
+         'pfpascal': ('1OOwpGzJnTsFXYh-YffMQ9XKM_Kl_zdzg', 'PF-PASCAL'),
+         'pfwillow': ('1tDP0y8RO5s45L-vqnortRaieiWENQco_', 'PF-WILLOW')
+     }
+
+     file_id, filename = file_data[benchmark]
+     abs_filepath = os.path.join(datapath, filename)
+
+     if not os.path.isdir(abs_filepath):
+         download_from_google(file_id, abs_filepath)
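`download_from_google` follows the usual Google Drive large-file flow: the first GET may answer with a `download_warning` cookie, whose value must be echoed back as a `confirm` parameter before the payload actually streams. Typical use of the module's two public entry points (paths are illustrative):

```python
from data import download

# Fetch and unpack PF-PASCAL into ./Datasets_CHM (skipped if already present),
# then instantiate the corresponding dataset object.
download.download_dataset('Datasets_CHM', 'pfpascal')
dataset = download.load_dataset('pfpascal', 'Datasets_CHM', thres='img', split='test')
print(len(dataset), 'image pairs')
```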
data/pfpascal.py ADDED
@@ -0,0 +1,108 @@
+ r""" PF-PASCAL dataset """
+
+ import os
+
+ import scipy.io as sio
+ import pandas as pd
+ import numpy as np
+ import torch
+
+ from .dataset import CorrespondenceDataset
+
+
+ class PFPascalDataset(CorrespondenceDataset):
+
+     def __init__(self, benchmark, datapath, thres, split):
+         r""" PF-PASCAL dataset constructor """
+         super(PFPascalDataset, self).__init__(benchmark, datapath, thres, split)
+
+         self.train_data = pd.read_csv(self.spt_path)
+         self.src_imnames = np.array(self.train_data.iloc[:, 0])
+         self.trg_imnames = np.array(self.train_data.iloc[:, 1])
+         self.cls = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
+                     'bus', 'car', 'cat', 'chair', 'cow',
+                     'diningtable', 'dog', 'horse', 'motorbike', 'person',
+                     'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
+         self.cls_ids = self.train_data.iloc[:, 2].values.astype('int') - 1
+
+         if split == 'trn':
+             self.flip = self.train_data.iloc[:, 3].values.astype('int')
+         self.src_kps = []
+         self.trg_kps = []
+         self.src_bbox = []
+         self.trg_bbox = []
+         for src_imname, trg_imname, cls in zip(self.src_imnames, self.trg_imnames, self.cls_ids):
+             src_anns = os.path.join(self.ann_path, self.cls[cls],
+                                     os.path.basename(src_imname))[:-4] + '.mat'
+             trg_anns = os.path.join(self.ann_path, self.cls[cls],
+                                     os.path.basename(trg_imname))[:-4] + '.mat'
+
+             src_kp = torch.tensor(read_mat(src_anns, 'kps')).float()
+             trg_kp = torch.tensor(read_mat(trg_anns, 'kps')).float()
+             src_box = torch.tensor(read_mat(src_anns, 'bbox')[0].astype(float))
+             trg_box = torch.tensor(read_mat(trg_anns, 'bbox')[0].astype(float))
+
+             src_kps = []
+             trg_kps = []
+             for src_kk, trg_kk in zip(src_kp, trg_kp):
+                 if len(torch.isnan(src_kk).nonzero()) != 0 or \
+                         len(torch.isnan(trg_kk).nonzero()) != 0:
+                     continue
+                 else:
+                     src_kps.append(src_kk)
+                     trg_kps.append(trg_kk)
+             self.src_kps.append(torch.stack(src_kps).t())
+             self.trg_kps.append(torch.stack(trg_kps).t())
+             self.src_bbox.append(src_box)
+             self.trg_bbox.append(trg_box)
+
+         self.src_imnames = list(map(lambda x: os.path.basename(x), self.src_imnames))
+         self.trg_imnames = list(map(lambda x: os.path.basename(x), self.trg_imnames))
+
+     def __getitem__(self, idx):
+         r""" Constructs and returns a batch for PF-PASCAL dataset """
+         batch = super(PFPascalDataset, self).__getitem__(idx)
+
+         # Object bounding-box (resized following self.img_size)
+         batch['src_bbox'] = self.get_bbox(self.src_bbox, idx, batch['src_imsize'])
+         batch['trg_bbox'] = self.get_bbox(self.trg_bbox, idx, batch['trg_imsize'])
+         batch['pckthres'] = self.get_pckthres(batch, batch['trg_imsize'])
+
+         # Horizontal flipping of key-points during training
+         if self.split == 'trn' and self.flip[idx]:
+             self.horizontal_flip(batch)
+             batch['flip'] = 1
+         else:
+             batch['flip'] = 0
+
+         return batch
+
+     def get_bbox(self, bbox_list, idx, imsize):
+         r""" Returns object bounding-box """
+         bbox = bbox_list[idx].clone()
+         bbox[0::2] *= (self.img_size / imsize[0])
+         bbox[1::2] *= (self.img_size / imsize[1])
+         return bbox
+
+     def horizontal_flip(self, batch):
+         tmp = batch['src_bbox'][0].clone()
+         batch['src_bbox'][0] = batch['src_img'].size(2) - batch['src_bbox'][2]
+         batch['src_bbox'][2] = batch['src_img'].size(2) - tmp
+
+         tmp = batch['trg_bbox'][0].clone()
+         batch['trg_bbox'][0] = batch['trg_img'].size(2) - batch['trg_bbox'][2]
+         batch['trg_bbox'][2] = batch['trg_img'].size(2) - tmp
+
+         batch['src_kps'][0][:batch['n_pts']] = batch['src_img'].size(2) - batch['src_kps'][0][:batch['n_pts']]
+         batch['trg_kps'][0][:batch['n_pts']] = batch['trg_img'].size(2) - batch['trg_kps'][0][:batch['n_pts']]
+
+         batch['src_img'] = torch.flip(batch['src_img'], dims=(2,))
+         batch['trg_img'] = torch.flip(batch['trg_img'], dims=(2,))
+
+
+ def read_mat(path, obj_name):
+     r""" Reads specified objects from a Matlab data file (.mat) """
+     mat_contents = sio.loadmat(path)
+     mat_obj = mat_contents[obj_name]
+
+     return mat_obj
data/pfwillow.py ADDED
@@ -0,0 +1,56 @@
+ r""" PF-WILLOW dataset """
+
+ import os
+
+ import pandas as pd
+ import numpy as np
+ import torch
+
+ from .dataset import CorrespondenceDataset
+
+
+ class PFWillowDataset(CorrespondenceDataset):
+
+     def __init__(self, benchmark, datapath, thres, split):
+         r""" PF-WILLOW dataset constructor """
+         super(PFWillowDataset, self).__init__(benchmark, datapath, thres, split)
+
+         self.train_data = pd.read_csv(self.spt_path)
+         self.src_imnames = np.array(self.train_data.iloc[:, 0])
+         self.trg_imnames = np.array(self.train_data.iloc[:, 1])
+         self.src_kps = self.train_data.iloc[:, 2:22].values
+         self.trg_kps = self.train_data.iloc[:, 22:].values
+         self.cls = ['car(G)', 'car(M)', 'car(S)', 'duck(S)',
+                     'motorbike(G)', 'motorbike(M)', 'motorbike(S)',
+                     'winebottle(M)', 'winebottle(wC)', 'winebottle(woC)']
+         self.cls_ids = list(map(lambda names: self.cls.index(names.split('/')[1]), self.src_imnames))
+         self.src_imnames = list(map(lambda x: os.path.join(*x.split('/')[1:]), self.src_imnames))
+         self.trg_imnames = list(map(lambda x: os.path.join(*x.split('/')[1:]), self.trg_imnames))
+
+     def __getitem__(self, idx):
+         r""" Constructs and returns a batch for the PF-WILLOW dataset """
+         batch = super(PFWillowDataset, self).__getitem__(idx)
+         batch['pckthres'] = self.get_pckthres(batch)
+
+         return batch
+
+     def get_pckthres(self, batch):
+         r""" Computes the PCK threshold """
+         if self.thres == 'bbox':
+             return max(batch['trg_kps'].max(1)[0] - batch['trg_kps'].min(1)[0]).clone()
+         elif self.thres == 'img':
+             return torch.tensor(max(batch['trg_img'].size()[1], batch['trg_img'].size()[2]))
+         else:
+             raise Exception('Invalid pck evaluation level: %s' % self.thres)
+
+     def get_points(self, pts_list, idx, org_imsize):
+         r""" Returns keypoints of an image """
+         point_coords = pts_list[idx, :].reshape(2, 10)
+         point_coords = torch.tensor(point_coords.astype(np.float32))
+         xy, n_pts = point_coords.size()
+         pad_pts = torch.zeros((xy, self.max_pts - n_pts)) - 2
+         x_crds = point_coords[0] * (self.img_size / org_imsize[0])
+         y_crds = point_coords[1] * (self.img_size / org_imsize[1])
+         kps = torch.cat([torch.stack([x_crds, y_crds]), pad_pts], dim=1)
+
+         return kps, n_pts
data/spair.py ADDED
@@ -0,0 +1,105 @@
+ r""" SPair-71k dataset """
+
+ import json
+ import glob
+ import os
+
+ import torch.nn.functional as F
+ import torch
+ from PIL import Image
+ import numpy as np
+
+ from .dataset import CorrespondenceDataset
+
+
+ class SPairDataset(CorrespondenceDataset):
+
+     def __init__(self, benchmark, datapath, thres, split):
+         r""" SPair-71k dataset constructor """
+         super(SPairDataset, self).__init__(benchmark, datapath, thres, split)
+
+         self.train_data = open(self.spt_path).read().split('\n')
+         self.train_data = self.train_data[:len(self.train_data) - 1]
+         self.src_imnames = list(map(lambda x: x.split('-')[1] + '.jpg', self.train_data))
+         self.trg_imnames = list(map(lambda x: x.split('-')[2].split(':')[0] + '.jpg', self.train_data))
+         self.seg_path = os.path.abspath(os.path.join(self.img_path, os.pardir, 'Segmentation'))
+         self.cls = os.listdir(self.img_path)
+         self.cls.sort()
+
+         anntn_files = []
+         for data_name in self.train_data:
+             anntn_files.append(glob.glob('%s/%s.json' % (self.ann_path, data_name))[0])
+         anntn_files = list(map(lambda x: json.load(open(x)), anntn_files))
+         self.src_kps = list(map(lambda x: torch.tensor(x['src_kps']).t().float(), anntn_files))
+         self.trg_kps = list(map(lambda x: torch.tensor(x['trg_kps']).t().float(), anntn_files))
+         self.src_bbox = list(map(lambda x: torch.tensor(x['src_bndbox']).float(), anntn_files))
+         self.trg_bbox = list(map(lambda x: torch.tensor(x['trg_bndbox']).float(), anntn_files))
+         self.cls_ids = list(map(lambda x: self.cls.index(x['category']), anntn_files))
+
+         self.vpvar = list(map(lambda x: torch.tensor(x['viewpoint_variation']), anntn_files))
+         self.scvar = list(map(lambda x: torch.tensor(x['scale_variation']), anntn_files))
+         self.trncn = list(map(lambda x: torch.tensor(x['truncation']), anntn_files))
+         self.occln = list(map(lambda x: torch.tensor(x['occlusion']), anntn_files))
+
+     def __getitem__(self, idx):
+         r""" Constructs and returns a batch for the SPair-71k dataset """
+         sample = super(SPairDataset, self).__getitem__(idx)
+
+         sample['src_mask'] = self.get_mask(sample, sample['src_imname'])
+         sample['trg_mask'] = self.get_mask(sample, sample['trg_imname'])
+
+         sample['src_bbox'] = self.get_bbox(self.src_bbox, idx, sample['src_imsize'])
+         sample['trg_bbox'] = self.get_bbox(self.trg_bbox, idx, sample['trg_imsize'])
+         sample['pckthres'] = self.get_pckthres(sample, sample['trg_imsize'])
+
+         sample['vpvar'] = self.vpvar[idx]
+         sample['scvar'] = self.scvar[idx]
+         sample['trncn'] = self.trncn[idx]
+         sample['occln'] = self.occln[idx]
+
+         return sample
+
+     def get_mask(self, sample, imname):
+         mask_path = os.path.join(self.seg_path, sample['category'], imname.split('.')[0] + '.png')
+
+         tensor_mask = torch.tensor(np.array(Image.open(mask_path)))
+
+         class_dict = {'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4,
+                       'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9,
+                       'diningtable': 10, 'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14,
+                       'pottedplant': 15, 'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19}
+
+         class_id = class_dict[sample['category']] + 1
+         tensor_mask[tensor_mask != class_id] = 0
+         tensor_mask[tensor_mask == class_id] = 255
+
+         tensor_mask = F.interpolate(tensor_mask.unsqueeze(0).unsqueeze(0).float(),
+                                     size=(self.img_size, self.img_size),
+                                     mode='bilinear', align_corners=True).int().squeeze()
+
+         return tensor_mask
+
+     def get_image(self, img_names, idx):
+         r""" Returns a PIL image """
+         path = os.path.join(self.img_path, self.cls[self.cls_ids[idx]], img_names[idx])
+
+         return Image.open(path).convert('RGB')
+
+     def get_pckthres(self, sample, imsize):
+         r""" Computes the PCK threshold """
+         return super(SPairDataset, self).get_pckthres(sample, imsize)
+
+     def get_points(self, pts_list, idx, imsize):
+         r""" Returns keypoints of an image """
+         return super(SPairDataset, self).get_points(pts_list, idx, imsize)
+
+     def match_idx(self, kps, n_pts):
+         r""" Samples the nearest feature (receptive field) indices """
+         return super(SPairDataset, self).match_idx(kps, n_pts)
+
+     def get_bbox(self, bbox_list, idx, imsize):
+         r""" Returns object bounding-box """
+         bbox = bbox_list[idx].clone()
+         bbox[0::2] *= (self.img_size / imsize[0])
+         bbox[1::2] *= (self.img_size / imsize[1])
+         return bbox
model/__pycache__/chmlearner.cpython-38.pyc ADDED
Binary file (1.85 kB)
model/__pycache__/chmnet.cpython-38.pyc ADDED
Binary file (1.8 kB)
model/base/__pycache__/backbone.cpython-38.pyc ADDED
Binary file (4.14 kB)
model/base/__pycache__/chm.cpython-38.pyc ADDED
Binary file (6.85 kB)
model/base/__pycache__/chm_kernel.cpython-38.pyc ADDED
Binary file (2.03 kB)
model/base/__pycache__/correlation.cpython-38.pyc ADDED
Binary file (2.09 kB)
model/base/__pycache__/geometry.cpython-38.pyc ADDED
Binary file (4.69 kB)
model/base/backbone.py ADDED
@@ -0,0 +1,136 @@
+ r""" ResNet-101 backbone network """
+
+ import torch.utils.model_zoo as model_zoo
+ import torch.nn as nn
+ import torch
+
+
+ __all__ = ['Backbone', 'resnet101']
+
+
+ model_urls = {
+     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+ }
+
+
+ def conv3x3(in_planes, out_planes, stride=1):
+     r""" 3x3 convolution with padding """
+     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                      padding=1, groups=2, bias=False)
+
+
+ def conv1x1(in_planes, out_planes, stride=1):
+     r""" 1x1 convolution """
+     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, groups=2, bias=False)
+
+
+ class Bottleneck(nn.Module):
+     expansion = 4
+
+     def __init__(self, inplanes, planes, stride=1, downsample=None):
+         super(Bottleneck, self).__init__()
+         self.conv1 = conv1x1(inplanes, planes)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = conv3x3(planes, planes, stride)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv3 = conv1x1(planes, planes * self.expansion)
+         self.bn3 = nn.BatchNorm2d(planes * self.expansion)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x):
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+         out = self.relu(out)
+
+         out = self.conv3(out)
+         out = self.bn3(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class Backbone(nn.Module):
+     def __init__(self, block, layers, zero_init_residual=False):
+         super(Backbone, self).__init__()
+
+         self.inplanes = 128
+         self.conv1 = nn.Conv2d(6, 128, kernel_size=7, stride=2, padding=3, groups=2,
+                                bias=False)
+         self.bn1 = nn.BatchNorm2d(128)
+         self.relu = nn.ReLU(inplace=True)
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         self.layer1 = self._make_layer(block, 128, layers[0])
+         self.layer2 = self._make_layer(block, 256, layers[1], stride=2)
+         self.layer3 = self._make_layer(block, 512, layers[2], stride=2)
+         self.layer4 = self._make_layer(block, 1024, layers[3], stride=2)
+         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+         self.fc = nn.Linear(512 * block.expansion, 1000)
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+             elif isinstance(m, nn.BatchNorm2d):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+         # Zero-initialize the last BN in each residual branch,
+         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+         if zero_init_residual:
+             for m in self.modules():
+                 if isinstance(m, Bottleneck):
+                     nn.init.constant_(m.bn3.weight, 0)
+
+     def _make_layer(self, block, planes, blocks, stride=1):
+         downsample = None
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 conv1x1(self.inplanes, planes * block.expansion, stride),
+                 nn.BatchNorm2d(planes * block.expansion),
+             )
+
+         layers = []
+         layers.append(block(self.inplanes, planes, stride, downsample))
+         self.inplanes = planes * block.expansion
+         for _ in range(1, blocks):
+             layers.append(block(self.inplanes, planes))
+
+         return nn.Sequential(*layers)
+
+
+ def resnet101(pretrained=False, **kwargs):
+     """Constructs a ResNet-101 model.
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+     """
+     model = Backbone(Bottleneck, [3, 4, 23, 3], **kwargs)
+     if pretrained:
+         weights = model_zoo.load_url(model_urls['resnet101'])
+
+         for key in weights:
+             if key.split('.')[0] == 'fc':
+                 weights[key] = weights[key].clone()
+                 continue
+             weights[key] = torch.cat([weights[key].clone(), weights[key].clone()], dim=0)
+
+         model.load_state_dict(weights)
+     return model
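The backbone runs both images through one grouped-convolution ResNet-101: source and target are concatenated channel-wise (6 input channels), every convolution uses `groups=2` so the two streams never mix, and `resnet101(pretrained=True)` duplicates the single-stream ImageNet weights along the output-channel dimension so each group starts from the same filters. A quick shape check of the grouping idea (toy sizes):

```python
import torch
import torch.nn as nn

# groups=2 keeps the two 3-channel streams independent inside one conv
conv = nn.Conv2d(6, 128, kernel_size=7, stride=2, padding=3, groups=2, bias=False)
x = torch.cat([torch.randn(1, 3, 64, 64),    # "source" channels
               torch.randn(1, 3, 64, 64)],   # "target" channels
              dim=1)
print(conv(x).shape)   # torch.Size([1, 128, 32, 32]); the first 64 output
                       # channels see only the source, the last 64 only the target
```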
model/base/chm.py ADDED
@@ -0,0 +1,190 @@
+ r""" 4D and 6D convolutional Hough matching layers """
+
+ from torch.nn.modules.conv import _ConvNd
+ import torch.nn.functional as F
+ import torch.nn as nn
+ import torch
+
+ from common.logger import Logger
+ from . import chm_kernel
+
+
+ def fast4d(corr, kernel, bias=None):
+     r""" Optimized implementation of 4D convolution """
+     bsz, ch, srch, srcw, trgh, trgw = corr.size()
+     out_channels, _, kernel_size, kernel_size, kernel_size, kernel_size = kernel.size()
+     psz = kernel_size // 2
+
+     out_corr = torch.zeros((bsz, out_channels, srch, srcw, trgh, trgw))
+     corr = corr.transpose(1, 2).contiguous().view(bsz * srch, ch, srcw, trgh, trgw)
+
+     for pidx, k3d in enumerate(kernel.permute(2, 0, 1, 3, 4, 5)):
+         inter_corr = F.conv3d(corr, k3d, bias=None, stride=1, padding=psz)
+         inter_corr = inter_corr.view(bsz, srch, out_channels, srcw, trgh, trgw).transpose(1, 2).contiguous()
+
+         add_sid = max(psz - pidx, 0)
+         add_fid = min(srch, srch + psz - pidx)
+         slc_sid = max(pidx - psz, 0)
+         slc_fid = min(srch, srch - psz + pidx)
+
+         out_corr[:, :, add_sid:add_fid, :, :, :] += inter_corr[:, :, slc_sid:slc_fid, :, :, :]
+
+     if bias is not None:
+         out_corr += bias.view(1, out_channels, 1, 1, 1, 1)
+
+     return out_corr
+
+
+ def fast6d(corr, kernel, bias, diagonal_idx):
+     r""" Optimized implementation of 6D convolutional Hough matching
+     NOTE: this function only supports a kernel size of (3, 3, 5, 5, 5, 5).
+     """
+     bsz, _, s6d, s6d, s4d, s4d, s4d, s4d = corr.size()
+     _, _, ks6d, ks6d, ks4d, ks4d, ks4d, ks4d = kernel.size()
+     corr = corr.permute(0, 2, 3, 1, 4, 5, 6, 7).contiguous().view(-1, 1, s4d, s4d, s4d, s4d)
+     kernel = kernel.view(-1, ks6d ** 2, ks4d, ks4d, ks4d, ks4d).transpose(0, 1)
+     corr = fast4d(corr, kernel).view(bsz, s6d * s6d, ks6d * ks6d, s4d, s4d, s4d, s4d)
+     corr = corr.view(bsz, s6d, s6d, ks6d, ks6d, s4d, s4d, s4d, s4d).transpose(2, 3).\
+         contiguous().view(-1, s6d * ks6d, s4d, s4d, s4d, s4d)
+
+     ndiag = s6d + (ks6d // 2) * 2
+     first_sum = []
+     for didx in diagonal_idx:
+         first_sum.append(corr[:, didx, :, :, :, :].sum(dim=1))
+     first_sum = torch.stack(first_sum).transpose(0, 1).view(bsz, s6d * ks6d, ndiag, s4d, s4d, s4d, s4d)
+
+     corr = []
+     for didx in diagonal_idx:
+         corr.append(first_sum[:, didx, :, :, :, :, :].sum(dim=1))
+     sidx = ks6d // 2
+     eidx = ndiag - sidx
+     corr = torch.stack(corr).transpose(0, 1)[:, sidx:eidx, sidx:eidx, :, :, :, :].unsqueeze(1).contiguous()
+     corr += bias.view(1, -1, 1, 1, 1, 1, 1, 1)
+
+     reverse_idx = torch.linspace(s6d * s6d - 1, 0, s6d * s6d).long()
+     corr = corr.view(bsz, 1, s6d * s6d, s4d, s4d, s4d, s4d)[:, :, reverse_idx, :, :, :, :].\
+         view(bsz, 1, s6d, s6d, s4d, s4d, s4d, s4d)
+     return corr
+
+
+ def init_param_idx4d(param_dict):
+     param_idx = []
+     for key in param_dict:
+         curr_offset = int(key.split('_')[-1])
+         param_idx.append(torch.tensor(param_dict[key]))
+     return param_idx
+
+
+ class CHM4d(_ConvNd):
+     r""" 4D convolutional Hough matching layer
+     NOTE: this layer only supports in_channels=1 and out_channels=1.
+     """
+     def __init__(self, in_channels, out_channels, ksz4d, ktype, bias=True):
+         super(CHM4d, self).__init__(in_channels, out_channels, (ksz4d,) * 4,
+                                     (1,) * 4, (0,) * 4, (1,) * 4, False, (0,) * 4,
+                                     1, bias, padding_mode='zeros')
+
+         # Zero kernel initialization
+         self.zero_kernel4d = torch.zeros((in_channels, out_channels, ksz4d, ksz4d, ksz4d, ksz4d))
+         self.nkernels = in_channels * out_channels
+
+         # Initialize kernel indices
+         param_dict4d = chm_kernel.KernelGenerator(ksz4d, ktype).generate()
+         param_shared = param_dict4d is not None
+
+         if param_shared:
+             # Initialize the shared parameters (multiplied by the number of times being shared)
+             self.param_idx = init_param_idx4d(param_dict4d)
+             weights = torch.abs(torch.randn(len(self.param_idx) * self.nkernels)) * 1e-3
+             for weight, param_idx in zip(weights.sort()[0], self.param_idx):
+                 weight *= len(param_idx)
+             self.weight = nn.Parameter(weights)
+         else:  # full kernel initialization
+             self.param_idx = None
+             self.weight = nn.Parameter(torch.abs(self.weight))
+         if bias: self.bias = nn.Parameter(torch.tensor(0.0))
+         Logger.info('(%s) # params in CHM 4D: %d' % (ktype, len(self.weight.view(-1))))
+
+     def forward(self, x):
+         kernel = self.init_kernel()
+         x = fast4d(x, kernel, self.bias)
+         return x
+
+     def init_kernel(self):
+         # Initialize CHM kernel (divided by the number of times being shared)
+         ksz = self.kernel_size[-1]
+         if self.param_idx is None:
+             kernel = self.weight
+         else:
+             kernel = torch.zeros_like(self.zero_kernel4d)
+             for idx, pdx in enumerate(self.param_idx):
+                 kernel = kernel.view(-1, ksz, ksz, ksz, ksz)
+                 for jdx, kernel_single in enumerate(kernel):
+                     weight = self.weight[idx + jdx * len(self.param_idx)].repeat(len(pdx)) / len(pdx)
+                     kernel_single.view(-1)[pdx] += weight
+             kernel = kernel.view(self.in_channels, self.out_channels, ksz, ksz, ksz, ksz)
+         return kernel
+
+
+ class CHM6d(_ConvNd):
+     r""" 6D convolutional Hough matching layer with kernel (3, 3, 5, 5, 5, 5)
+     NOTE: this layer only supports in_channels=1 and out_channels=1.
+     """
+     def __init__(self, in_channels, out_channels, ksz6d, ksz4d, ktype):
+         kernel_size = (ksz6d, ksz6d, ksz4d, ksz4d, ksz4d, ksz4d)
+         super(CHM6d, self).__init__(in_channels, out_channels, kernel_size, (1,) * 6,
+                                     (0,) * 6, (1,) * 6, False, (0,) * 6,
+                                     1, bias=True, padding_mode='zeros')
+
+         # Zero kernel initialization
+         self.zero_kernel4d = torch.zeros((ksz4d, ksz4d, ksz4d, ksz4d))
+         self.zero_kernel6d = torch.zeros((ksz6d, ksz6d, ksz4d, ksz4d, ksz4d, ksz4d))
+         self.nkernels = in_channels * out_channels
+
+         # Initialize kernel indices
+         # Indices in scale-space where 4D convolutions are performed (3 by 3 scale-space)
+         self.diagonal_idx = [torch.tensor(x) for x in [[6], [3, 7], [0, 4, 8], [1, 5], [2]]]
+         param_dict4d = chm_kernel.KernelGenerator(ksz4d, ktype).generate()
+         param_shared = param_dict4d is not None
+
+         if param_shared:  # psi & iso kernel initialization
+             if ktype == 'psi':
+                 self.param_dict6d = [[4], [0, 8], [2, 6], [1, 3, 5, 7]]
+             elif ktype == 'iso':
+                 self.param_dict6d = [[0, 4, 8], [2, 6], [1, 3, 5, 7]]
+             self.param_dict6d = [torch.tensor(i) for i in self.param_dict6d]
+
+             # Initialize the shared parameters (multiplied by the number of times being shared)
+             self.param_idx = init_param_idx4d(param_dict4d)
+             self.param = []
+             for param_dict6d in self.param_dict6d:
+                 weights = torch.abs(torch.randn(len(self.param_idx))) * 1e-3
+                 for weight, param_idx in zip(weights, self.param_idx):
+                     weight *= (len(param_idx) * len(param_dict6d))
+                 self.param.append(nn.Parameter(weights))
+             self.param = nn.ParameterList(self.param)
+         else:  # full kernel initialization
+             self.param_idx = None
+             self.param = nn.Parameter(torch.abs(self.weight) * 1e-3)
+         Logger.info('(%s) # params in CHM 6D: %d' % (ktype, sum([len(x.view(-1)) for x in self.param])))
+         self.weight = None
+
+     def forward(self, corr):
+         kernel = self.init_kernel()
+         corr = fast6d(corr, kernel, self.bias, self.diagonal_idx)
+         return corr
+
+     def init_kernel(self):
+         # Initialize CHM kernel (divided by the number of times being shared)
+         if self.param_idx is None:
+             return self.param
+
+         kernel6d = torch.zeros_like(self.zero_kernel6d)
+         for idx, (param, param_dict6d) in enumerate(zip(self.param, self.param_dict6d)):
+             ksz4d = self.kernel_size[-1]
+             kernel4d = torch.zeros_like(self.zero_kernel4d)
+             for jdx, pdx in enumerate(self.param_idx):
+                 kernel4d.view(-1)[pdx] += ((param[jdx] / len(pdx)) / len(param_dict6d))
+             kernel6d.view(-1, ksz4d, ksz4d, ksz4d, ksz4d)[param_dict6d] += kernel4d.view(ksz4d, ksz4d, ksz4d, ksz4d)
+         kernel6d = kernel6d.unsqueeze(0).unsqueeze(0)
+
+         return kernel6d
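`fast4d` realizes a 4D convolution as `ksz` separate 3D convolutions: the source-height axis is folded into the batch, each slice of the 4D kernel along that axis is applied with `F.conv3d`, and the partial results are added back at the appropriate vertical offset. A shape-level sanity sketch (toy sizes, random kernel; run from the repo root so `model` and `common` are importable):

```python
import torch
from model.base.chm import fast4d

corr = torch.randn(1, 1, 6, 6, 6, 6)     # (bsz, ch, srch, srcw, trgh, trgw)
kernel = torch.randn(1, 1, 3, 3, 3, 3)   # 4D kernel with ksz = 3
out = fast4d(corr, kernel)
print(out.shape)                         # torch.Size([1, 1, 6, 6, 6, 6]):
                                         # padding ksz//2 preserves all sides
```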
model/base/chm_kernel.py ADDED
@@ -0,0 +1,66 @@
+ r""" CHM 4D kernel (psi, iso, and full) generator """
+
+ import torch
+
+ from .geometry import Geometry
+
+
+ class KernelGenerator:
+     def __init__(self, ksz, ktype):
+         self.ksz = ksz
+         self.idx4d = Geometry.init_idx4d(ksz)
+         self.kernel = torch.zeros((ksz, ksz, ksz, ksz))
+         self.center = (ksz // 2, ksz // 2)
+         self.ktype = ktype
+
+     def quadrant(self, crd):
+         if crd[0] < self.center[0]:
+             horz_quad = -1
+         elif crd[0] > self.center[0]:
+             horz_quad = 1
+         else:
+             horz_quad = 0
+
+         if crd[1] < self.center[1]:
+             vert_quad = -1
+         elif crd[1] > self.center[1]:
+             vert_quad = 1
+         else:
+             vert_quad = 0
+
+         return horz_quad, vert_quad
+
+     def generate(self):
+         return None if self.ktype == 'full' else self.generate_chm_kernel()
+
+     def generate_chm_kernel(self):
+         param_dict = {}
+         for idx in self.idx4d:
+             src_i, src_j, trg_i, trg_j = idx
+             d_tail = Geometry.get_distance((src_i, src_j), self.center)
+             d_head = Geometry.get_distance((trg_i, trg_j), self.center)
+             d_off = Geometry.get_distance((src_i, src_j), (trg_i, trg_j))
+             horz_quad, vert_quad = self.quadrant((src_j, src_i))
+
+             src_crd = (src_i, src_j)
+             trg_crd = (trg_i, trg_j)
+
+             key = self.build_key(horz_quad, vert_quad, d_head, d_tail, src_crd, trg_crd, d_off)
+             coord1d = Geometry.get_coord1d((src_i, src_j, trg_i, trg_j), self.ksz)
+
+             if param_dict.get(key) is None: param_dict[key] = []
+             param_dict[key].append(coord1d)
+
+         return param_dict
+
+     def build_key(self, horz_quad, vert_quad, d_head, d_tail, src_crd, trg_crd, d_off):
+         if self.ktype == 'iso':
+             return '%d' % d_off
+         elif self.ktype == 'psi':
+             d_max = max(d_head, d_tail)
+             d_min = min(d_head, d_tail)
+             return '%d_%d_%d' % (d_max, d_min, d_off)
+         else:
+             raise Exception('not implemented.')
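`KernelGenerator` groups the cells of a 4D kernel that should share one learnable weight: `iso` keys cells only by the squared source-target offset, while `psi` additionally keys by the (max, min) squared distances of the two endpoints to the kernel center; `full` returns `None`, meaning every cell is free. The number of distinct keys is the per-kernel parameter count, which can be inspected directly:

```python
from model.base.chm_kernel import KernelGenerator

# Count the shared weights of a 5x5x5x5 kernel under each sharing scheme
for ktype in ('psi', 'iso'):
    param_dict = KernelGenerator(5, ktype).generate()
    print(ktype, len(param_dict), 'shared parameters')
```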
model/base/correlation.py ADDED
@@ -0,0 +1,68 @@
+ r""" Provides functions that create/manipulate correlation matrices """
+
+ import math
+
+ from torch.nn.functional import interpolate as resize
+ import torch
+
+ from .geometry import Geometry
+
+
+ class Correlation:
+
+     @classmethod
+     def mutual_nn_filter(cls, correlation_matrix, eps=1e-30):
+         r""" Mutual nearest neighbor filtering (Rocco et al. NeurIPS'18) """
+         corr_src_max = torch.max(correlation_matrix, dim=2, keepdim=True)[0]
+         corr_trg_max = torch.max(correlation_matrix, dim=1, keepdim=True)[0]
+         corr_src_max[corr_src_max == 0] += eps
+         corr_trg_max[corr_trg_max == 0] += eps
+
+         corr_src = correlation_matrix / corr_src_max
+         corr_trg = correlation_matrix / corr_trg_max
+
+         return correlation_matrix * (corr_src * corr_trg)
+
+     @classmethod
+     def build_correlation6d(cls, src_feat, trg_feat, scales, conv2ds):
+         r""" Builds a 6-dimensional correlation tensor """
+
+         bsz, _, side, side = src_feat.size()
+
+         # Construct feature pairs with multiple scales
+         _src_feats = []
+         _trg_feats = []
+         for scale, conv in zip(scales, conv2ds):
+             s = (round(side * math.sqrt(scale)),) * 2
+             _src_feat = conv(resize(src_feat, s, mode='bilinear', align_corners=True))
+             _trg_feat = conv(resize(trg_feat, s, mode='bilinear', align_corners=True))
+             _src_feats.append(_src_feat)
+             _trg_feats.append(_trg_feat)
+
+         # Build multiple 4-dimensional correlation tensors
+         corr6d = []
+         for src_feat in _src_feats:
+             ch = src_feat.size(1)
+
+             src_side = src_feat.size(-1)
+             src_feat = src_feat.view(bsz, ch, -1).transpose(1, 2)
+             src_norm = src_feat.norm(p=2, dim=2, keepdim=True)
+
+             for trg_feat in _trg_feats:
+                 trg_side = trg_feat.size(-1)
+                 trg_feat = trg_feat.view(bsz, ch, -1)
+                 trg_norm = trg_feat.norm(p=2, dim=1, keepdim=True)
+
+                 correlation = torch.bmm(src_feat, trg_feat) / torch.bmm(src_norm, trg_norm)
+                 correlation = correlation.view(bsz, src_side, src_side, trg_side, trg_side).contiguous()
+                 corr6d.append(correlation)
+
+         # Resize the spatial sizes of the 4D tensors to the same size
+         for idx, correlation in enumerate(corr6d):
+             corr6d[idx] = Geometry.interpolate4d(correlation, [side, side])
+
+         # Build the 6-dimensional correlation tensor
+         corr6d = torch.stack(corr6d).view(len(scales), len(scales),
+                                           bsz, side, side, side, side).permute(2, 0, 1, 3, 4, 5, 6)
+         return corr6d.clamp(min=0)
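`mutual_nn_filter` damps correlations that are not reciprocally maximal: each score is rescaled by its ratio to its row maximum and to its column maximum, so only mutual nearest neighbours keep (close to) full weight. A toy check (run from the repo root):

```python
import torch
from model.base.correlation import Correlation

c = torch.tensor([[[1.0, 0.2],
                   [0.9, 0.3]]])   # (bsz, src, trg) correlation matrix
print(Correlation.mutual_nn_filter(c))
# (0,0) is a mutual max and keeps 1.00; (1,0) is its row's max but not its
# column's max, so it drops from 0.90 to 0.81; the off-maxima shrink hard.
```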
model/base/geometry.py ADDED
@@ -0,0 +1,133 @@
+ r""" Provides functions that manipulate boxes and points """
+
+ import math
+
+ import torch.nn.functional as F
+ import torch
+
+
+ class Geometry(object):
+
+     @classmethod
+     def initialize(cls, img_size):
+         cls.img_size = img_size
+
+         cls.spatial_side = int(img_size / 8)
+         norm_grid1d = torch.linspace(-1, 1, cls.spatial_side)
+
+         cls.norm_grid_x = norm_grid1d.view(1, -1).repeat(cls.spatial_side, 1).view(1, 1, -1)
+         cls.norm_grid_y = norm_grid1d.view(-1, 1).repeat(1, cls.spatial_side).view(1, 1, -1)
+         cls.grid = torch.stack(list(reversed(torch.meshgrid(norm_grid1d, norm_grid1d)))).permute(1, 2, 0)
+
+         cls.feat_idx = torch.arange(0, cls.spatial_side).float()
+
+     @classmethod
+     def normalize_kps(cls, kps):
+         kps = kps.clone().detach()
+         kps[kps != -2] -= (cls.img_size // 2)
+         kps[kps != -2] /= (cls.img_size // 2)
+         return kps
+
+     @classmethod
+     def unnormalize_kps(cls, kps):
+         kps = kps.clone().detach()
+         kps[kps != -2] *= (cls.img_size // 2)
+         kps[kps != -2] += (cls.img_size // 2)
+         return kps
+
+     @classmethod
+     def attentive_indexing(cls, kps, thres=0.1):
+         r""" kps: normalized keypoints x, y (N, 2)
+         returns an attentive index map (N, spatial_side, spatial_side)
+         """
+         nkps = kps.size(0)
+         kps = kps.view(nkps, 1, 1, 2)
+
+         eps = 1e-5
+         attmap = (cls.grid.unsqueeze(0).repeat(nkps, 1, 1, 1) - kps).pow(2).sum(dim=3)
+         attmap = (attmap + eps).pow(0.5)
+         attmap = (thres - attmap).clamp(min=0).view(nkps, -1)
+         attmap = attmap / attmap.sum(dim=1, keepdim=True)
+         attmap = attmap.view(nkps, cls.spatial_side, cls.spatial_side)
+
+         return attmap
+
+     @classmethod
+     def apply_gaussian_kernel(cls, corr, sigma=17):
+         bsz, side, side = corr.size()
+
+         center = corr.max(dim=2)[1]
+         center_y = center // cls.spatial_side
+         center_x = center % cls.spatial_side
+
+         y = cls.feat_idx.view(1, 1, cls.spatial_side).repeat(bsz, center_y.size(1), 1) - center_y.unsqueeze(2)
+         x = cls.feat_idx.view(1, 1, cls.spatial_side).repeat(bsz, center_x.size(1), 1) - center_x.unsqueeze(2)
+
+         y = y.unsqueeze(3).repeat(1, 1, 1, cls.spatial_side)
+         x = x.unsqueeze(2).repeat(1, 1, cls.spatial_side, 1)
+
+         gauss_kernel = torch.exp(-(x.pow(2) + y.pow(2)) / (2 * sigma ** 2))
+         filtered_corr = gauss_kernel * corr.view(bsz, -1, cls.spatial_side, cls.spatial_side)
+         filtered_corr = filtered_corr.view(bsz, side, side)
+
+         return filtered_corr
+
+     @classmethod
+     def transfer_kps(cls, confidence_ts, src_kps, n_pts, normalized):
+         r""" Transfers keypoints by weighted average """
+
+         if not normalized:
+             src_kps = Geometry.normalize_kps(src_kps)
+         confidence_ts = cls.apply_gaussian_kernel(confidence_ts)
+
+         pdf = F.softmax(confidence_ts, dim=2)
+         prd_x = (pdf * cls.norm_grid_x).sum(dim=2)
+         prd_y = (pdf * cls.norm_grid_y).sum(dim=2)
+
+         prd_kps = []
+         for idx, (x, y, src_kp, np) in enumerate(zip(prd_x, prd_y, src_kps, n_pts)):
+             max_pts = src_kp.size()[1]
+             prd_xy = torch.stack([x, y]).t()
+
+             src_kp = src_kp[:, :np].t()
+             attmap = cls.attentive_indexing(src_kp).view(np, -1)
+             prd_kp = (prd_xy.unsqueeze(0) * attmap.unsqueeze(-1)).sum(dim=1).t()
+             pads = (torch.zeros((2, max_pts - np)) - 2)
+             prd_kp = torch.cat([prd_kp, pads], dim=1)
+             prd_kps.append(prd_kp)
+
+         return torch.stack(prd_kps)
+
+     @staticmethod
+     def get_coord1d(coord4d, ksz):
+         i, j, k, l = coord4d
+         coord1d = i * (ksz ** 3) + j * (ksz ** 2) + k * (ksz) + l
+         return coord1d
+
+     @staticmethod
+     def get_distance(coord1, coord2):
+         delta_y = int(math.pow(coord1[0] - coord2[0], 2))
+         delta_x = int(math.pow(coord1[1] - coord2[1], 2))
+         dist = delta_y + delta_x
+         return dist
+
+     @staticmethod
+     def interpolate4d(tensor4d, size):
+         bsz, h1, w1, h2, w2 = tensor4d.size()
+         tensor4d = tensor4d.view(bsz, h1, w1, -1).permute(0, 3, 1, 2)
+         tensor4d = F.interpolate(tensor4d, size, mode='bilinear', align_corners=True)
+         tensor4d = tensor4d.view(bsz, h2, w2, -1).permute(0, 3, 1, 2)
+         tensor4d = F.interpolate(tensor4d, size, mode='bilinear', align_corners=True)
+         tensor4d = tensor4d.view(bsz, size[0], size[0], size[0], size[0])
+
+         return tensor4d
+
+     @staticmethod
+     def init_idx4d(ksz):
+         i0 = torch.arange(0, ksz).repeat(ksz ** 3)
+         i1 = torch.arange(0, ksz).unsqueeze(1).repeat(1, ksz).view(-1).repeat(ksz ** 2)
+         i2 = torch.arange(0, ksz).unsqueeze(1).repeat(1, ksz ** 2).view(-1).repeat(ksz)
+         i3 = torch.arange(0, ksz).unsqueeze(1).repeat(1, ksz ** 3).view(-1)
+         idx4d = torch.stack([i3, i2, i1, i0]).t().numpy()
+
+         return idx4d
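`transfer_kps` is a soft-argmax: softmax over the target side turns each source location's correlation row into a distribution, and the predicted coordinate is the expectation of the normalized grid under it (then `attentive_indexing` blends the predictions of grid cells near each source keypoint). The core idea in one dimension:

```python
import torch
import torch.nn.functional as F

grid = torch.linspace(-1, 1, 5)                # normalized 1D coordinates
scores = torch.tensor([0., 0., 4., 0., 0.])    # correlation peak at the center
x_hat = (F.softmax(scores, dim=0) * grid).sum()
print(x_hat)                                   # ~0.0: expectation sits at the peak
```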
model/chmlearner.py ADDED
@@ -0,0 +1,52 @@
+ r""" Convolutional Hough matching layers """
+
+ import torch.nn as nn
+ import torch
+
+ from .base.correlation import Correlation
+ from .base.geometry import Geometry
+ from .base.chm import CHM4d, CHM6d
+
+
+ class CHMLearner(nn.Module):
+
+     def __init__(self, ktype, feat_dim):
+         super(CHMLearner, self).__init__()
+
+         # Scale-wise feature transformation
+         self.scales = [0.5, 1, 2]
+         self.conv2ds = nn.ModuleList([nn.Conv2d(feat_dim, feat_dim // 4, kernel_size=3, padding=1, bias=False) for _ in self.scales])
+
+         # CHM layers
+         ksz_translation = 5
+         ksz_scale = 3
+         self.chm6d = CHM6d(1, 1, ksz_scale, ksz_translation, ktype)
+         self.chm4d = CHM4d(1, 1, ksz_translation, ktype, bias=True)
+
+         # Activations
+         self.relu = nn.ReLU(inplace=True)
+         self.sigmoid = nn.Sigmoid()
+         self.softplus = nn.Softplus()
+
+     def forward(self, src_feat, trg_feat):
+         corr = Correlation.build_correlation6d(src_feat, trg_feat, self.scales, self.conv2ds).unsqueeze(1)
+         bsz, ch, s, s, h, w, h, w = corr.size()
+
+         # CHM layer (6D)
+         corr = self.chm6d(corr)
+         corr = self.sigmoid(corr)
+
+         # Scale-space maxpool
+         corr = corr.view(bsz, -1, h, w, h, w).max(dim=1)[0]
+         corr = Geometry.interpolate4d(corr, [h * 2, w * 2]).unsqueeze(1)
+
+         # CHM layer (4D)
+         corr = self.chm4d(corr).squeeze(1)
+
+         # To ensure non-negative vote scores & soft cyclic constraints
+         corr = self.softplus(corr)
+         corr = Correlation.mutual_nn_filter(corr.view(bsz, corr.size(-1) ** 2, corr.size(-1) ** 2).contiguous())
+
+         return corr
model/chmnet.py ADDED
@@ -0,0 +1,42 @@
+ r""" Convolutional Hough Matching Networks """
+
+ import torch.nn as nn
+ import torch
+
+ from . import chmlearner as chmlearner
+ from .base import backbone
+
+
+ class CHMNet(nn.Module):
+     def __init__(self, ktype):
+         super(CHMNet, self).__init__()
+
+         self.backbone = backbone.resnet101(pretrained=True)
+         self.learner = chmlearner.CHMLearner(ktype, feat_dim=1024)
+
+     def forward(self, src_img, trg_img):
+         src_feat, trg_feat = self.extract_features(src_img, trg_img)
+         correlation = self.learner(src_feat, trg_feat)
+         return correlation
+
+     def extract_features(self, src_img, trg_img):
+         feat = self.backbone.conv1.forward(torch.cat([src_img, trg_img], dim=1))
+         feat = self.backbone.bn1.forward(feat)
+         feat = self.backbone.relu.forward(feat)
+         feat = self.backbone.maxpool.forward(feat)
+
+         for idx in range(1, 5):
+             feat = self.backbone.__getattr__('layer%d' % idx)(feat)
+
+             if idx == 3:
+                 src_feat = feat.narrow(1, 0, feat.size(1) // 2).clone()
+                 trg_feat = feat.narrow(1, feat.size(1) // 2, feat.size(1) // 2).clone()
+                 return src_feat, trg_feat
+
+     @classmethod
+     def training_objective(cls, prd_kps, trg_kps, npts):
+         l2dist = (prd_kps - trg_kps).pow(2).sum(dim=1)
+         loss = []
+         for dist, npt in zip(l2dist, npts):
+             loss.append(dist[:npt].mean())
+         return torch.stack(loss).mean()
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio==2.4.5
+ matplotlib==3.4.3
+ numpy==1.21.2
+ pandas==1.3.4
+ Pillow==8.4.0
+ requests==2.26.0
+ scipy==1.7.1
+ tensorboardX==2.4.1
+ torch==1.10.0
+ torchvision==0.11.1