Spaces:

venite
/

sat3density

Runtime error

File size: 19,619 Bytes

f670afc

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import numpy as np
import cv2
import torch
from scipy.optimize import curve_fit
from scipy.signal import medfilt
import warnings
from imaginaire.utils.io import get_checkpoint


def connect_face_keypoints(resize_h, resize_w, crop_h, crop_w, original_h,
                           original_w, is_flipped, cfgdata, keypoints):
    r"""Connect the face keypoints to edges and draw the sketch.

    For every frame in ``keypoints`` the landmarks of each facial part are
    joined by fitted curves (see ``interp_points``) and rasterized with
    ``draw_edge`` into a single-channel edge map of size
    ``resize_h`` x ``resize_w``. Depending on the options found in
    ``cfgdata.for_face_dataset``, one distance-transform channel per edge
    and a sin/cos positional encoding are stacked behind the edge channel.

    Args:
        resize_h (int): Height the input image was resized to. Used as the
            height of the drawn label map.
        resize_w (int): Width the input image was resized to. Used as the
            width of the drawn label map.
        crop_h (int): Height the input image was cropped. Not used by this
            function.
        crop_w (int): Width the input image was cropped. Not used by this
            function.
        original_h (int): Original height of the input image. Not used by
            this function.
        original_w (int): Original width of the input image. Not used by
            this function.
        is_flipped (bool): Is the input image flipped. Not used by this
            function.
        cfgdata (obj): Data configuration. Only the optional attribute
            ``for_face_dataset`` (with the optional flags
            ``add_upper_face``, ``add_distance_transform`` and
            ``add_positional_encode``) is read.
        keypoints (NxKx2 numpy array): Facial landmarks (with K keypoints).
            The part table below addresses landmarks 0-67, so K is
            presumably 68 - TODO confirm against callers.

    Returns:
        (list of HxWxC numpy array): Drawn label map, one float32 array per
        frame. Channel 0 is the edge map scaled to [0, 1]; the optional
        distance-transform channels (also divided by 255) and positional
        encoding channels follow.
    """
    if hasattr(cfgdata, 'for_face_dataset'):
        face_cfg = cfgdata.for_face_dataset
        # Whether to add the upper part of face to label map.
        add_upper_face = getattr(face_cfg, 'add_upper_face', False)
        # Whether to add distance transform output to label map.
        add_dist_map = getattr(face_cfg, 'add_distance_transform', False)
        # Whether to add positional encoding to label map. It is derived
        # from the distance map, hence only enabled together with it.
        add_pos_encode = add_dist_map and getattr(
            face_cfg, 'add_positional_encode', False)
    else:
        add_upper_face = add_dist_map = add_pos_encode = False

    # Mapping from keypoint index to facial part. Every part is a list of
    # edges and every edge is a sequence of keypoint indices to connect.
    # Indices 68-82 only exist when the upper face points are appended below.
    part_list = [[list(range(0, 17)) + (
        (list(range(68, 83)) + [0]) if add_upper_face else [])],  # face outline
                      [range(17, 22)],  # right eyebrow
                      [range(22, 27)],  # left eyebrow
                      [[28, 31], range(31, 36), [35, 28]],  # nose
                      [[36, 37, 38, 39], [39, 40, 41, 36]],  # right eye
                      [[42, 43, 44, 45], [45, 46, 47, 42]],  # left eye
                      [range(48, 55), [54, 55, 56, 57, 58, 59, 48],
                       range(60, 65), [64, 65, 66, 67, 60]],  # mouth and tongue
    ]
    if add_upper_face:
        # Synthesize the upper face outline from the first 17 keypoints.
        pts = keypoints[:, :17, :].astype(np.int32)
        # Baseline is the mean height of the first and the last outline point.
        baseline_y = (pts[:, 0:1, 1] + pts[:, -1:, 1]) / 2
        upper_pts = pts[:, 1:-1, :].copy()
        # Reflect the 15 inner outline points about the baseline, shrinking
        # their vertical offset to 2/3.
        upper_pts[:, :, 1] = baseline_y + (
                baseline_y - upper_pts[:, :, 1]) * 2 // 3
        # Append them in reversed order so the outline continues from
        # keypoint 16 back towards keypoint 0 (the trailing [0] in the part
        # table closes the loop).
        keypoints = np.hstack((keypoints, upper_pts[:, ::-1, :]))

    edge_len = 3  # Interpolate 3 keypoints to form a curve when drawing edges.
    bw = max(1, resize_h // 256)  # Width of the stroke.

    outputs = []
    for t_idx in range(keypoints.shape[0]):
        # Edge map for the face region from keypoints.
        im_edges = np.zeros((resize_h, resize_w, 1), np.uint8)
        # Distance transform channels; starts with zero channels and grows by
        # one channel per edge.
        im_dists = np.zeros((resize_h, resize_w, 0), np.uint8)
        for edge_list in part_list:
            for e, edge in enumerate(edge_list):
                # Edge map for the current edge.
                im_edge = np.zeros((resize_h, resize_w, 1), np.uint8)
                # Divide a long edge into multiple small edges when drawing.
                # Consecutive windows of edge_len keypoints share one point.
                for i in range(0, max(1, len(edge) - 1), edge_len - 1):
                    sub_edge = edge[i:i + edge_len]
                    x = keypoints[t_idx, sub_edge, 0]
                    y = keypoints[t_idx, sub_edge, 1]

                    # Interp keypoints to get the curve shape. The result is
                    # (None, None) when fitting fails; draw_edge ignores that.
                    curve_x, curve_y = interp_points(x, y)
                    draw_edge(im_edges, curve_x, curve_y, bw=bw)
                    if add_dist_map:
                        # The per-edge map is only needed as input of the
                        # distance transform below.
                        draw_edge(im_edge, curve_x, curve_y, bw=bw)

                if add_dist_map:
                    # Add distance transform map on each facial part.
                    # Inverting the map makes the stroke pixels zero, so the
                    # transform measures the distance to the drawn edge.
                    im_dist = cv2.distanceTransform(255 - im_edge,
                                                    cv2.DIST_L1, 3)
                    im_dist = np.clip((im_dist / 3), 0, 255)
                    im_dists = np.dstack((im_dists, im_dist))

                if add_pos_encode and e == 0:
                    # Add positional encoding for the first edge. im_dist is
                    # the map just computed above (add_pos_encode implies
                    # add_dist_map).
                    # NOTE(review): im_pos is re-created for every part, so
                    # only the encoding of the last part's first edge reaches
                    # the output - confirm this is intended.
                    from math import pi
                    im_pos = np.zeros((resize_h, resize_w, 0), np.float32)
                    for l in range(10):  # noqa: E741
                        # Map distances from [0, 255] to [-1, 1] and encode
                        # them with 10 frequencies (sin and cos each).
                        dist = (im_dist.astype(np.float32) - 127.5) / 127.5
                        sin = np.sin(pi * (2 ** l) * dist)
                        cos = np.cos(pi * (2 ** l) * dist)
                        im_pos = np.dstack((im_pos, sin, cos))

        # Combine all components to form the final label map.
        if add_dist_map:
            im_edges = np.dstack((im_edges, im_dists))
        # Edge and distance channels are rescaled to [0, 1]; the positional
        # encoding is appended afterwards and therefore not rescaled.
        im_edges = im_edges.astype(np.float32) / 255.0
        if add_pos_encode:
            im_edges = np.dstack((im_edges, im_pos))
        outputs.append(im_edges)
    return outputs


def normalize_and_connect_face_keypoints(cfg, is_inference, data):
    r"""Normalize the driving face keypoints against the reference face and
    rasterize both keypoint sets into label maps.

    The per-sequence state (normalization scales and the recent keypoint
    history used for temporal smoothing) is read from and written back to
    ``data['common_attr']``.

    Args:
        cfg (obj): Data configuration. ``cfg.for_face_dataset`` is read for
            the optional ``normalize_momentum`` (default 0.9) and
            ``smooth_kernel_size`` (default 5) settings.
        is_inference (bool): Is doing inference or not. Must be True.
        data (dict): Input data with the entries ``images``, ``label`` and
            ``few_shot_label``.

    Returns:
        (dict): The same ``data`` dict, with ``label`` and
        ``few_shot_label`` replaced by 1xCxHxW label map tensors.
    """
    assert is_inference
    resize_h, resize_w = data['images'][0].shape[-2:]

    target_keypoints = data['label'].numpy()[0]
    ref_keypoints = data['few_shot_label'].numpy()[0]

    # Recover the state of the previous frames, if there is any.
    if 'common_attr' in data and 'prev_data' in data['common_attr']:
        state = data['common_attr']
        dist_scales = state['dist_scales']
        prev_keypoints = state['prev_data']
    else:
        dist_scales = None
        prev_keypoints = None

    # Normalize face keypoints w.r.t. reference face keypoints.
    momentum = getattr(cfg.for_face_dataset, 'normalize_momentum', 0.9)
    normalized, dist_scales = normalize_face_keypoints(
        target_keypoints[0], ref_keypoints[0], dist_scales,
        momentum=momentum)
    keypoints = normalized[np.newaxis, :]

    # Keep a sliding window of (at most) the last ks frames.
    ks = getattr(cfg.for_face_dataset, 'smooth_kernel_size', 5)
    if prev_keypoints is None:
        concat_keypoints = keypoints
    else:
        concat_keypoints = np.vstack([prev_keypoints, keypoints])[-ks:]

    # Median filter over time once the window is completely filled.
    if ks > 1 and concat_keypoints.shape[0] == ks:
        keypoints = smooth_face_keypoints(concat_keypoints, ks)

    # Remember the state for the next frame.
    if 'common_attr' not in data:
        data['common_attr'] = dict()
    data['common_attr']['dist_scales'] = dist_scales
    data['common_attr']['prev_data'] = concat_keypoints

    def render(kpt):
        r"""Draw one keypoint set and return it as a 1xCxHxW tensor."""
        label = connect_face_keypoints(resize_h, resize_w, None, None, None,
                                       None, False, cfg, kpt)
        return torch.from_numpy(label[0]).permute(2, 0, 1).unsqueeze(0)

    # Draw the keypoints to turn them into images.
    labels = [render(kpt) for kpt in (keypoints, ref_keypoints)]
    data['label'], data['few_shot_label'] = labels
    return data


def smooth_face_keypoints(concat_keypoints, ks):
    r"""Median filter a window of face keypoints along the time axis.

    Args:
        concat_keypoints (TxKx2 numpy array): Face keypoints to be filtered.
        ks (int): Filter kernel size.

    Returns:
        (1xKx2 numpy array): Filtered keypoints of frame ``ks // 2``.
    """
    # The kernel only spans the first (time) axis.
    smoothed = medfilt(concat_keypoints, kernel_size=[ks, 1, 1])

    # Replace zero entries by the larger of the entry itself and the entry
    # of the preceding frame, walking forward in time.
    if np.any(smoothed == 0):
        for t in range(1, smoothed.shape[0]):
            current = smoothed[t]
            is_zero = current == 0
            current[is_zero] = np.maximum(current, smoothed[t - 1])[is_zero]

    center = ks // 2
    return smoothed[center:center + 1]


def normalize_face_keypoints(keypoints, ref_keypoints, dist_scales=None,
                             momentum=0.9):
    r"""Normalize face keypoints w.r.t. the reference face keypoints.

    The landmarks are processed in small groups ("parts"). For every part a
    scale for the spread of its points around the part center and a scale
    for the offset of the part center from the face center (landmark 8) are
    derived from the reference face, optionally blended with the scales of
    the previous call, and applied to the points of the part.

    Landmarks whose x coordinate is 0 are treated as missing: they are
    excluded from the statistics and left untouched.

    Note:
        ``keypoints`` is modified in place (and returned).

    Args:
        keypoints (Kx2 numpy array): Target facial keypoints to be normalized.
        ref_keypoints (Kx2 numpy array): Reference facial keypoints.
        dist_scales (list): Normalization params
            ``[dist_scale_x, dist_scale_y, img_scale]`` as returned by a
            previous call, or None for the first frame. Entries of the two
            per-part lists may be None for parts that had no valid points.
        momentum (float): Temporal momentum for the normalization params.

    Returns:
        (tuple):
          - keypoints (Kx2 numpy array): Normalized facial keypoints.
          - dist_scales (list): ``[dist_scale_x, dist_scale_y, img_scale]``
            to be passed to the next call.

    Raises:
        ValueError: If ``keypoints`` does not contain 68 landmarks.
    """
    if keypoints.shape[0] == 68:
        central_keypoints = [8]
        part_list = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12],
                     [5, 11], [6, 10], [7, 9, 8],
                     [17, 26], [18, 25], [19, 24], [20, 23], [21, 22],
                     [27], [28], [29], [30], [31, 35], [32, 34], [33],
                     [36, 45], [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
                     [48, 54], [49, 53], [50, 52], [51], [55, 59], [56, 58],
                     [57],
                     [60, 64], [61, 63], [62], [65, 67], [66]
                     ]
    else:
        raise ValueError('Input keypoints type not supported.')

    face_cen = np.mean(keypoints[central_keypoints, :], axis=0)
    ref_face_cen = np.mean(ref_keypoints[central_keypoints, :], axis=0)

    def get_mean_dists(pts, face_cen):
        r"""Get mean distances of the points from face center.

        Returns the mean distance of the points to their own center and the
        distance of that center to the face center. A small epsilon keeps
        both values usable as divisors.
        """
        pts_cen = np.mean(pts, axis=0)
        mean_dist_x = np.mean(np.linalg.norm(pts - pts_cen, axis=1)) + 1e-3
        mean_dist_y = np.linalg.norm(pts_cen - face_cen) + 1e-3
        return mean_dist_x, mean_dist_y

    num_parts = len(part_list)
    dist_scale_x = [None] * num_parts
    dist_scale_y = [None] * num_parts
    if dist_scales is None:
        dist_scale_x_prev = dist_scale_y_prev = img_scale = None
    else:
        dist_scale_x_prev, dist_scale_y_prev, img_scale = dist_scales
    if img_scale is None:
        # Ratio of the horizontal extents of the target and reference face.
        img_scale = (keypoints[:, 0].max() - keypoints[:, 0].min()) \
                    / (ref_keypoints[:, 0].max() - ref_keypoints[:, 0].min())

    for i, pts_idx in enumerate(part_list):
        pts_idx = np.asarray(pts_idx)
        pts = keypoints[pts_idx]
        # Landmarks with x == 0 are considered missing.
        valid = pts[:, 0] != 0
        if not valid.any():
            continue
        pts = pts[valid]

        ref_pts = ref_keypoints[pts_idx]
        mean_dist_x, mean_dist_y = get_mean_dists(pts, face_cen)
        ref_dist_x, ref_dist_y = get_mean_dists(ref_pts, ref_face_cen)
        dist_scale_x[i] = ref_dist_x / mean_dist_x * img_scale
        dist_scale_y[i] = ref_dist_y / mean_dist_y * img_scale

        # Blend with the previous scales. A part that had no valid points in
        # the previous call carries None there and cannot be blended.
        has_prev = (dist_scale_x_prev is not None
                    and dist_scale_x_prev[i] is not None
                    and dist_scale_y_prev[i] is not None)
        if has_prev:
            dist_scale_x[i] = dist_scale_x_prev[i] * momentum + \
                dist_scale_x[i] * (1 - momentum)
            dist_scale_y[i] = dist_scale_y_prev[i] * momentum + \
                dist_scale_y[i] * (1 - momentum)

        pts_cen = np.mean(pts, axis=0)
        pts = (pts - pts_cen) * dist_scale_x[i] + \
              (pts_cen - face_cen) * dist_scale_y[i] + face_cen
        # Write back only the valid landmarks. Assigning to all indices of
        # the part would broadcast over (or mismatch) the missing ones.
        keypoints[pts_idx[valid]] = pts

    return keypoints, [dist_scale_x, dist_scale_y, img_scale]


def npy_to_tensor(keypoints):
    r"""Wrap a numpy array in a pytorch tensor with a new leading dimension.

    Args:
        keypoints (numpy array): Array to convert.

    Returns:
        (tensor): Tensor of shape ``(1, *keypoints.shape)``.
    """
    tensor = torch.from_numpy(keypoints)
    return torch.unsqueeze(tensor, 0)


def get_dlib_landmarks_from_image(
        imgs, predictor_path='shape_predictor_68_face_landmarks.dat'):
    r"""Get face keypoints from an image.

    Runs the dlib frontal face detector on every image and predicts 68
    landmarks for the first detected face.

    Args:
        imgs (N x 3 x H x W tensor or N x H x W x 3 numpy array): Input images.
            Tensors are mapped to bytes via ``(imgs + 1) / 2 * 255``, i.e.
            they are presumably expected in [-1, 1]; numpy arrays are passed
            to dlib unchanged.
        predictor_path (str): Path to the predictor model.

    Returns:
        (N x 68 x 2 numpy array): Integer (x, y) landmark coordinates. Rows
        of images without a detected face stay all zero.
    """
    import dlib
    # Resolve the predictor file through the project's checkpoint helper.
    predictor_path = get_checkpoint(predictor_path,
                                    url='1l9zT-AI1yKlfyAb_wl_RjLBSaiWQr8dr')
    # isinstance (rather than an exact type comparison) also converts
    # torch.Tensor subclasses.
    if isinstance(imgs, torch.Tensor):
        imgs = ((imgs + 1) / 2 * 255).byte()
        imgs = np.transpose(imgs.cpu().numpy(), (0, 2, 3, 1))
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)
    points = np.zeros([imgs.shape[0], 68, 2], dtype=int)
    for i in range(imgs.shape[0]):
        img = imgs[i]
        dets = detector(img, 1)
        if len(dets) > 0:
            # Only returns the first face.
            shape = predictor(img, dets[0])
            for b in range(68):
                part = shape.part(b)
                points[i, b, 0] = part.x
                points[i, b, 1] = part.y
    return points


def get_126_landmarks_from_image(imgs, landmarks_network):
    r"""Get face keypoints from an image.

    When several faces are found in an image, the landmarks of the face
    with the largest bounding box side (width or height) are returned.

    Args:
        imgs (Nx3xHxW tensor or NxHxWx3 numpy array): Input images. Tensors
            are mapped to bytes via ``(imgs + 1) / 2 * 255``, i.e. they are
            presumably expected in [-1, 1].
        landmarks_network (obj): The landmark detection network. Must
            provide ``get_face_boxes_and_landmarks(img)`` returning the face
            boxes and the landmarks of all detected faces. Boxes are assumed
            to be laid out as ``(x0, y0, x1, y1)`` - TODO confirm.

    Return:
        (Nx126x2 numpy array): Predicted landmarks (float32). Images without
        a detected face yield all zeros.
    """
    if isinstance(imgs, torch.Tensor):
        imgs = ((imgs + 1) / 2 * 255).byte()
        imgs = np.transpose(imgs.cpu().numpy(), (0, 2, 3, 1))

    landmarks = []
    for img_idx in range(imgs.shape[0]):
        img = imgs[img_idx]
        out_boxes, landmark = \
            landmarks_network.get_face_boxes_and_landmarks(img)
        if len(landmark) > 1:
            # Pick the largest face in the image.
            face_size_max = face_index = 0
            for box_idx, out_box in enumerate(out_boxes):
                # Larger of box width and box height.
                face_size = max(out_box[2] - out_box[0],
                                out_box[3] - out_box[1])
                if face_size > face_size_max:
                    face_size_max = face_size
                    face_index = box_idx
            landmark = landmark[face_index]
        elif len(landmark) == 1:
            landmark = landmark[0]
        else:
            landmark = np.zeros((126, 2), dtype=np.float32)
        landmarks += [landmark[np.newaxis]]

    if not landmarks:
        # Empty batch: np.vstack cannot stack an empty list.
        return np.zeros((0, 126, 2), dtype=np.float32)
    landmarks = np.vstack(landmarks).astype(np.float32)
    return landmarks


def convert_face_landmarks_to_image(cfgdata, landmarks, output_size,
                                    output_tensor=True, cpu_only=False):
    r"""Rasterize facial landmarks into label maps.

    Args:
        cfgdata (obj): Data configuration.
        landmarks (NxKx2 numpy array): Facial landmarks to draw.
        output_size (tuple of int): H, W of output label map.
        output_tensor (bool): Output tensors instead of numpy arrays.
        cpu_only (bool): Output CPU tensor only.

    Returns:
        (NxCxHxW tensor or list of HxWxC numpy arrays): Label maps. The
        tensor is moved to the GPU unless ``cpu_only`` is set.
    """
    height, width = output_size
    label_maps = connect_face_keypoints(height, width, None, None, None,
                                        None, False, cfgdata, landmarks)
    if output_tensor:
        # HxWxC -> 1xCxHxW per frame, then concatenate along the batch axis.
        batch = torch.cat([
            torch.from_numpy(label_map).permute(2, 0, 1).unsqueeze(0)
            for label_map in label_maps
        ])
        return batch if cpu_only else batch.cuda()
    return label_maps


def add_face_keypoints(label_map, image, keypoints):
    r"""Add additional keypoints to label map.

    Every keypoint is converted from normalized coordinates to a pixel
    position of ``image`` and the corresponding pixel is set to 1 in all
    channels of ``label_map``. Positions are clamped to the image bounds, so
    a coordinate of exactly 1 (or slightly outside [-1, 1]) marks the border
    pixel instead of raising or wrapping around.

    Args:
        label_map (Nx1xHxW tensor or None): Label map to draw into (modified
            in place). If None, a zero map matching ``image`` is created.
        image (Nx3xHxW tensor): Image providing the spatial size.
        keypoints (NxKx2 tensor): (x, y) keypoints, mapped to pixels via
            ``(v + 1) / 2 * size``, i.e. presumably normalized to [-1, 1].

    Returns:
        (Nx1xHxW tensor): The label map with the keypoints marked.
    """
    if label_map is None:
        label_map = torch.zeros_like(image)[:, :1]
    h, w = image.shape[-2:]
    x = ((keypoints[:, :, 0] + 1) / 2 * w).long().clamp(0, w - 1)
    y = ((keypoints[:, :, 1] + 1) / 2 * h).long().clamp(0, h - 1)
    # Build the batch index on the device of the other indices instead of
    # hard-coding CUDA, so the function also works on CPU tensors.
    bs = torch.arange(label_map.shape[0], device=x.device)
    bs = bs.view(-1, 1).expand_as(x)
    label_map[bs, :, y, x] = 1
    return label_map


def draw_edge(im, x, y, bw=1, color=(255, 255, 255), draw_end_points=False):
    r"""Paint an edge, given as lists of pixel coordinates, onto a canvas.

    The stroke is thickened by repainting the edge at every offset in
    ``range(-bw, bw)`` in both directions. Coordinates are clamped to the
    canvas. Nothing is drawn when ``x`` is None or empty.

    Args:
        im (HxWxC numpy array): Canvas to draw.
        x (1D numpy array): x coordinates of the edge.
        y (1D numpy array): y coordinates of the edge.
        bw (int): Width of the stroke.
        color (list or tuple of int): Color to draw.
        draw_end_points (bool): Whether to draw end points of the edge.
    """
    if x is None or not x.size:
        return

    last_row = im.shape[0] - 1
    last_col = im.shape[1] - 1

    def paint(rows, cols, d_row, d_col):
        r"""Shift the pixels, clamp them to the canvas and color them."""
        shifted_rows = np.maximum(0, np.minimum(last_row, rows + d_row))
        shifted_cols = np.maximum(0, np.minimum(last_col, cols + d_col))
        set_color(im, shifted_rows, shifted_cols, color)

    # Draw edge.
    for d_row in range(-bw, bw):
        for d_col in range(-bw, bw):
            paint(y, x, d_row, d_col)

    if not draw_end_points:
        return

    # Draw endpoints as discs of radius 2 * bw around both ends of the edge.
    reach = bw * 2
    for d_row in range(-reach, reach):
        for d_col in range(-reach, reach):
            if (d_row ** 2) + (d_col ** 2) < (4 * bw ** 2):
                paint(np.array([y[0], y[-1]]), np.array([x[0], x[-1]]),
                      d_row, d_col)


def set_color(im, yy, xx, color):
    r"""Color the given pixels of the image.

    On a 3-channel canvas the color is written directly if all addressed
    pixels are still zero; otherwise it is averaged with the existing pixel
    values. On any other canvas the first color component is written.

    Args:
        im (HxWxC numpy array): Canvas to draw.
        yy (1D numpy array): y coordinates of the pixels.
        xx (1D numpy array): x coordinates of the pixels.
        color (list or tuple of int): Color to draw. Any other value is
            repeated for all three channels.
    """
    if type(color) not in (list, tuple):
        color = [color] * 3

    is_three_channel = len(im.shape) == 3 and im.shape[2] == 3
    if not is_three_channel:
        im[yy, xx] = color[0]
        return

    if (im[yy, xx] == 0).all():
        # Untouched pixels: take the color as is.
        first, second, third = color[0], color[1], color[2]
        im[yy, xx, 0] = first
        im[yy, xx, 1] = second
        im[yy, xx, 2] = third
        return

    # Some pixels are already painted: blend with the existing values.
    for channel in range(3):
        blended = (im[yy, xx, channel].astype(float) + color[channel]) / 2
        im[yy, xx, channel] = blended.astype(np.uint8)


def interp_points(x, y):
    r"""Fit a line/parabola through the given points and sample it densely.

    Two points are fitted with a line, more points with a quadratic. If the
    points vary more in y than in x, the roles of the axes are swapped for
    the fit.

    Args:
        x (1D array): x coordinates of the points to interpolate.
        y (1D array): y coordinates of the points to interpolate.

    Returns:
        (tuple):
          - curve_x (1D array): x coordinates of the interpolated points.
          - curve_y (1D array): y coordinates of the interpolated points.
        Both are None if the fit fails or the fitted quadratic coefficient
        exceeds 1 in magnitude.
    """
    max_step_x = abs(x[:-1] - x[1:]).max()
    max_step_y = abs(y[:-1] - y[1:]).max()

    if max_step_x < max_step_y:
        # Mostly vertical: interpolate x as a function of y instead.
        curve_y, curve_x = interp_points(y, x)
        if curve_y is None:
            return None, None
        return curve_x.astype(int), curve_y.astype(int)

    # Fit quietly; any failure means the edge is not drawn.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            if len(x) >= 3:
                popt, _ = curve_fit(func, x, y)
                # Reject strongly bent parabolas.
                if abs(popt[0]) > 1:
                    return None, None
            else:
                popt, _ = curve_fit(linear, x, y)
        except Exception:
            return None, None

    # Sample from the smaller to the larger x, about one sample per pixel.
    if x[0] > x[-1]:
        x = list(reversed(x))
        y = list(reversed(y))
    num_samples = int(np.round(x[-1]-x[0]))
    curve_x = np.linspace(x[0], x[-1], num_samples)
    if len(x) >= 3:
        curve_y = func(curve_x, *popt)
    else:
        curve_y = linear(curve_x, *popt)
    return curve_x.astype(int), curve_y.astype(int)


def func(x, a, b, c):
    r"""Quadratic fitting function ``a * x**2 + b * x + c``.

    Args:
        x (scalar or array): Points at which to evaluate.
        a (float): Coefficient of the quadratic term.
        b (float): Coefficient of the linear term.
        c (float): Constant offset.
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c


def linear(x, a, b):
    r"""Linear fitting function ``a * x + b``.

    Args:
        x (scalar or array): Points at which to evaluate.
        a (float): Slope.
        b (float): Intercept.
    """
    scaled = a * x
    return scaled + b