File size: 4,901 Bytes

9c79341

"""This script contains the image preprocessing code for Deep3DFaceRecon_pytorch
"""

import logging
import os
import warnings

import cv2
import numpy as np
import torch
from PIL import Image
from scipy.io import loadmat
from skimage import transform as trans
# VisibleDeprecationWarning lives in numpy.exceptions since NumPy 1.25, and the
# top-level alias np.VisibleDeprecationWarning was removed in NumPy 2.0, where
# merely referencing it raises AttributeError and breaks the import of this
# module. Resolve the class from its new home first and fall back to the old
# alias for older NumPy releases.
try:
    from numpy.exceptions import (
        VisibleDeprecationWarning as _VisibleDeprecationWarning,
    )
except ImportError:  # NumPy < 1.25 has no numpy.exceptions module
    _VisibleDeprecationWarning = np.VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=_VisibleDeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


# calculating least square problem for image alignment
def POS(xp, x):
    """Least-squares fit of a scaled projection from 3D points to 2D points.

    Builds the linear system ``A k = b`` in which every point contributes
    two rows::

        [X Y Z 1 0 0 0 0] k = u
        [0 0 0 0 X Y Z 1] k = v

    and solves it for the 8 unknowns ``k``.

    Parameters:
        xp                 --numpy.array  (2, N), 2D points, one column per point
        x                  --numpy.array  (3, N), 3D points, one column per point

    Return:
        t                  --numpy.array  (2, 1), the solved offsets k[3] and k[7]
        s                  --float, mean of the norms of k[0:3] and k[4:7]
    """
    npts = xp.shape[1]

    A = np.zeros([2 * npts, 8])
    pts3d = x.transpose()

    # Even rows carry the equation for the first 2D coordinate ...
    A[0::2, 0:3] = pts3d
    A[0::2, 3] = 1

    # ... odd rows carry the equation for the second 2D coordinate.
    A[1::2, 4:7] = pts3d
    A[1::2, 7] = 1

    # Interleave the targets as (u0, v0, u1, v1, ...) to match the row layout.
    b = np.reshape(xp.transpose(), [2 * npts, 1])

    # Pass rcond explicitly: on NumPy < 2.0 omitting it emits a FutureWarning
    # and uses the legacy cutoff, while NumPy >= 2.0 defaults to the same
    # behaviour as rcond=None. This keeps the result identical across versions.
    k, _, _, _ = np.linalg.lstsq(A, b, rcond=None)

    R1 = k[0:3]
    R2 = k[4:7]
    sTx = k[3]
    sTy = k[7]
    s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2
    t = np.stack([sTx, sTy], axis=0)

    return t, s
    
# resize and crop images for face reconstruction
def resize_n_crop_img(img, lm, t, s, target_size=224., mask=None):
    """Rescale an image by ``s`` and crop a ``target_size`` square out of it.

    The landmarks are moved into the coordinate frame of the cropped image
    using the same translation and scale.

    Parameters:
        img                --PIL.Image, image to resize and crop
        lm                 --numpy.array  (N, 2), landmark coordinates
        t                  --2-element translation (tx, ty); a flat (2,) array
                             or the (2, 1) array returned by POS both work
        s                  --scale factor, Python float or NumPy scalar
        target_size        --side length of the square crop
        mask               --optional PIL.Image, resized and cropped like img

    Return:
        img                --PIL.Image, the resized and cropped image
        lm                 --numpy.array  (N, 2), transformed landmarks
        mask               --PIL.Image or None, the resized and cropped mask
    """
    w0, h0 = img.size

    # Normalise the scalars to plain Python floats. The previous code called
    # .astype() on intermediate results, which only exists on NumPy scalars
    # (a plain float `s` raised AttributeError), and called float() on
    # 1-element arrays, which NumPy deprecated in 1.25. .item() is valid for
    # any size-1 input.
    s = np.asarray(s, dtype=np.float64).item()
    tx = np.asarray(t[0], dtype=np.float64).item()
    ty = np.asarray(t[1], dtype=np.float64).item()

    # int() truncates toward zero, exactly like astype(np.int32) did.
    w = int(w0 * s)
    h = int(h0 * s)

    # NOTE(review): the vertical offset uses (h0/2 - ty) while the horizontal
    # one uses (tx - w0/2); presumably ty is measured with y pointing up while
    # image rows grow downward - confirm with the caller's convention.
    left = int(w / 2 - target_size / 2 + (tx - w0 / 2) * s)
    right = left + target_size
    up = int(h / 2 - target_size / 2 + (h0 / 2 - ty) * s)
    below = up + target_size

    img = img.resize((w, h), resample=Image.BICUBIC)
    img = img.crop((left, up, right, below))

    if mask is not None:
        mask = mask.resize((w, h), resample=Image.BICUBIC)
        mask = mask.crop((left, up, right, below))

    # Recentre the landmarks on the translation, scale them, then shift them
    # by the offset of a centred target_size window in the resized image.
    lm = np.stack([lm[:, 0] - tx + w0 / 2, lm[:, 1] - ty + h0 / 2], axis=1) * s
    lm = lm - np.reshape(
        np.array([w / 2 - target_size / 2, h / 2 - target_size / 2]), [1, 2])

    return img, lm, mask

# utils for face reconstruction
def extract_5p(lm):
    """Reduce a landmark array to five points.

    Rows are addressed with 0-based indices. Two of the output points are the
    mean of a pair of input rows; the other three are copied as-is.

    Parameters:
        lm                 --numpy.array  (N, 2) with N >= 55

    Return:
        lm5p               --numpy.array  (5, 2), ordered as
                             [mean(36, 39), mean(42, 45), 30, 48, 54]
    """
    # Presumably the two eye centres, each averaged from a pair of eye
    # landmarks - confirm against the landmark layout in use.
    first_pair_mean = np.mean(lm[[36, 39], :], 0)
    second_pair_mean = np.mean(lm[[42, 45], :], 0)

    # Single landmarks taken verbatim.
    point_30 = lm[30, :]
    point_48 = lm[48, :]
    point_54 = lm[54, :]

    return np.stack(
        [first_pair_mean, second_pair_mean, point_30, point_48, point_54],
        axis=0,
    )

# utils for face reconstruction
def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.):
    """Align an image to a set of reference 3D landmarks, then resize and crop it.

    Five 2D landmarks are fitted to ``lm3D`` with POS() to obtain a translation
    and a scale; the image, the landmarks and the optional mask are then
    transformed with resize_n_crop_img().

    Return:
        transparams        --numpy.array  float32, (raw_W, raw_H, scale, tx, ty)
        img_new            --PIL.Image  (target_size, target_size, 3)
        lm_new             --numpy.array  (N, 2), same number of points as lm,
                             y direction is opposite to v direction
        mask_new           --PIL.Image  (target_size, target_size), or None
                             when no mask was given

    Parameters:
        img                --PIL.Image  (raw_H, raw_W, 3)
        lm                 --numpy.array  (68, 2) or (5, 2), y direction is
                             opposite to v direction
        lm3D               --numpy.array  (5, 3)
        mask               --PIL.Image  (raw_H, raw_W, 3)
        target_size        --side length of the square output crop
        rescale_factor     --the applied scale is rescale_factor divided by
                             the scale estimated by POS()

    Raises:
        ValueError         --if the estimated translation does not have
                             exactly 2 elements
        Exception          --any error raised while aligning is logged with
                             the inputs at hand and re-raised unchanged
    """
    # Diagnostics go through logging instead of print() so that library users
    # decide whether they see them; nothing is written to stdout by default.
    logger = logging.getLogger(__name__)

    # Pre-bind so the failure report can tell "not computed yet" apart from a
    # real value without inspecting locals().
    t = s = None

    try:
        w0, h0 = img.size
        logger.debug(
            "Input shapes - lm: %s, lm3D: %s",
            getattr(lm, "shape", None), getattr(lm3D, "shape", None),
        )
        logger.debug("Original image size: %sx%s", w0, h0)

        # Reduce to 5 landmarks unless the caller already supplied 5 points.
        lm5p = extract_5p(lm) if lm.shape[0] != 5 else lm
        logger.debug("Landmark points shape: %s", lm5p.shape)

        # Calculate translation and scale factors.
        t, s = POS(lm5p.transpose(), lm3D.transpose())
        s = rescale_factor / s

        # POS returns t as a (2, 1) column; flatten it so t[0] and t[1] are
        # scalars for the steps below.
        t = np.array(t).flatten()
        if len(t) != 2:
            raise ValueError(f"Translation vector t should have 2 elements, got {len(t)}: {t}")

        logger.debug("Calculated values - t: %s, s: %s", t, s)

        img_new, lm_new, mask_new = resize_n_crop_img(
            img, lm, t, s, target_size=target_size, mask=mask
        )

        # Explicit float() conversions keep the result a flat numeric array.
        trans_params = np.array(
            [float(w0), float(h0), float(s), float(t[0]), float(t[1])],
            dtype=np.float32,
        )
        logger.debug("Transformation params: %s", trans_params)

        return trans_params, img_new, lm_new, mask_new

    except Exception as e:
        # getattr() with defaults keeps this report from raising a second
        # exception (and hiding the original one) when an input is None or
        # is not the expected type.
        logger.error(
            "in align_img(): %s | img size: %s, lm shape: %s, "
            "lm3D shape: %s, t: %s, s: %s",
            e,
            getattr(img, "size", "None"),
            getattr(lm, "shape", "Not an array"),
            getattr(lm3D, "shape", "Not an array"),
            "Not calculated" if t is None else t,
            "Not calculated" if s is None else s,
        )
        raise