# File: ECON/lib/pymafx/utils/smooth_bbox.py
# Last commit: "Support TEXTure" (487ee6d) by Yuliang; file size 3.94 kB.
# This script is borrowed from https://github.com/akanazawa/human_dynamics/blob/master/src/util/smooth_bbox.py
# Adhere to their licence to use this script
import numpy as np
import scipy.signal as signal
from scipy.ndimage.filters import gaussian_filter1d
def get_smooth_bbox_params(kps, vis_thresh=2, kernel_size=11, sigma=3):
    """
    Computes smooth bounding box parameters from keypoints:
      1. Computes bbox by rescaling the person to be around 150 px.
      2. Linearly interpolates bbox params for missing annotations.
      3. Median filtering
      4. Gaussian filtering.

    Recommended thresholds:
      * detect-and-track: 0
      * 3DPW: 0.1

    Args:
        kps (list): List of kps (Nx3) or None.
        vis_thresh (float): Threshold for visibility.
        kernel_size (int): Kernel size for median filtering (must be odd).
        sigma (float): Sigma for gaussian smoothing.

    Returns:
        Smooth bbox params [cx, cy, scale], start index, end index.
        The params array is front-padded with `start` all-zero rows, so
        row i of the result lines up with frame i for start <= i < end.
        When no frame yields a bounding box (start index is negative), an
        empty (0, 3) array is returned together with the indices reported
        by get_all_bbox_params.
    """
    bbox_params, start, end = get_all_bbox_params(kps, vis_thresh)
    if start < 0:
        # No usable frame at all: there is nothing to filter, and
        # np.zeros((start, 3)) below would raise on the negative dimension.
        return np.zeros((0, 3)), start, end
    smoothed = smooth_bbox_params(bbox_params, kernel_size, sigma)
    # Pad the skipped leading frames with zeros so rows are frame-indexed.
    smoothed = np.vstack((np.zeros((start, 3)), smoothed))
    return smoothed, start, end
def kp_to_bbox_param(kp, vis_thresh):
    """
    Derives bounding box parameters from one frame of 2D keypoints.

    Only keypoints whose third column is strictly greater than `vis_thresh`
    are used. The box is the axis-aligned extent of those keypoints, and the
    scale is chosen so that the box diagonal maps to 150 px.

    Args:
        kp (Kx3): 2D Keypoints as [x, y, visibility], or None.
        vis_thresh (float): Threshold for visibility.

    Returns:
        [center_x, center_y, scale], or None when `kp` is None, no keypoint
        passes the threshold, or the box diagonal is shorter than 0.5.
    """
    if kp is None:
        return None

    visible = kp[:, 2] > vis_thresh
    if not np.any(visible):
        return None

    # Axis-aligned extent of the visible keypoints.
    xy = kp[visible, :2]
    lower = np.min(xy, axis=0)
    upper = np.max(xy, axis=0)

    # The "height" of the person is measured as the box diagonal.
    diagonal = np.linalg.norm(upper - lower)
    if diagonal < 0.5:
        # Degenerate box (e.g. a single visible point): no usable scale.
        return None

    midpoint = (lower + upper) / 2.
    return np.append(midpoint, 150. / diagonal)
def get_all_bbox_params(kps, vis_thresh=2):
    """
    Finds bounding box parameters for all keypoints.

    Frames before the first and after the last frame with a valid bbox are
    dropped. Runs of frames in the middle with no prediction are filled by
    linearly interpolating each bbox param between the neighbouring valid
    frames.

    Args:
        kps (list): List of kps (Kx3) or None.
        vis_thresh (float): Threshold for visibility.

    Returns:
        bbox_params, start_index (incl), end_index (excl).
        bbox_params has one [cx, cy, scale] row per frame in
        [start_index, end_index). If `kps` is empty or no frame yields a
        bbox, bbox_params is an empty (0, 3) array, start_index is -1 and
        end_index is 0.
    """
    # Rows / interpolated blocks are collected and stacked once at the end
    # instead of re-allocating the whole array on every frame. The empty
    # float32 seed keeps the (0, 3) shape when nothing valid is found.
    chunks = [np.empty(shape=(0, 3), dtype=np.float32)]
    previous = None
    # keeps track of how many indices in a row with no prediction
    num_to_interpolate = 0
    start_index = -1
    # Counted explicitly so an empty `kps` does not leave the loop index
    # unbound when computing the end index.
    num_frames = 0
    for i, kp in enumerate(kps):
        num_frames = i + 1
        bbox_param = kp_to_bbox_param(kp, vis_thresh=vis_thresh)
        if bbox_param is None:
            num_to_interpolate += 1
            continue

        if start_index == -1:
            # Found the first index with a prediction!
            start_index = i
            # Leading frames without a prediction are skipped, not filled.
            num_to_interpolate = 0

        if num_to_interpolate > 0:
            # Linearly interpolate each param.
            # This will be 3x(n+2)
            interpolated = np.array([
                np.linspace(prev, curr, num_to_interpolate + 2)
                for prev, curr in zip(previous, bbox_param)
            ])
            # Drop both endpoints: they are the known neighbouring frames.
            chunks.append(interpolated.T[1:-1])
            num_to_interpolate = 0

        chunks.append(bbox_param)
        previous = bbox_param

    # Trailing frames without a prediction are excluded from the range.
    end_index = num_frames - num_to_interpolate
    return np.vstack(chunks), start_index, end_index
def smooth_bbox_params(bbox_params, kernel_size=11, sigma=8):
    """
    Smooths bounding box parameters over time.

    Each column of `bbox_params` is treated as an independent 1D trajectory:
    it is first median filtered and the result is then gaussian filtered.

    Args:
        bbox_params (Nx3): [cx, cy, scale].
        kernel_size (int): Kernel size for median filtering (must be odd).
        sigma (float): Sigma for gaussian smoothing.

    Returns:
        Smoothed bounding box parameters (Nx3).
    """
    # Median pass for every trajectory (one per parameter column).
    median_tracks = np.array([
        signal.medfilt(track, kernel_size) for track in bbox_params.T
    ])

    # Gaussian pass on top of the median-filtered trajectories.
    gaussian_tracks = []
    for track in median_tracks:
        gaussian_tracks.append(gaussian_filter1d(track, sigma))

    # Trajectories are rows here; transpose back to one row per frame.
    return np.array(gaussian_tracks).T