from pathlib import Path |
import time |
from collections import OrderedDict |
from threading import Thread |
import numpy as np |
import cv2 |
import torch |
import matplotlib.pyplot as plt |
import matplotlib |
matplotlib.use('Agg') |
class AverageTimer:
    """ Class to help manage printing simple timing of code execution.

    Each call to `update(name)` stores the time elapsed since the previous
    `update`/`reset` under `name`, blended with the previously stored value
    for that name. `print` reports every section updated since the last
    report, plus their sum and the corresponding rate.
    """

    def __init__(self, smoothing=0.3, newline=False):
        """
        Inputs
          smoothing: Weight given to the newest measurement when blending it
            with the stored value for the same name.
          newline: If True, `print` ends the report with a newline; otherwise
            it ends with a carriage return so the next report overwrites it.
        """
        self.smoothing = smoothing
        self.newline = newline
        self.times = OrderedDict()
        self.will_print = OrderedDict()
        self.reset()

    def reset(self):
        """ Restart the clock and mark every known section as not-to-print. """
        now = time.time()
        self.start = now
        self.last_time = now
        for name in self.will_print:
            self.will_print[name] = False

    def update(self, name='default'):
        """ Record the time elapsed since the last update/reset under `name`. """
        now = time.time()
        dt = now - self.last_time
        if name in self.times:
            # Blend the new measurement with the stored one.
            dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
        self.times[name] = dt
        self.will_print[name] = True
        self.last_time = now

    def print(self, text='Timer'):
        """ Print the sections updated since the last report, then reset.

        Inputs
          text: Label printed in square brackets at the start of the line.
        """
        total = 0.
        print('[{}]'.format(text), end=' ')
        for key, val in self.times.items():
            if self.will_print[key]:
                print('%s=%.3f' % (key, val), end=' ')
                total += val
        # With no updated section (or a zero-resolution clock) the total is 0;
        # report an infinite rate rather than raising ZeroDivisionError.
        fps = 1. / total if total > 0 else float('inf')
        print('total=%.3f sec {%.1f FPS}' % (total, fps), end=' ')
        if self.newline:
            print(flush=True)
        else:
            print(end='\r', flush=True)
        self.reset()
class VideoStreamer:
    """ Class to help process image streams. Four types of possible inputs:
        1.) USB Webcam.
        2.) An IP camera
        3.) A directory of images (files in directory matching 'image_glob').
        4.) A video file, such as an .mp4 or .avi file.
    """

    def __init__(self, basedir, resize, skip, image_glob, max_length=1000000):
        """ Open the input described by `basedir`.

        Inputs
          basedir: An int or digit-only string (webcam index), a string
            starting with 'http' or 'rtsp' (IP camera), a directory path
            (image folder) or a path to an existing file (video).
          resize: Resize specification forwarded to `process_resize`.
          skip: Stride applied to the list of images / video frame indices.
          image_glob: Sequence of glob patterns used for directory input.
          max_length: Maximum number of frames to serve.
        Raises
          ValueError: `basedir` matches none of the supported input kinds.
          IOError: No image matched, or the capture device could not be opened.
        """
        self._ip_grabbed = False
        self._ip_running = False
        # Initialised here so next_frame() can always read it, even before
        # (or without) the IP camera thread being started.
        self._ip_exited = False
        self._ip_camera = False
        self._ip_image = None
        self._ip_index = 0
        self.cap = []
        self.camera = True
        self.video_file = False
        self.listing = []
        self.resize = resize
        self.interp = cv2.INTER_AREA
        self.i = 0
        self.skip = skip
        self.max_length = max_length
        if isinstance(basedir, int) or basedir.isdigit():
            print('==> Processing USB webcam input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(int(basedir))
            self.listing = range(0, self.max_length)
        elif basedir.startswith(('http', 'rtsp')):
            print('==> Processing IP camera input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.start_ip_camera_thread()
            self._ip_camera = True
            self.listing = range(0, self.max_length)
        elif Path(basedir).is_dir():
            print('==> Processing image directory input: {}'.format(basedir))
            self.listing = list(Path(basedir).glob(image_glob[0]))
            for j in range(1, len(image_glob)):
                image_path = list(Path(basedir).glob(image_glob[j]))
                self.listing = self.listing + image_path
            self.listing.sort()
            self.listing = self.listing[::self.skip]
            self.max_length = np.min([self.max_length, len(self.listing)])
            if self.max_length == 0:
                raise IOError('No images found (maybe bad \'image_glob\' ?)')
            self.listing = self.listing[:self.max_length]
            self.camera = False
        elif Path(basedir).exists():
            print('==> Processing video input: {}'.format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
            num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.listing = range(0, num_frames)
            self.listing = self.listing[::self.skip]
            self.video_file = True
            self.max_length = np.min([self.max_length, len(self.listing)])
            self.listing = self.listing[:self.max_length]
        else:
            raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir))
        if self.camera and not self.cap.isOpened():
            raise IOError('Could not read camera')

    def load_image(self, impath):
        """ Read image as grayscale and resize it according to self.resize.
        Inputs
            impath: Path to input image.
        Returns
            grayim: uint8 numpy array sized H x W.
        """
        grayim = cv2.imread(impath, 0)
        if grayim is None:
            raise Exception('Error reading image %s' % impath)
        w, h = grayim.shape[1], grayim.shape[0]
        w_new, h_new = process_resize(w, h, self.resize)
        grayim = cv2.resize(
            grayim, (w_new, h_new), interpolation=self.interp)
        return grayim

    def next_frame(self):
        """ Return the next frame, and increment internal counter.
        Returns
             image: Next H x W image.
             status: True or False depending whether image was loaded.
        """
        if self.i == self.max_length:
            return (None, False)
        if self.camera:
            if self._ip_camera:
                # Wait for the first image, making sure the reader thread
                # has not exited in the meantime.
                while self._ip_grabbed is False and self._ip_exited is False:
                    time.sleep(.001)
                ret, latest = self._ip_grabbed, self._ip_image
                if ret is False or latest is None:
                    # The reader thread stopped (possibly before delivering
                    # any frame): report failure instead of copying None.
                    self._ip_running = False
                    ret, image = False, None
                else:
                    image = latest.copy()
            else:
                if self.video_file:
                    # Seek BEFORE reading so the frame returned for step i is
                    # listing[i]; seeking after the read served frame 0 twice
                    # and lagged one step behind.
                    self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i])
                ret, image = self.cap.read()
            if ret is False:
                print('VideoStreamer: Cannot get image from camera')
                return (None, False)
            w, h = image.shape[1], image.shape[0]
            w_new, h_new = process_resize(w, h, self.resize)
            image = cv2.resize(image, (w_new, h_new),
                               interpolation=self.interp)
            # NOTE(review): OpenCV captures are normally BGR; the RGB2GRAY
            # conversion is kept as-is to preserve existing outputs - confirm.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            image_file = str(self.listing[self.i])
            image = self.load_image(image_file)
        self.i = self.i + 1
        return (image, True)

    def start_ip_camera_thread(self):
        """ Start the background thread that keeps reading the IP camera.
        Returns
            self, so the call can be chained.
        """
        self._ip_thread = Thread(target=self.update_ip_camera, args=())
        # Set both flags before starting: the thread may fail immediately and
        # set _ip_exited = True, which must not be overwritten afterwards.
        self._ip_running = True
        self._ip_exited = False
        self._ip_thread.start()
        return self

    def update_ip_camera(self):
        """ Thread body: store the most recent frame until stopped or a read fails. """
        while self._ip_running:
            ret, img = self.cap.read()
            if ret is False:
                self._ip_running = False
                self._ip_exited = True
                self._ip_grabbed = False
                return
            self._ip_image = img
            self._ip_grabbed = ret
            self._ip_index += 1

    def cleanup(self):
        """ Ask the IP camera thread (if any) to stop. """
        self._ip_running = False
def process_resize(w, h, resize):
    """ Compute the target image size for a resize specification.

    Inputs
      w, h: Current image width and height.
      resize: Sequence of one or two numbers. A single value greater than -1
        is the target length of the longer side (aspect ratio kept); a single
        -1 keeps the size; two values are taken as (width, height).
    Returns
      (w_new, h_new): The target width and height.
    """
    count = len(resize)
    assert 0 < count <= 2
    if count == 1 and resize[0] > -1:
        factor = resize[0] / max(h, w)
        new_size = (int(round(w * factor)), int(round(h * factor)))
    elif count == 1 and resize[0] == -1:
        new_size = (w, h)
    else:
        new_size = (resize[0], resize[1])

    # Warn when the resulting resolution is unusually small or large.
    longest = max(new_size)
    if longest < 160:
        print('Warning: input resolution is very small, results may vary')
    elif longest > 2000:
        print('Warning: input resolution is very large, results may vary')

    return new_size
def frame2tensor(frame, device):
    """ Turn a numpy image into a float tensor on `device`.

    The array is divided by 255, converted to float32 and given two leading
    singleton dimensions.
    """
    normalized = frame / 255.
    tensor = torch.from_numpy(normalized).float()
    return tensor[None, None].to(device)
def read_image(path, device, resize, rotation, resize_float):
    """ Load a grayscale image, resize and optionally rotate it.

    Inputs
      path: Location of the image file.
      device: Device the returned tensor is moved to.
      resize: Resize specification forwarded to `process_resize`.
      rotation: Number of 90-degree rotations applied with `np.rot90`.
      resize_float: If True, convert to float32 before resizing instead of
        after.
    Returns
      (image, inp, scales): The float32 image, its tensor version and the
      (width, height) ratios between original and resized image (swapped for
      odd rotations); three Nones when the file cannot be read.
    """
    gray = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None, None, None

    h, w = gray.shape[0], gray.shape[1]
    w_new, h_new = process_resize(w, h, resize)
    scales = (float(w) / float(w_new), float(h) / float(h_new))

    target = (w_new, h_new)
    if resize_float:
        image = cv2.resize(gray.astype('float32'), target)
    else:
        image = cv2.resize(gray, target).astype('float32')

    if rotation != 0:
        image = np.rot90(image, k=rotation)
        if rotation % 2:
            # Width and height trade places after an odd number of turns.
            scales = scales[::-1]

    return image, frame2tensor(image, device), scales
def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
    """ Estimate the relative pose between two views from matched keypoints.

    Inputs
      kpts0, kpts1: Matched keypoints (N x 2, pixel coordinates) in each view.
      K0, K1: 3 x 3 intrinsic matrices of the two cameras.
      thresh: RANSAC inlier threshold in pixels; it is divided by the mean
        focal length because keypoints are normalized before estimation.
      conf: RANSAC confidence passed to `cv2.findEssentialMat`.
    Returns
      (R, t, inliers) for the candidate essential matrix with the most
      inliers, or None when there are fewer than 5 matches, no essential
      matrix is found, or no candidate yields any inlier.
    """
    if len(kpts0) < 5:
        return None

    # Average over all four focal lengths (fx, fy of both cameras).
    f_mean = np.mean([K0[0, 0], K0[1, 1], K1[0, 0], K1[1, 1]])
    norm_thresh = thresh / f_mean

    # Normalize: subtract the principal point, divide by the focal lengths.
    kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
    kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]

    E, mask = cv2.findEssentialMat(
        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf,
        method=cv2.RANSAC)

    # Report failure through the regular "no pose" return value instead of an
    # assert, which disappears under `python -O`.
    if E is None:
        return None

    best_num_inliers = 0
    ret = None
    # E may stack several 3 x 3 candidates vertically; try each of them.
    for _E in np.split(E, len(E) // 3):
        # NOTE(review): the meaning of the positional 1e9 depends on which
        # cv2.recoverPose overload is selected - confirm against the
        # installed OpenCV version.
        n, R, t, _ = cv2.recoverPose(
            _E, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t[:, 0], mask.ravel() > 0)
    return ret
def rotate_intrinsics(K, image_shape, rot):
    """ Adapt an intrinsic matrix to an image rotated by multiples of 90 deg.

    Inputs
      K: 3 x 3 intrinsic matrix of the un-rotated image.
      image_shape: The shape of the image after rotation.
      rot: Number of 90-degree rotations (at most 3); taken modulo 4.
    Returns
      A new 3 x 3 matrix with the dtype of K. For rot % 4 == 0 this is an
      unchanged copy of K.
    """
    assert rot <= 3
    # For odd rotations height and width are swapped back, so h and w are the
    # dimensions of the image before rotation.
    h, w = image_shape[:2][::-1 if (rot % 2) else 1]
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    rot = rot % 4
    if rot == 0:
        # No rotation: previously this fell through to the rot == 3 case.
        return np.array(K, dtype=K.dtype)
    if rot == 1:
        return np.array([[fy, 0., cy],
                         [0., fx, w-1-cx],
                         [0., 0., 1.]], dtype=K.dtype)
    elif rot == 2:
        return np.array([[fx, 0., w-1-cx],
                         [0., fy, h-1-cy],
                         [0., 0., 1.]], dtype=K.dtype)
    else:  # rot == 3
        return np.array([[fy, 0., h-1-cy],
                         [0., fx, cx],
                         [0., 0., 1.]], dtype=K.dtype)
def rotate_pose_inplane(i_T_w, rot):
    """ Left-multiply a 4 x 4 pose by a rotation about the z axis.

    `rot` indexes the angles (0, 270, 180, 90) degrees; the selected rotation
    is built as a float32 homogeneous matrix and applied to `i_T_w`.
    """
    theta = np.deg2rad((0, 270, 180, 90)[rot])
    c, s = np.cos(theta), np.sin(theta)
    in_plane = np.array([[c, -s, 0., 0.],
                         [s, c, 0., 0.],
                         [0., 0., 1., 0.],
                         [0., 0., 0., 1.]], dtype=np.float32)
    return np.dot(in_plane, i_T_w)
def scale_intrinsics(K, scales):
    """ Rescale an intrinsic matrix for a resized image.

    The first row of K is divided by scales[0] and the second row by
    scales[1]; the third row is left untouched.
    """
    inverse_scale = np.diag([1. / scales[0], 1. / scales[1], 1.])
    return np.dot(inverse_scale, K)
def to_homogeneous(points):
    """ Append a column of ones to an N x D array of points. """
    ones_column = np.ones_like(points[:, :1])
    return np.concatenate([points, ones_column], axis=-1)
def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1):
    """ Symmetric squared epipolar error of matches under a known pose.

    Inputs
      kpts0, kpts1: N x 2 matched keypoints in pixel coordinates.
      T_0to1: Transform whose upper-left 3 x 3 block is the rotation and whose
        last column holds the translation from view 0 to view 1.
      K0, K1: 3 x 3 intrinsic matrices.
    Returns
      Array of N error values in normalized image coordinates.
    """
    def _normalize(pts, K):
        # Subtract the principal point and divide by the focal lengths.
        return (pts - K[[0, 1], [2, 2]][None]) / K[[0, 1], [0, 1]][None]

    pts0 = to_homogeneous(_normalize(kpts0, K0))
    pts1 = to_homogeneous(_normalize(kpts1, K1))

    # Essential matrix E = [t]_x R built from the given pose.
    tx, ty, tz = T_0to1[:3, 3]
    t_cross = np.array([
        [0, -tz, ty],
        [tz, 0, -tx],
        [-ty, tx, 0]
    ])
    essential = t_cross @ T_0to1[:3, :3]

    lines_in_1 = pts0 @ essential.T
    lines_in_0 = pts1 @ essential
    residual = np.sum(pts1 * lines_in_1, -1)
    inv_norms = (1.0 / (lines_in_1[:, 0]**2 + lines_in_1[:, 1]**2)
                 + 1.0 / (lines_in_0[:, 0]**2 + lines_in_0[:, 1]**2))
    return residual**2 * inv_norms
def angle_error_mat(R1, R2):
    """ Angle in degrees of the relative rotation between R1 and R2. """
    relative = np.dot(R1.T, R2)
    # Clip to guard arccos against values slightly outside [-1, 1].
    cos_angle = np.clip((np.trace(relative) - 1) / 2, -1., 1.)
    return np.rad2deg(np.abs(np.arccos(cos_angle)))
def angle_error_vec(v1, v2):
    """ Angle in degrees between the vectors v1 and v2. """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    cos_angle = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos_angle))
def compute_pose_error(T_0to1, R, t):
    """ Angular errors of an estimated pose against the ground truth.

    Inputs
      T_0to1: Ground-truth transform (rotation in [:3, :3], translation in
        [:3, 3]).
      R, t: Estimated rotation matrix and translation vector.
    Returns
      (error_t, error_R): Translation-direction and rotation errors in
      degrees.
    """
    rot_gt, trans_gt = T_0to1[:3, :3], T_0to1[:3, 3]
    trans_err = angle_error_vec(t, trans_gt)
    # The translation is compared up to sign, so fold angles above 90 deg.
    trans_err = np.minimum(trans_err, 180 - trans_err)
    rot_err = angle_error_mat(R, rot_gt)
    return trans_err, rot_err
def pose_auc(errors, thresholds):
    """ Area under the recall-vs-error curve, normalized per threshold.

    Inputs
      errors: Sequence of pose errors, one per sample.
      thresholds: Iterable of error thresholds.
    Returns
      List with one value per threshold: the integral of the cumulative
      recall curve from 0 to the threshold, divided by the threshold.
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use whichever the installed NumPy provides.
    integrate = getattr(np, 'trapezoid', None)
    if integrate is None:
        integrate = np.trapz

    sort_idx = np.argsort(errors)
    errors = np.array(errors.copy())[sort_idx]
    recall = (np.arange(len(errors)) + 1) / len(errors)
    # Start the curve at the origin.
    errors = np.r_[0., errors]
    recall = np.r_[0., recall]
    aucs = []
    for t in thresholds:
        last_index = np.searchsorted(errors, t)
        # Extend the curve horizontally up to the threshold.
        r = np.r_[recall[:last_index], recall[last_index-1]]
        e = np.r_[errors[:last_index], t]
        aucs.append(integrate(r, x=e)/t)
    return aucs
def plot_image_pair(imgs, dpi=100, size=6, pad=.5):
    """ Show two grayscale images side by side on a new figure.

    Inputs
      imgs: Exactly two images.
      dpi: Figure resolution.
      size: Base size used for the figure dimensions; None keeps the
        matplotlib default.
      pad: Padding passed to `plt.tight_layout`.
    """
    n = len(imgs)
    assert n == 2, 'number of images must be two'
    if size is None:
        figsize = None
    else:
        figsize = (size*n, size*3/4)
    _, axes = plt.subplots(1, n, figsize=figsize, dpi=dpi)
    for idx, img in enumerate(imgs):
        axis = axes[idx]
        axis.imshow(img, cmap=plt.get_cmap('gray'), vmin=0, vmax=255)
        # Hide ticks and frame so that only the image remains visible.
        axis.get_yaxis().set_ticks([])
        axis.get_xaxis().set_ticks([])
        for spine in axis.spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=pad)
def plot_keypoints(kpts0, kpts1, color='w', ps=2):
    """ Scatter keypoints on the first two axes of the current figure.

    Inputs
      kpts0, kpts1: Keypoints (column 0 = x, column 1 = y) for each axis.
      color: Marker color.
      ps: Marker size.
    """
    axes = plt.gcf().axes
    for idx, pts in enumerate((kpts0, kpts1)):
        axes[idx].scatter(pts[:, 0], pts[:, 1], c=color, s=ps)
def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4):
    """ Draw match lines across the two axes of the current figure.

    Inputs
      kpts0, kpts1: Matched keypoints (column 0 = x, column 1 = y).
      color: Per-match colors, indexed like the keypoints.
      lw: Line width.
      ps: Marker size of the match end points.
    """
    fig = plt.gcf()
    axes = fig.axes
    # Draw once so the data transforms reflect the final layout.
    fig.canvas.draw()

    # Map data coordinates of each axis into figure coordinates.
    to_figure = fig.transFigure.inverted()
    fig_pts0 = to_figure.transform(axes[0].transData.transform(kpts0))
    fig_pts1 = to_figure.transform(axes[1].transData.transform(kpts1))

    segments = []
    for i in range(len(kpts0)):
        xs = (fig_pts0[i, 0], fig_pts1[i, 0])
        ys = (fig_pts0[i, 1], fig_pts1[i, 1])
        segments.append(matplotlib.lines.Line2D(
            xs, ys, zorder=1, transform=fig.transFigure, c=color[i],
            linewidth=lw))
    fig.lines = segments

    for idx, pts in enumerate((kpts0, kpts1)):
        axes[idx].scatter(pts[:, 0], pts[:, 1], c=color, s=ps)
def make_matching_plot(image0, image1, kpts0, kpts1, mkpts0, mkpts1,
                       color, text, path, show_keypoints=False,
                       fast_viz=False, opencv_display=False,
                       opencv_title='matches', small_text=[]):
    """ Render a match visualization for an image pair and save it to `path`.

    With `fast_viz` the work is delegated to `make_matching_plot_fast`
    (margin 10) and nothing is returned; otherwise the figure is drawn with
    matplotlib, saved and closed.

    Inputs
      image0, image1: The two images.
      kpts0, kpts1: All keypoints, drawn only if `show_keypoints` is set.
      mkpts0, mkpts1: Matched keypoints.
      color: Per-match colors.
      text: Lines of text for the top-left corner.
      path: Output file path.
      opencv_display, opencv_title: Used by the fast path only.
      small_text: Lines of small text for the bottom-left corner.
    """
    if fast_viz:
        make_matching_plot_fast(
            image0, image1, kpts0, kpts1, mkpts0, mkpts1, color, text, path,
            show_keypoints, 10, opencv_display, opencv_title, small_text)
        return

    plot_image_pair([image0, image1])
    if show_keypoints:
        # Black markers underneath smaller white ones give an outline.
        for kp_color, kp_size in (('k', 4), ('w', 2)):
            plot_keypoints(kpts0, kpts1, color=kp_color, ps=kp_size)
    plot_matches(mkpts0, mkpts1, color)

    fig = plt.gcf()

    # Dark text on a bright corner, light text otherwise.
    top_is_bright = image0[:100, :150].mean() > 200
    fig.text(
        0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes,
        fontsize=15, va='top', ha='left',
        color='k' if top_is_bright else 'w')

    bottom_is_bright = image0[-100:, :150].mean() > 200
    fig.text(
        0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes,
        fontsize=5, va='bottom', ha='left',
        color='k' if bottom_is_bright else 'w')

    plt.savefig(str(path), bbox_inches='tight', pad_inches=0)
    plt.close()
def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0,
                            mkpts1, color, text, path=None,
                            show_keypoints=False, margin=10,
                            opencv_display=False, opencv_title='',
                            small_text=[]):
    """ Draw a match visualization with OpenCV and return it.

    Inputs
      image0, image1: Two 2-D images placed side by side.
      kpts0, kpts1: All keypoints, drawn only if `show_keypoints` is set.
      mkpts0, mkpts1: Matched keypoints, joined by lines.
      color: Per-match colors; the first three channels are scaled by 255 and
        reversed before drawing.
      text: Lines of text for the top-left corner.
      path: If not None, the image is written to this file.
      margin: Width of the white gap between the two images.
      opencv_display, opencv_title: Show the result in an OpenCV window.
      small_text: Lines of small text for the bottom-left corner.
    Returns
      out: The H x W x 3 uint8 visualization.
    """
    H0, W0 = image0.shape
    H1, W1 = image1.shape
    H, W = max(H0, H1), W0 + W1 + margin
    # Horizontal offset of everything that belongs to the second image.
    x_shift = W0 + margin

    canvas = np.full((H, W), 255, np.uint8)
    canvas[:H0, :W0] = image0
    canvas[:H1, x_shift:] = image1
    out = np.stack([canvas] * 3, -1)

    if show_keypoints:
        kpts0 = np.round(kpts0).astype(int)
        kpts1 = np.round(kpts1).astype(int)
        for pts, dx in ((kpts0, 0), (kpts1, x_shift)):
            for x, y in pts:
                center = (x + dx, y)
                # Black disc below a smaller white one.
                cv2.circle(out, center, 2, (0, 0, 0), -1,
                           lineType=cv2.LINE_AA)
                cv2.circle(out, center, 1, (255, 255, 255), -1,
                           lineType=cv2.LINE_AA)

    mkpts0 = np.round(mkpts0).astype(int)
    mkpts1 = np.round(mkpts1).astype(int)
    line_colors = (np.array(color[:, :3]) * 255).astype(int)[:, ::-1]
    for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, line_colors):
        c = c.tolist()
        start, end = (x0, y0), (x1 + x_shift, y1)
        cv2.line(out, start, end, color=c, thickness=1,
                 lineType=cv2.LINE_AA)
        cv2.circle(out, start, 2, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, end, 2, c, -1, lineType=cv2.LINE_AA)

    # Scale factor for consistent visualization across image sizes.
    sc = min(H / 640., 2.0)
    txt_color_fg = (255, 255, 255)
    txt_color_bg = (0, 0, 0)

    def _put_outlined(label, origin, font_scale):
        # Thick dark text first, thin light text on top.
        cv2.putText(out, label, origin, cv2.FONT_HERSHEY_DUPLEX,
                    font_scale, txt_color_bg, 2, cv2.LINE_AA)
        cv2.putText(out, label, origin, cv2.FONT_HERSHEY_DUPLEX,
                    font_scale, txt_color_fg, 1, cv2.LINE_AA)

    # Big text, stacked downwards from the top.
    Ht = int(30 * sc)
    for i, t in enumerate(text):
        _put_outlined(t, (int(8*sc), Ht*(i+1)), 1.0*sc)

    # Small text, stacked upwards from the bottom.
    Ht = int(18 * sc)
    for i, t in enumerate(reversed(small_text)):
        _put_outlined(t, (int(8*sc), int(H-Ht*(i+.6))), 0.5*sc)

    if path is not None:
        cv2.imwrite(str(path), out)

    if opencv_display:
        cv2.imshow(opencv_title, out)
        cv2.waitKey(1)

    return out
def error_colormap(x):
    """ Map values to RGBA colors, clipped to [0, 1].

    The red channel is 2 - 2x, the green channel is 2x, blue is 0 and alpha
    is 1; the channels are stacked along a new last axis.
    """
    red = 2 - x*2
    green = x*2
    rgba = np.stack([red, green, np.zeros_like(x), np.ones_like(x)], -1)
    return np.clip(rgba, 0, 1)