endo-yuki-t
initial commit
d7dbcdd
raw
history blame
6.69 kB
#! /usr/bin/env python2
"""
I/O script to save and load the data coming with the MPI-Sintel low-level
computer vision benchmark.
For more details about the benchmark, please visit www.mpi-sintel.de
CHANGELOG:
v1.0 (2015/02/03): First release
Copyright (c) 2015 Jonas Wulff
Max Planck Institute for Intelligent Systems, Tuebingen, Germany
"""
# Requirements: Numpy as PIL/Pillow
import numpy as np
from PIL import Image
# Check for endianness, based on Daniel Scharstein's optical flow code.
# Using little-endian architecture, these two should be equal.
TAG_FLOAT = 202021.25
TAG_CHAR = 'PIEH'
def flow_read(filename):
""" Read optical flow from file, return (U,V) tuple.
Original code by Deqing Sun, adapted from Daniel Scharstein.
"""
f = open(filename,'rb')
check = np.fromfile(f,dtype=np.float32,count=1)[0]
assert check == TAG_FLOAT, ' flow_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
width = np.fromfile(f,dtype=np.int32,count=1)[0]
height = np.fromfile(f,dtype=np.int32,count=1)[0]
size = width*height
assert width > 0 and height > 0 and size > 1 and size < 100000000, ' flow_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height)
tmp = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width*2))
u = tmp[:,np.arange(width)*2]
v = tmp[:,np.arange(width)*2 + 1]
return u,v
def flow_write(filename,uv,v=None):
""" Write optical flow to file.
If v is None, uv is assumed to contain both u and v channels,
stacked in depth.
Original code by Deqing Sun, adapted from Daniel Scharstein.
"""
nBands = 2
if v is None:
assert(uv.ndim == 3)
assert(uv.shape[2] == 2)
u = uv[:,:,0]
v = uv[:,:,1]
else:
u = uv
assert(u.shape == v.shape)
height,width = u.shape
f = open(filename,'wb')
# write the header
f.write(TAG_CHAR)
np.array(width).astype(np.int32).tofile(f)
np.array(height).astype(np.int32).tofile(f)
# arrange into matrix form
tmp = np.zeros((height, width*nBands))
tmp[:,np.arange(width)*2] = u
tmp[:,np.arange(width)*2 + 1] = v
tmp.astype(np.float32).tofile(f)
f.close()
def depth_read(filename):
""" Read depth data from file, return as numpy array. """
f = open(filename,'rb')
check = np.fromfile(f,dtype=np.float32,count=1)[0]
assert check == TAG_FLOAT, ' depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
width = np.fromfile(f,dtype=np.int32,count=1)[0]
height = np.fromfile(f,dtype=np.int32,count=1)[0]
size = width*height
assert width > 0 and height > 0 and size > 1 and size < 100000000, ' depth_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height)
depth = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width))
return depth
def depth_write(filename, depth):
""" Write depth to file. """
height,width = depth.shape[:2]
f = open(filename,'wb')
# write the header
f.write(TAG_CHAR)
np.array(width).astype(np.int32).tofile(f)
np.array(height).astype(np.int32).tofile(f)
depth.astype(np.float32).tofile(f)
f.close()
def disparity_write(filename,disparity,bitdepth=16):
""" Write disparity to file.
bitdepth can be either 16 (default) or 32.
The maximum disparity is 1024, since the image width in Sintel
is 1024.
"""
d = disparity.copy()
# Clip disparity.
d[d>1024] = 1024
d[d<0] = 0
d_r = (d / 4.0).astype('uint8')
d_g = ((d * (2.0**6)) % 256).astype('uint8')
out = np.zeros((d.shape[0],d.shape[1],3),dtype='uint8')
out[:,:,0] = d_r
out[:,:,1] = d_g
if bitdepth > 16:
d_b = (d * (2**14) % 256).astype('uint8')
out[:,:,2] = d_b
Image.fromarray(out,'RGB').save(filename,'PNG')
def disparity_read(filename):
""" Return disparity read from filename. """
f_in = np.array(Image.open(filename))
d_r = f_in[:,:,0].astype('float64')
d_g = f_in[:,:,1].astype('float64')
d_b = f_in[:,:,2].astype('float64')
depth = d_r * 4 + d_g / (2**6) + d_b / (2**14)
return depth
#def cam_read(filename):
# """ Read camera data, return (M,N) tuple.
#
# M is the intrinsic matrix, N is the extrinsic matrix, so that
#
# x = M*N*X,
# with x being a point in homogeneous image pixel coordinates, X being a
# point in homogeneous world coordinates.
# """
# txtdata = np.loadtxt(filename)
# intrinsic = txtdata[0,:9].reshape((3,3))
# extrinsic = textdata[1,:12].reshape((3,4))
# return intrinsic,extrinsic
#
#
#def cam_write(filename,M,N):
# """ Write intrinsic matrix M and extrinsic matrix N to file. """
# Z = np.zeros((2,12))
# Z[0,:9] = M.ravel()
# Z[1,:12] = N.ravel()
# np.savetxt(filename,Z)
def cam_read(filename):
""" Read camera data, return (M,N) tuple.
M is the intrinsic matrix, N is the extrinsic matrix, so that
x = M*N*X,
with x being a point in homogeneous image pixel coordinates, X being a
point in homogeneous world coordinates.
"""
f = open(filename,'rb')
check = np.fromfile(f,dtype=np.float32,count=1)[0]
assert check == TAG_FLOAT, ' cam_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
M = np.fromfile(f,dtype='float64',count=9).reshape((3,3))
N = np.fromfile(f,dtype='float64',count=12).reshape((3,4))
return M,N
def cam_write(filename, M, N):
""" Write intrinsic matrix M and extrinsic matrix N to file. """
f = open(filename,'wb')
# write the header
f.write(TAG_CHAR)
M.astype('float64').tofile(f)
N.astype('float64').tofile(f)
f.close()
def segmentation_write(filename,segmentation):
""" Write segmentation to file. """
segmentation_ = segmentation.astype('int32')
seg_r = np.floor(segmentation_ / (256**2)).astype('uint8')
seg_g = np.floor((segmentation_ % (256**2)) / 256).astype('uint8')
seg_b = np.floor(segmentation_ % 256).astype('uint8')
out = np.zeros((segmentation.shape[0],segmentation.shape[1],3),dtype='uint8')
out[:,:,0] = seg_r
out[:,:,1] = seg_g
out[:,:,2] = seg_b
Image.fromarray(out,'RGB').save(filename,'PNG')
def segmentation_read(filename):
""" Return disparity read from filename. """
f_in = np.array(Image.open(filename))
seg_r = f_in[:,:,0].astype('int32')
seg_g = f_in[:,:,1].astype('int32')
seg_b = f_in[:,:,2].astype('int32')
segmentation = (seg_r * 256 + seg_g) * 256 + seg_b
return segmentation