Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

UserControllableLT-Latent-Transformer / expansion /utils /sintel_io.py

endo-yuki-t

initial commit

d7dbcdd about 2 years ago

6.69 kB

	#! /usr/bin/env python2

	"""
	I/O script to save and load the data coming with the MPI-Sintel low-level
	computer vision benchmark.

	For more details about the benchmark, please visit www.mpi-sintel.de

	CHANGELOG:
	v1.0 (2015/02/03): First release

	Copyright (c) 2015 Jonas Wulff
	Max Planck Institute for Intelligent Systems, Tuebingen, Germany

	"""

	# Requirements: Numpy and PIL/Pillow
	import numpy as np
	from PIL import Image

	# Check for endianness, based on Daniel Scharstein's optical flow code.
	# Using little-endian architecture, these two should be equal: the four tag
	# bytes, reinterpreted as a float32, give TAG_FLOAT. Readers compare the
	# first float32 of a file against TAG_FLOAT; writers emit TAG_CHAR.
	TAG_FLOAT = 202021.25
	# Kept as bytes (not text) so it can be written to files opened in 'wb'
	# mode on Python 3; on Python 2 a bytes literal is the same as a str.
	TAG_CHAR = b'PIEH'

	def flow_read(filename):
	    """ Read optical flow from file, return (U,V) tuple.

	    The file starts with a float32 tag, followed by the int32 width and
	    height, followed by float32 samples stored row by row with the u and v
	    values of each pixel interleaved.

	    Returns two float32 arrays of shape (height, width).

	    Raises AssertionError if the tag does not match TAG_FLOAT or if the
	    stored size is implausible.

	    Original code by Deqing Sun, adapted from Daniel Scharstein.
	    """
	    # The context manager closes the file even when a check below fails.
	    with open(filename,'rb') as f:
	        check = np.fromfile(f,dtype=np.float32,count=1)[0]
	        assert check == TAG_FLOAT, ' flow_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
	        # Convert to Python ints so that width*height cannot wrap in int32.
	        width = int(np.fromfile(f,dtype=np.int32,count=1)[0])
	        height = int(np.fromfile(f,dtype=np.int32,count=1)[0])
	        size = width*height
	        assert width > 0 and height > 0 and size > 1 and size < 100000000, ' flow_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height)
	        # Everything after the header is the interleaved flow data.
	        tmp = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width*2))

	    # Even columns hold u, odd columns hold v.
	    cols = np.arange(width)*2
	    u = tmp[:,cols]
	    v = tmp[:,cols + 1]
	    return u,v

	def flow_write(filename,uv,v=None):
	    """ Write optical flow to file.

	    If v is None, uv is assumed to contain both u and v channels,
	    stacked in depth. Otherwise uv is taken as the u channel and v as the
	    v channel; both must have the same 2-D shape.

	    The file receives the 4-byte tag, the int32 width and height, and the
	    float32 samples row by row with u and v interleaved per pixel.

	    Original code by Deqing Sun, adapted from Daniel Scharstein.
	    """
	    if v is None:
	        assert(uv.ndim == 3)
	        assert(uv.shape[2] == 2)
	        u = uv[:,:,0]
	        v = uv[:,:,1]
	    else:
	        u = uv

	    assert(u.shape == v.shape)
	    height,width = u.shape

	    # arrange into matrix form: even columns are u, odd columns are v
	    tmp = np.empty((height, width*2), dtype=np.float32)
	    tmp[:,0::2] = u
	    tmp[:,1::2] = v

	    # A file opened in binary mode only accepts bytes on Python 3, so
	    # encode the tag if the module constant is a text string.
	    tag = TAG_CHAR if isinstance(TAG_CHAR, bytes) else TAG_CHAR.encode('ascii')

	    # The context manager closes the file even if a write fails.
	    with open(filename,'wb') as f:
	        # write the header
	        f.write(tag)
	        np.array(width).astype(np.int32).tofile(f)
	        np.array(height).astype(np.int32).tofile(f)
	        tmp.tofile(f)


	def depth_read(filename):
	    """ Read depth data from file, return as numpy array.

	    The file starts with a float32 tag, followed by the int32 width and
	    height, followed by the float32 depth values stored row by row.

	    Returns a float32 array of shape (height, width).

	    Raises AssertionError if the tag does not match TAG_FLOAT or if the
	    stored size is implausible.
	    """
	    # The context manager closes the file even when a check below fails.
	    with open(filename,'rb') as f:
	        check = np.fromfile(f,dtype=np.float32,count=1)[0]
	        assert check == TAG_FLOAT, ' depth_read:: Wrong tag in depth file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
	        # Convert to Python ints so that width*height cannot wrap in int32.
	        width = int(np.fromfile(f,dtype=np.int32,count=1)[0])
	        height = int(np.fromfile(f,dtype=np.int32,count=1)[0])
	        size = width*height
	        assert width > 0 and height > 0 and size > 1 and size < 100000000, ' depth_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height)
	        depth = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width))
	    return depth

	def depth_write(filename, depth):
	    """ Write depth to file.

	    The file receives the 4-byte tag, the int32 width and height (taken
	    from the first two dimensions of depth), and the depth values
	    converted to float32.
	    """
	    height,width = depth.shape[:2]

	    # A file opened in binary mode only accepts bytes on Python 3, so
	    # encode the tag if the module constant is a text string.
	    tag = TAG_CHAR if isinstance(TAG_CHAR, bytes) else TAG_CHAR.encode('ascii')

	    # The context manager closes the file even if a write fails.
	    with open(filename,'wb') as f:
	        # write the header
	        f.write(tag)
	        np.array(width).astype(np.int32).tofile(f)
	        np.array(height).astype(np.int32).tofile(f)

	        depth.astype(np.float32).tofile(f)


	def disparity_write(filename,disparity,bitdepth=16):
	    """ Write disparity to file.

	    bitdepth can be either 16 (default) or 32.

	    The maximum disparity is 1024, since the image width in Sintel
	    is 1024.

	    The value is stored as fixed point in an RGB PNG: red holds
	    floor(d / 4), green holds the next 8 bits (steps of 2**-6) and, when
	    bitdepth is greater than 16, blue holds 8 further bits (steps of
	    2**-14). Otherwise blue is left at zero. The input array is not
	    modified.
	    """
	    # Smallest step the chosen encoding can represent.
	    if bitdepth > 16:
	        step = 2.0**-14
	    else:
	        step = 2.0**-6

	    # Clip disparity. The upper bound is the largest encodable value:
	    # exactly 1024 would make d / 4 equal 256, which does not fit in uint8.
	    d = np.clip(np.asarray(disparity, dtype='float64'), 0, 1024 - step)

	    d_r = (d / 4.0).astype('uint8')
	    d_g = ((d * (2.0**6)) % 256).astype('uint8')

	    out = np.zeros((d.shape[0],d.shape[1],3),dtype='uint8')
	    out[:,:,0] = d_r
	    out[:,:,1] = d_g

	    if bitdepth > 16:
	        d_b = ((d * (2**14)) % 256).astype('uint8')
	        out[:,:,2] = d_b

	    Image.fromarray(out,'RGB').save(filename,'PNG')


	def disparity_read(filename):
	    """ Return disparity read from filename.

	    Decodes the RGB fixed-point layout produced by disparity_write:
	    red counts units of 4, green units of 2**-6 and blue units of 2**-14.
	    The result is a float64 array with one value per pixel.
	    """
	    f_in = np.array(Image.open(filename))
	    d_r = f_in[:,:,0].astype('float64')
	    d_g = f_in[:,:,1].astype('float64')
	    d_b = f_in[:,:,2].astype('float64')

	    # The divisors must mirror the 2**6 and 2**14 scale factors applied
	    # when the green and blue channels are written.
	    disparity = d_r * 4 + d_g / (2**6) + d_b / (2**14)
	    return disparity


	#def cam_read(filename):
	# """ Read camera data, return (M,N) tuple.
	#
	# M is the intrinsic matrix, N is the extrinsic matrix, so that
	#
	# x = MNX,
	# with x being a point in homogeneous image pixel coordinates, X being a
	# point in homogeneous world coordinates.
	# """
	# txtdata = np.loadtxt(filename)
	# intrinsic = txtdata[0,:9].reshape((3,3))
	# extrinsic = txtdata[1,:12].reshape((3,4))
	# return intrinsic,extrinsic
	#
	#
	#def cam_write(filename,M,N):
	# """ Write intrinsic matrix M and extrinsic matrix N to file. """
	# Z = np.zeros((2,12))
	# Z[0,:9] = M.ravel()
	# Z[1,:12] = N.ravel()
	# np.savetxt(filename,Z)

	def cam_read(filename):
	    """ Read camera data, return (M,N) tuple.

	    M is the intrinsic matrix, N is the extrinsic matrix, so that

	    x = M*N*X,
	    with x being a point in homogeneous image pixel coordinates, X being a
	    point in homogeneous world coordinates.

	    The file starts with a float32 tag, followed by 9 float64 values for
	    M (returned as 3x3) and 12 float64 values for N (returned as 3x4).

	    Raises AssertionError if the tag does not match TAG_FLOAT.
	    """
	    # The context manager closes the file even when the tag check fails.
	    with open(filename,'rb') as f:
	        check = np.fromfile(f,dtype=np.float32,count=1)[0]
	        assert check == TAG_FLOAT, ' cam_read:: Wrong tag in camera file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
	        M = np.fromfile(f,dtype='float64',count=9).reshape((3,3))
	        N = np.fromfile(f,dtype='float64',count=12).reshape((3,4))
	    return M,N

	def cam_write(filename, M, N):
	    """ Write intrinsic matrix M and extrinsic matrix N to file.

	    The file receives the 4-byte tag followed by the values of M and then
	    the values of N, each converted to float64. M and N may be numpy
	    arrays or anything np.asarray accepts.
	    """
	    # A file opened in binary mode only accepts bytes on Python 3, so
	    # encode the tag if the module constant is a text string.
	    tag = TAG_CHAR if isinstance(TAG_CHAR, bytes) else TAG_CHAR.encode('ascii')

	    # The context manager closes the file even if a write fails.
	    with open(filename,'wb') as f:
	        # write the header
	        f.write(tag)
	        np.asarray(M, dtype='float64').tofile(f)
	        np.asarray(N, dtype='float64').tofile(f)


	def segmentation_write(filename,segmentation):
	    """ Write segmentation to file.

	    Each label is converted to int32 and split into three bytes that are
	    stored in the red (most significant), green and blue (least
	    significant) channels of an RGB PNG.
	    """
	    segmentation_ = segmentation.astype('int32')

	    # Integer floor division and modulo keep the arithmetic exact and
	    # avoid a round trip through floating point.
	    seg_r = (segmentation_ // (256**2)).astype('uint8')
	    seg_g = ((segmentation_ % (256**2)) // 256).astype('uint8')
	    seg_b = (segmentation_ % 256).astype('uint8')

	    out = np.zeros((segmentation.shape[0],segmentation.shape[1],3),dtype='uint8')
	    out[:,:,0] = seg_r
	    out[:,:,1] = seg_g
	    out[:,:,2] = seg_b

	    Image.fromarray(out,'RGB').save(filename,'PNG')


	def segmentation_read(filename):
	    """ Return segmentation read from filename.

	    The red, green and blue channels of the image are combined into one
	    int32 label per pixel, with red as the most significant byte.
	    """
	    pixels = np.array(Image.open(filename))

	    # Widen each channel to int32 before combining them.
	    red, green, blue = [pixels[:,:,c].astype('int32') for c in range(3)]

	    high_bytes = red * 256 + green
	    return high_bytes * 256 + blue