Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

App Files Files Community

UserControllableLT-Latent-Transformer / expansion /dataloader /depthloader.py

endo-yuki-t

initial commit

d7dbcdd about 2 years ago

raw

history blame

No virus

8.53 kB

	import os
	import numbers
	import torch
	import torch.utils.data as data
	import torch
	import torchvision.transforms as transforms
	import random
	from PIL import Image, ImageOps
	import numpy as np
	import torchvision
	from . import depth_transforms as flow_transforms
	import pdb
	import cv2
	from utils.flowlib import read_flow
	from utils.util_flow import readPFM, load_calib_cam_to_cam

	def default_loader(path):
	    """Open the image file at ``path`` and return it converted to RGB mode."""
	    image = Image.open(path)
	    rgb_image = image.convert('RGB')
	    return rgb_image

	def flow_loader(path):
	    """Load a flow map from ``path``.

	    Paths that do not contain ``'.pfm'`` are handed to ``read_flow``.
	    Otherwise the first element returned by ``readPFM`` is used and its
	    third channel is overwritten with 1 before it is returned.
	    """
	    if '.pfm' not in path:
	        return read_flow(path)

	    flow = readPFM(path)[0]
	    # The dataset class in this file tests channel 2 == 1 when building its
	    # validity mask, so PFM flow starts out flagged as 1 everywhere.
	    flow[:, :, 2] = 1
	    return flow

	def load_exts(cam_file):
	    """Parse left/right 4x4 matrices from a camera text file.

	    A line is treated as a matrix record when its first whitespace-separated
	    token is ``L`` or ``R``; the remaining tokens on that line are parsed as
	    floats and reshaped to ``(4, 4)``. Any run of spaces or tabs is accepted
	    as a separator. All other lines (and tag-only lines) are ignored.

	    Args:
	        cam_file: path of the text file to read.

	    Returns:
	        ``(l_exts, r_exts)``: two lists of ``(4, 4)`` numpy arrays, in file
	        order, for the ``L`` and ``R`` records respectively.

	    Raises:
	        ValueError: if a record contains a non-numeric token or does not
	            hold exactly 16 values.
	    """
	    l_exts = []
	    r_exts = []
	    targets = {'L': l_exts, 'R': r_exts}

	    with open(cam_file, 'r') as f:
	        for line in f:
	            # split() with no argument collapses repeated whitespace, so
	            # double spaces or tabs no longer produce empty tokens.
	            fields = line.split()
	            if len(fields) < 2:
	                continue
	            target = targets.get(fields[0])
	            if target is None:
	                continue
	            values = [float(tok) for tok in fields[1:]]
	            target.append(np.asarray(values).reshape(4, 4))

	    return l_exts, r_exts

	def disparity_loader(path):
	    """Load a disparity map from ``path``.

	    Paths containing ``'.png'`` are opened as images, converted to a
	    contiguous float32 array and divided by 256. Any other path is read
	    with ``readPFM`` and the first element of its result is returned.
	    """
	    if '.png' not in path:
	        return readPFM(path)[0]

	    raw = Image.open(path)
	    disparity = np.ascontiguousarray(raw, dtype=np.float32) / 256
	    return disparity

	# triangulation
	def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5):
	    """Back-project pixels with disparity into homogeneous 3D points.

	    Depth is ``bl * fl / disp``; X and Y are the pixel offsets from
	    ``(cx, cy)`` scaled by ``depth / fl``.

	    Args:
	        disp: disparity array.
	        xcoord, ycoord: pixel coordinate arrays matching ``disp``.
	        bl: baseline factor.
	        fl: focal length in pixels (the default 450px corresponds to a
	            15mm focal length).
	        cx, cy: principal point in pixels.

	    Returns:
	        Array of shape ``(4, N)`` with rows X, Y, Z and a row of ones,
	        where N is the number of input elements.
	    """
	    depth = bl * fl / disp  # 450px->15mm focal length
	    x_cam = (xcoord - cx) * depth / fl
	    y_cam = (ycoord - cy) * depth / fl

	    # Stack the three coordinate planes and flatten the spatial dimensions.
	    planes = [plane[np.newaxis] for plane in (x_cam, y_cam, depth)]
	    xyz = np.concatenate(planes, axis=0).reshape(3, -1)

	    # Append the homogeneous coordinate.
	    ones_row = np.ones((1, xyz.shape[-1]))
	    return np.vstack((xyz, ones_row))

	class myImageFloder(data.Dataset):
	    """Dataset of augmented image pairs with flow and depth-related targets.

	    Each item loads two images, builds a 3-channel flow array plus a
	    7-channel auxiliary array, applies a spatial and then a photometric
	    transform from ``flow_transforms``, optionally paints a random
	    rectangle of the second image with its mean colour, and returns the
	    results (see ``__getitem__``).
	    """

	    def __init__(self, iml0, iml1, flowl0, loader=default_loader, dploader= flow_loader, scale=1.,shape=[320,448], order=1, noise=0.06, pca_augmentor=True, prob = 1.,sc=False,disp0=None,disp1=None,calib=None ):
	        """Store the sample lists and augmentation settings.

	        Args:
	            iml0, iml1: indexable collections of first/second image
	                locations, each passed to ``loader``.
	            flowl0: indexable collection of flow locations, passed to
	                ``dploader`` when ``sc`` is true.
	            loader: callable that loads an image.
	            dploader: callable that loads a flow array.
	            scale: stored on the instance; not read by the methods of
	                this class.
	            shape: ``[height, width]`` target size given to the spatial
	                transform / random crop.
	            order: interpolation order forwarded to ``SpatialAug``.
	            noise: noise level forwarded to ``ChromaticAug``.
	            pca_augmentor: selects which augmentor object is constructed
	                in ``__getitem__`` (see the review note there).
	            prob: probability of using ``SpatialAug`` instead of
	                ``RandomCrop``.
	            sc: when true, targets are derived from flow, disparities and
	                calibration; otherwise a depth map is loaded directly.
	            disp0, disp1: indexable collections of disparity locations
	                for the two frames (used when ``sc`` is true).
	            calib: indexable collection of calibration file paths (used
	                when ``sc`` is true).
	        """
	        self.iml0 = iml0
	        self.iml1 = iml1
	        self.flowl0 = flowl0
	        self.loader = loader
	        self.dploader = dploader
	        self.scale = scale
	        # Copy so instances never share the list object used as the
	        # default argument (or a list the caller keeps mutating).
	        self.shape = list(shape)
	        self.order = order
	        self.noise = noise
	        self.pca_augmentor = pca_augmentor
	        self.prob = prob
	        self.sc = sc
	        self.disp0 = disp0
	        self.disp1 = disp1
	        self.calib = calib

	    def __getitem__(self, index):
	        """Load, augment and return the sample at ``index``.

	        Returns:
	            A tuple ``(iml0, iml1, flowl0, change_size, intr, imol0,
	            imol1, box)`` where

	            * ``iml0`` / ``iml1`` are ``torch.Tensor`` images transposed
	              to channel-first order after both transforms,
	            * ``flowl0`` is the float32 array holding the first three
	              channels of the transformed target (flow x, flow y, mask),
	            * ``change_size`` holds the remaining seven channels,
	            * ``intr`` is the intrinsics list returned by the transforms
	              (initialised as ``[fl, cx, cy, bl]``),
	            * ``imol0`` / ``imol1`` are the images returned by the spatial
	              transform,
	            * ``box`` is ``[cx-sx, cx+sx, cy-sy, cy+sy]`` for the painted
	              rectangle (all zeros when none was painted).
	        """
	        iml0 = self.iml0[index]
	        iml1 = self.iml1[index]
	        flowl0 = self.flowl0[index]
	        th, tw = self.shape

	        iml0 = self.loader(iml0)
	        iml1 = self.loader(iml1)

	        # get disparity
	        if self.sc:
	            flowl0 = self.dploader(flowl0)
	            flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)
	            flowl0[np.isnan(flowl0)] = 1e6  # set to max

	            # Pick intrinsics and disparities according to the calibration path.
	            if 'camera_data.txt' in self.calib[index]:
	                bl = 1
	                if '15mm_' in self.calib[index]:
	                    fl = 450  # 450
	                else:
	                    fl = 1050
	                cx = 479.5
	                cy = 269.5
	                # negative disp
	                d1 = np.abs(disparity_loader(self.disp0[index]))
	                d2 = np.abs(disparity_loader(self.disp1[index]) + d1)
	            elif 'Sintel' in self.calib[index]:
	                fl = 1000
	                bl = 1
	                cx = 511.5
	                cy = 217.5
	                # Zero disparities make the validity mask below all-zero.
	                d1 = np.zeros(flowl0.shape[:2])
	                d2 = np.zeros(flowl0.shape[:2])
	            else:
	                ints = load_calib_cam_to_cam(self.calib[index])
	                fl = ints['K_cam2'][0, 0]
	                cx = ints['K_cam2'][0, 2]
	                cy = ints['K_cam2'][1, 2]
	                bl = ints['b20'] - ints['b30']
	                d1 = disparity_loader(self.disp0[index])
	                d2 = disparity_loader(self.disp1[index])

	            # A pixel stays valid only if the flow mask is 1 and both
	            # disparities are non-zero.
	            flowl0[:, :, 2] = np.logical_and(
	                np.logical_and(flowl0[:, :, 2] == 1, d1 != 0), d2 != 0
	            ).astype(float)

	            shape = d1.shape
	            mesh = np.meshgrid(range(shape[1]), range(shape[0]))
	            xcoord = mesh[0].astype(float)
	            ycoord = mesh[1].astype(float)

	            # triangulation in two frames
	            P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy)
	            P1 = triangulation(d2, xcoord + flowl0[:, :, 0], ycoord + flowl0[:, :, 1],
	                               bl=bl, fl=fl, cx=cx, cy=cy)
	            dis0 = P0[2]
	            dis1 = P1[2]

	            # Channel 0: Z of the first frame; channels 1-3: d1, d2, d2;
	            # channels 4-6: 3D displacement P1 - P0.
	            change_size = dis0.reshape(shape).astype(np.float32)
	            flow3d = (P1 - P0)[:3].reshape((3,) + shape).transpose((1, 2, 0))

	            gt_normal = np.concatenate(
	                (d1[:, :, np.newaxis], d2[:, :, np.newaxis], d2[:, :, np.newaxis]), -1
	            )
	            change_size = np.concatenate((change_size[:, :, np.newaxis], gt_normal, flow3d), 2)
	        else:
	            # No flow supervision: zero flow, and only channel 0 of the
	            # auxiliary target is filled from a loaded depth map.
	            shape = iml0.size
	            shape = [shape[1], shape[0]]
	            flowl0 = np.zeros((shape[0], shape[1], 3))
	            change_size = np.zeros((shape[0], shape[1], 7))
	            depth = disparity_loader(self.iml1[index].replace('camera', 'groundtruth'))
	            change_size[:, :, 0] = depth

	            seqid = self.iml0[index].split('/')[-5].rsplit('_', 3)[0]
	            # NOTE(review): calibration root is hard-coded to one machine's layout.
	            ints = load_calib_cam_to_cam('/data/gengshay/KITTI/%s/calib_cam_to_cam.txt'%seqid)
	            fl = ints['K_cam2'][0, 0]
	            cx = ints['K_cam2'][0, 2]
	            cy = ints['K_cam2'][1, 2]
	            bl = ints['b20'] - ints['b30']

	        # Scale to [0, 1] and reverse the channel order (RGB -> BGR with
	        # the default loader).
	        iml1 = np.asarray(iml1) / 255.
	        iml0 = np.asarray(iml0) / 255.
	        iml0 = iml0[:, :, ::-1].copy()
	        iml1 = iml1[:, :, ::-1].copy()

	        ## following data augmentation procedure in PWCNet
	        ## https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/data_augmentation_layer.cu
	        import __main__  # a workaround for "discount_coeff"
	        try:
	            # NOTE(review): the iteration-count file location is hard-coded.
	            with open('/scratch/gengshay/iter_counts-%d.txt'%int(__main__.args.logname.split('-')[-1]), 'r') as f:
	                iter_counts = int(f.readline())
	        except Exception:
	            # Best effort: without a readable counter, behave as at
	            # iteration 0. KeyboardInterrupt/SystemExit still propagate.
	            iter_counts = 0
	        schedule = [0.5, 1., 50000.]  # initial coeff, final_coeff, half life
	        # 1.0986 ~= ln(3), so the coefficient is halfway between the
	        # initial and final value when iter_counts == schedule[2].
	        schedule_coeff = schedule[0] + (schedule[1] - schedule[0]) * \
	            (2 / (1 + np.exp(-1.0986 * iter_counts / schedule[2])) - 1)

	        # NOTE(review): this object is built but never added to either
	        # transform below, so the ``pca_augmentor`` flag currently has no
	        # effect on the output. Left as-is to avoid changing behaviour.
	        if self.pca_augmentor:
	            pca_augmentor = flow_transforms.pseudoPCAAug(schedule_coeff=schedule_coeff)
	        else:
	            pca_augmentor = flow_transforms.Scale(1., order=0)

	        # Spatial transform: full spatial augmentation with probability
	        # ``self.prob``, otherwise a plain random crop.
	        if np.random.binomial(1, self.prob):
	            co_transform1 = flow_transforms.Compose([
	                flow_transforms.SpatialAug([th, tw],
	                                           scale=[0.2, 0., 0.1],
	                                           rot=[0.4, 0.],
	                                           trans=[0.4, 0.],
	                                           squeeze=[0.3, 0.], schedule_coeff=schedule_coeff, order=self.order),
	            ])
	        else:
	            co_transform1 = flow_transforms.Compose([
	                flow_transforms.RandomCrop([th, tw]),
	            ])

	        # Photometric transform.
	        co_transform2 = flow_transforms.Compose([
	            flow_transforms.pseudoPCAAug(schedule_coeff=schedule_coeff),
	            flow_transforms.ChromaticAug(schedule_coeff=schedule_coeff, noise=self.noise),
	        ])

	        # Transform flow and auxiliary channels together (3 + 7 channels).
	        flowl0 = np.concatenate([flowl0, change_size], -1)
	        augmented, flowl0, intr = co_transform1([iml0, iml1], flowl0, [fl, cx, cy, bl])
	        # Images as returned by the spatial transform.
	        # NOTE(review): confirm co_transform2 does not modify these in place.
	        imol0 = augmented[0]
	        imol1 = augmented[1]
	        augmented, flowl0, intr = co_transform2(augmented, flowl0, intr)

	        iml0 = augmented[0]
	        iml1 = augmented[1]
	        flowl0 = flowl0.astype(np.float32)
	        change_size = flowl0[:, :, 3:]
	        flowl0 = flowl0[:, :, :3]

	        # randomly cover a region
	        # From here on cx/cy are the rectangle centre (row, column) and
	        # sx/sy its half extents; they no longer hold the principal point.
	        sx = 0; sy = 0; cx = 0; cy = 0
	        if np.random.binomial(1, 0.5):
	            sx = int(np.random.uniform(25, 100))
	            sy = int(np.random.uniform(25, 100))
	            cx = int(np.random.uniform(sx, iml1.shape[0] - sx))
	            cy = int(np.random.uniform(sy, iml1.shape[1] - sy))
	            # Fill the rectangle with the per-channel mean of the image.
	            iml1[cx-sx:cx+sx, cy-sy:cy+sy] = np.mean(np.mean(iml1, 0), 0)[np.newaxis, np.newaxis]

	        # HWC -> CHW tensors.
	        iml0 = torch.Tensor(np.transpose(iml0, (2, 0, 1)))
	        iml1 = torch.Tensor(np.transpose(iml1, (2, 0, 1)))

	        return iml0, iml1, flowl0, change_size, intr, imol0, imol1, np.asarray([cx-sx, cx+sx, cy-sy, cy+sy])

	    def __len__(self):
	        """Return the number of samples (length of ``iml0``)."""
	        return len(self.iml0)