Spaces:

dbuscombe
/

SandSnapModelDemo

Sleeping

SandSnapModelDemo / app_files /src /sedinet_eval.py

d86998c over 1 year ago

11.2 kB

	# Written by Dr Daniel Buscombe, Marda Science LLC
	# for the SandSnap Program
	#
	# MIT License
	#
	# Copyright (c) 2020-2021, Marda Science LLC
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all
	# copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.


	##> Release v1.4 (Aug 2021)

	###===================================================
	# import libraries
	from sedinet_models import *

	###===================================================
	def get_data_generator(df, indices, greyscale, tilesize,batch_size=16):
	    """
	    Yield batches of images, with no associated metric, for "unseen" samples.

	    A single pass is made over `indices`. Each image is read from the path
	    stored under the 'files' key of row `df.iloc[i]`, centre-cropped to
	    `tilesize` x `tilesize` when its first two dimensions differ from that,
	    and collected until `batch_size` images are available.

	    Parameters
	    ----------
	    df : object supporting `.iloc[i]` row access, rows having a 'files' entry
	    indices : iterable of row positions to read
	    greyscale : if true, images are converted with mode 'LA' and only the
	        first band is kept, with a trailing axis of length 1 added
	    tilesize : side length, in pixels, of the square tile
	    batch_size : number of images per yielded batch

	    Yields
	    ------
	    numpy.ndarray
	        The stacked images of one batch. Images left over at the end of the
	        pass (fewer than `batch_size`) are not yielded.

	    Raises
	    ------
	    ValueError
	        If an image is smaller than `tilesize` along either of its first two
	        dimensions, or (colour mode) the image array is not 3-dimensional.
	    """
	    size = int(tilesize)
	    images = []
	    for i in indices:
	        file = df.iloc[i]['files']

	        # the context manager releases the file handle as soon as the pixel
	        # data has been copied into a numpy array
	        with Image.open(file) as src:
	            if greyscale:
	                im = np.array(src.convert('LA'))[:, :, 0]
	                nx, ny = np.shape(im)
	            else:
	                im = np.array(src)
	                nx, ny, _ = np.shape(im)

	        if (nx != size) or (ny != size):
	            if (nx < size) or (ny < size):
	                # a negative slice start would wrap around and silently
	                # return a crop of the wrong size
	                raise ValueError(
	                    "image %s is %i x %i, smaller than the requested tile size %i"
	                    % (file, nx, ny, size))
	            # start + size (rather than centre + size//2) keeps the crop
	            # exactly `size` pixels wide when `size` is odd
	            x0 = nx // 2 - size // 2
	            y0 = ny // 2 - size // 2
	            im = im[x0:x0 + size, y0:y0 + size]

	        if greyscale:
	            im = np.expand_dims(im, axis=2)
	        images.append(im)

	        if len(images) >= batch_size:
	            yield np.array(images)
	            images = []

	###===================================================
	def get_data_generator_1vars(df, indices, for_training, vars, greyscale,
	                             tilesize, batch_size=16):
	    """
	    Yield batches of images together with 1 associated metric.

	    Each image is read from the path stored under the 'files' key of row
	    `df.iloc[i]`; the metric is the value stored under `vars[0]` in the same
	    row. Images whose first two dimensions differ from `tilesize` are
	    centre-cropped to `tilesize` x `tilesize`.

	    Parameters
	    ----------
	    df : object supporting `.iloc[i]` row access, rows having a 'files' entry
	        and a `vars[0]` entry
	    indices : iterable of row positions to read
	    for_training : if false, a single pass is made over `indices`; otherwise
	        the pass is repeated indefinitely
	    vars : sequence whose first element names the metric to read
	    greyscale : if true, images are converted with mode 'LA' and only the
	        first band is kept, with a trailing axis of length 1 added
	    tilesize : side length, in pixels, of the square tile
	    batch_size : number of images per yielded batch

	    Yields
	    ------
	    tuple
	        `(images, [values])` where `images` is the stacked image array and
	        `values` the array of metric values for the same rows. When
	        `for_training` is false, rows left over at the end of the pass
	        (fewer than `batch_size`) are not yielded.

	    Raises
	    ------
	    ValueError
	        If an image is smaller than `tilesize` along either of its first two
	        dimensions, or (colour mode) the image array is not 3-dimensional.
	    """
	    size = int(tilesize)
	    images, p1s = [], []
	    while True:
	        for i in indices:
	            r = df.iloc[i]
	            file, p1 = r['files'], r[vars[0]]

	            # the context manager releases the file handle as soon as the
	            # pixel data has been copied into a numpy array
	            with Image.open(file) as src:
	                if greyscale:
	                    im = np.array(src.convert('LA'))[:, :, 0]
	                    nx, ny = np.shape(im)
	                else:
	                    im = np.array(src)
	                    nx, ny, _ = np.shape(im)

	            if (nx != size) or (ny != size):
	                if (nx < size) or (ny < size):
	                    # a negative slice start would wrap around and silently
	                    # return a crop of the wrong size
	                    raise ValueError(
	                        "image %s is %i x %i, smaller than the requested tile size %i"
	                        % (file, nx, ny, size))
	                # start + size (rather than centre + size//2) keeps the crop
	                # exactly `size` pixels wide when `size` is odd
	                x0 = nx // 2 - size // 2
	                y0 = ny // 2 - size // 2
	                im = im[x0:x0 + size, y0:y0 + size]

	            if greyscale:
	                im = np.expand_dims(im, axis=2)
	            images.append(im)
	            p1s.append(p1)

	            if len(images) >= batch_size:
	                yield np.array(images), [np.array(p1s)]
	                images, p1s = [], []

	        if not for_training:
	            break

	###===================================================
	def estimate_categorical(vars, csvfile, res_folder, dropout,
	                         numclass, greyscale, name, mode):
	    """
	    Use a SediNet model for categorical prediction on the samples in `csvfile`.

	    Builds the weights file name from `name`, `mode`, `vars[0]` and the
	    module-level settings (BATCH_SIZE, IM_HEIGHT, IM_WIDTH, SHALLOW,
	    CAT_LOSS, DO_AUG), creates one categorical model, runs
	    `predict_test_train_cat` on the samples returned by `get_df`, then
	    clears the Keras session and calls `tidy(name, res_folder)`.

	    Parameters
	    ----------
	    vars : sequence whose first element names the categorical variable
	    csvfile : passed to `get_df` to obtain the sample indices and table
	    res_folder : folder searched for the weights file when it is not found
	        at the bare file name; also passed to `tidy`
	    dropout : passed to `make_cat_sedinet`
	    numclass : number of classes; class ids are 0 .. numclass-1
	    greyscale : passed through to `predict_test_train_cat`
	    name : prefix of the weights file name; also passed to the prediction
	        routine and to `tidy`
	    mode : second component of the weights file name

	    Raises
	    ------
	    NotImplementedError
	        If BATCH_SIZE is a list: only a single model is built here, so an
	        ensemble over several batch sizes cannot be evaluated.
	    """
	    # fail early and explicitly: the original code referenced an undefined
	    # list of models on this path
	    if isinstance(BATCH_SIZE, list):
	        raise NotImplementedError(
	            "estimate_categorical builds a single model; "
	            "BATCH_SIZE must be a single value, not a list")

	    # class id -> class label (the label is the id as a string)
	    ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))

	    ##======================================
	    ## read the sample indices and table to predict on
	    test_idx, test_df, _ = get_df(csvfile, fortrain=True)

	    ##======================================
	    ## name of the weights file, built from the run settings
	    depth_tag = "_shallow_" if SHALLOW is True else "_"
	    aug_tag = "_aug.hdf5" if DO_AUG is True else "_noaug.hdf5"
	    weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
	        "_"+str(IM_WIDTH)+depth_tag+vars[0]+"_"+CAT_LOSS+aug_tag

	    # fall back to the results folder when the file is not found as named
	    if not os.path.exists(weights_path):
	        weights_path = res_folder + os.sep + weights_path
	        print("Using %s" % (weights_path))

	    ##======================================
	    ## create the model and predict
	    SM = make_cat_sedinet(ID_MAP, dropout)

	    predict_test_train_cat(test_df, None, test_idx, None, vars[0],
	                           SM, list(ID_MAP), weights_path, greyscale,
	                           name, DO_AUG, IM_HEIGHT)

	    K.clear_session()

	    ##===================================
	    ## move model files and plots to the results folder
	    tidy(name, res_folder)

	###===================================================
	def estimate_siso_simo(vars, csvfile, greyscale,
	                       dropout, numclass, name, mode, res_folder,
	                       batch_size, weights_path):
	    """
	    Use a SediNet model for continuous prediction on the samples in `csvfile`.

	    Reads the sample table(s) with `get_df`, builds one model with
	    `make_sedinet_siso_simo`, loads all images (and their values) in a single
	    batch per table, runs `predict_test_train_siso_simo` on each, then
	    clears the Keras session and calls `tidy(name, res_folder)`.

	    Parameters
	    ----------
	    vars : names of the variables to predict
	    csvfile : passed to `get_df` to obtain the sample indices and table(s)
	    greyscale : passed to the model constructor, data generator and
	        prediction routine
	    dropout : passed to the model constructor and prediction routine
	    numclass : not used in this function
	    name : passed to the prediction routine and to `tidy`
	    mode : passed to the prediction routine
	    res_folder : folder searched for the weights file when `weights_path`
	        does not exist as given; also passed to `tidy`
	    batch_size : not used in this function
	    weights_path : path of the weights file

	    Raises
	    ------
	    NotImplementedError
	        If BATCH_SIZE is a list: only a single model is built here, so an
	        ensemble over several batch sizes cannot be evaluated.
	    """
	    # fail early and explicitly: the original code referenced an undefined
	    # list of models on this path
	    if isinstance(BATCH_SIZE, list):
	        raise NotImplementedError(
	            "estimate_siso_simo builds a single model; "
	            "BATCH_SIZE must be a single value, not a list")

	    # fall back to the results folder when the file is not found as given
	    if not os.path.exists(weights_path):
	        weights_path = res_folder + os.sep + weights_path
	        print("Using %s" % (weights_path))

	    ##======================================
	    ## read the sample indices and table(s) to predict on
	    train_idx, train_df, _ = get_df(csvfile)

	    ##==============================================
	    ## create a sedinet model to estimate the continuous variable(s)
	    SM = make_sedinet_siso_simo(vars, greyscale, dropout)

	    do_aug = False
	    for_training = False

	    if isinstance(train_df, list):
	        print('Reading in all files and memory mapping in batches ... takes a while')
	        # one generator per table, each configured to return the whole
	        # table as a single batch
	        train_gen = []
	        for df, idx in zip(train_df, train_idx):
	            train_gen.append(get_data_generator_Nvars_siso_simo(df, idx, for_training,
	                vars, len(idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT))

	        x_train = []; vals = []; files = []
	        for gen in train_gen:
	            a, b = next(gen)
	            # write the image array to a temporary file and re-open it as a
	            # read-only memory map, so the in-memory copy can be released
	            outfile = TemporaryFile()
	            files.append(outfile)  # keep the file object referenced
	            dt = a.dtype; sh = a.shape
	            fp = np.memmap(outfile, dtype=dt, mode='w+', shape=sh)
	            fp[:] = a[:]
	            fp.flush()
	            del a
	            del fp
	            a = np.memmap(outfile, dtype=dt, mode='r', shape=sh)
	            x_train.append(a)
	            vals.append(b)

	    else:
	        train_gen = get_data_generator_Nvars_siso_simo(train_df, train_idx, for_training,
	            vars, len(train_idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT)

	        x_train, vals = next(train_gen)

	    # test model; no separate test set is supplied
	    x_test = None
	    test_vals = None

	    if isinstance(x_train, list):
	        for counter, x in enumerate(x_train):
	            predict_test_train_siso_simo(x, vals[counter], x_test, test_vals, vars,
	                SM, weights_path, name, mode, greyscale,
	                dropout, DO_AUG, DO_STANDARDIZE, counter)
	    else:
	        # NOTE(review): the original passed an undefined `counter` here.
	        # 0 is used as the index of the only table, matching the first
	        # index of the loop above -- confirm against
	        # predict_test_train_siso_simo.
	        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
	            SM, weights_path, name, mode, greyscale,
	            dropout, DO_AUG, DO_STANDARDIZE, 0)

	    K.clear_session()

	    ##===================================
	    ## move model files and plots to the results folder
	    tidy(name, res_folder)