# SandSnapModelDemo / app_files / src / sedinet_eval.py
# (provenance: Hugging Face Space by dbuscombe, revision v1 / d86998c, ~11.2 kB)
# Written by Dr Daniel Buscombe, Marda Science LLC
# for the SandSnap Program
#
# MIT License
#
# Copyright (c) 2020-2021, Marda Science LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##> Release v1.4 (Aug 2021)
###===================================================
# import libraries
from sedinet_models import *
###===================================================
def get_data_generator(df, indices, greyscale, tilesize,batch_size=16):
    """
    Generate batches of images (no associated metric) for "unseen" samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'files' column of image file paths.
    indices : iterable of int
        Positional row indices into `df` selecting the samples to read.
    greyscale : bool
        If True, load the luminance channel only (single-channel output).
    tilesize : int
        Side length of the square tile centre-cropped from each image.
    batch_size : int
        Number of images per yielded batch.

    Yields
    ------
    numpy.ndarray of shape (batch, tilesize, tilesize, channels), where
    channels is 1 when greyscale else the image's native channel count.
    """
    images = []
    # single pass over the requested indices (this generator is for
    # evaluation only; the original hard-coded for_training = False)
    for i in indices:
        r = df.iloc[i]
        file = r['files']
        if greyscale==True:
            # 'LA' conversion gives luminance+alpha; keep luminance only
            im = np.array(Image.open(file).convert('LA'))[:,:,0]
            nx,ny = np.shape(im)
        else:
            im = np.array(Image.open(file))
            nx,ny,nz = np.shape(im)
        # centre-crop to tilesize x tilesize if the image is any other size
        if (nx!=tilesize) or (ny!=tilesize):
            im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                    int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]
        if greyscale==True:
            # add an explicit channel axis so shapes match the colour case
            images.append(np.expand_dims(im, axis=2))
        else:
            images.append(im)
        if len(images) >= batch_size:
            yield np.array(images)
            images = []
    # BUGFIX: the original dropped any trailing partial batch, so the last
    # len(indices) % batch_size samples were never evaluated
    if images:
        yield np.array(images)
###===================================================
def get_data_generator_1vars(df, indices, for_training, vars, greyscale,
                             tilesize, batch_size=16):
    """
    Generate batches of (images, [metric]) pairs for one associated metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'files' column plus the column named by vars[0].
    indices : iterable of int
        Positional row indices into `df`.
    for_training : bool
        If True, loop over the data forever (Keras-style training
        generator); if False, make a single pass and stop.
    vars : sequence of str
        vars[0] is the dataframe column holding the metric value.
    greyscale : bool
        If True, load the luminance channel only.
    tilesize : int
        Side length of the square centre-crop applied to each image.
    batch_size : int
        Number of samples per yielded batch.

    Yields
    ------
    (numpy.ndarray of images, [numpy.ndarray of metric values])
    """
    images, p1s = [], []
    while True:
        for i in indices:
            r = df.iloc[i]
            file, p1 = r['files'], r[vars[0]]
            if greyscale==True:
                # 'LA' gives luminance+alpha; keep luminance only
                im = np.array(Image.open(file).convert('LA'))[:,:,0]
                nx,ny = np.shape(im)
            else:
                im = np.array(Image.open(file))
                nx,ny,nz = np.shape(im)
            # centre-crop to tilesize x tilesize if needed
            if (nx!=tilesize) or (ny!=tilesize):
                im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                        int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]
            if greyscale==True:
                # explicit channel axis so shapes match the colour case
                images.append(np.expand_dims(im, axis=2))
            else:
                images.append(im)
            p1s.append(p1)
            if len(images) >= batch_size:
                yield np.array(images), [np.array(p1s)]
                images, p1s = [], []
        if not for_training:
            # BUGFIX: the original dropped any trailing partial batch here,
            # silently skipping the last len(indices) % batch_size samples
            if images:
                yield np.array(images), [np.array(p1s)]
            break
###===================================================
def estimate_categorical(vars, csvfile, res_folder, dropout,
                         numclass, greyscale, name, mode):
    """
    Run a trained SediNet categorical model over the samples in csvfile.

    Parameters
    ----------
    vars : sequence of str — vars[0] is the target category column.
    csvfile : str — CSV of samples, parsed by get_df().
    res_folder : str — folder holding model weights / receiving outputs.
    dropout : float — dropout rate used when rebuilding the model.
    numclass : int — number of categories (must be > 0 to build a model).
    greyscale : bool — whether images are treated as single-channel.
    name, mode : str — used to reconstruct the weights filename.
    """
    ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))
    ##======================================
    ## this randomly selects imagery for training and testing imagery sets
    ## while also making sure that both training and testing sets have
    ## at least 3 examples of each category
    # BUGFIX: was "test_idx, test_df. _ = ..." — attribute assignment to an
    # undefined name instead of tuple unpacking
    test_idx, test_df, _ = get_df(csvfile,fortrain=True)
    # for 16GB RAM, used maximum of 200 samples to test on
    # need to change batch generator into a better keras one
    # BUGFIX: batch size previously referenced undefined `train_idx`
    valid_gen = get_data_generator_1image(test_df, test_idx, True, ID_MAP,
                  vars[0], len(test_idx), greyscale, False, IM_HEIGHT) #np.min((200, len(test_idx))),
    # reconstruct the weights filename from the same globals used at training
    if SHALLOW is True:
        if DO_AUG is True:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_shallow_"+vars[0]+"_"+CAT_LOSS+"_aug.hdf5"
        else:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_shallow_"+vars[0]+"_"+CAT_LOSS+"_noaug.hdf5"
    else:
        if DO_AUG is True:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_"+vars[0]+"_"+CAT_LOSS+"_aug.hdf5"
        else:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_"+vars[0]+"_"+CAT_LOSS+"_noaug.hdf5"
    # fall back to looking for the weights inside res_folder
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep+ weights_path
    print("Using %s" % (weights_path))
    if numclass>0:
        # NOTE: ID_MAP is recomputed here identically to the one above
        ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))
        SM = make_cat_sedinet(ID_MAP, dropout)
    if type(BATCH_SIZE)==list:
        # NOTE(review): `SMs` is not defined anywhere in this function —
        # this branch will raise NameError if BATCH_SIZE is a list; verify
        # against the training-side code that builds an ensemble of models
        predict_test_train_cat(test_df, None, test_idx, None, vars[0],
            SMs, [i for i in ID_MAP.keys()], weights_path, greyscale,
            name, DO_AUG, IM_HEIGHT)
    else:
        predict_test_train_cat(test_df, None, test_idx, None, vars[0],
            SM, [i for i in ID_MAP.keys()], weights_path, greyscale,
            name, DO_AUG, IM_HEIGHT)
    K.clear_session()
    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)
###===================================================
def estimate_siso_simo(vars, csvfile, greyscale,
                       dropout, numclass, name, mode, res_folder,#scale,
                       batch_size, weights_path):
    """
    Run a trained SediNet continuous (single-input single/multi-output)
    model over the samples in csvfile.

    Parameters
    ----------
    vars : sequence of str — dataframe columns to predict.
    csvfile : str — CSV of samples, parsed by get_df().
    greyscale : bool — whether images are treated as single-channel.
    dropout : float — dropout rate used when rebuilding the model.
    numclass : int — unused here (continuous prediction path).
    name, mode : str — labels used for output files.
    res_folder : str — folder holding weights / receiving outputs.
    batch_size : int — unused here; the generator consumes all samples.
    weights_path : str — weights filename; looked up in res_folder if not
        found relative to the working directory.
    """
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep+ weights_path
    print("Using %s" % (weights_path))
    ##======================================
    ## this randomly selects imagery for training and testing imagery sets
    ## while also making sure that both training and testing sets have
    ## at least 3 examples of each category
    train_idx, train_df,split = get_df(csvfile)
    ##==============================================
    ## create a sedinet model to estimate category
    SM = make_sedinet_siso_simo(vars, greyscale, dropout)
    do_aug = False
    for_training = False
    if type(train_df)==list:
        # multiple dataframes: read each subset and memory-map the image
        # arrays to temporary files to keep peak RAM down
        print('Reading in all files and memory mapping in batches ... takes a while')
        train_gen = []
        for df,id in zip(train_df,train_idx):
            train_gen.append(get_data_generator_Nvars_siso_simo(df, id, for_training,
                             vars, len(id), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT))#CS,
        x_train = []; vals = []; files = []
        for gen in train_gen:
            a, b = next(gen)
            outfile = TemporaryFile()
            files.append(outfile)
            dt = a.dtype; sh = a.shape
            # write the batch to disk, free the in-memory copy, then
            # re-open it read-only as a memmap
            fp = np.memmap(outfile, dtype=dt, mode='w+', shape=sh)
            fp[:] = a[:]
            fp.flush()
            del a
            del fp
            a = np.memmap(outfile, dtype=dt, mode='r', shape=sh)
            x_train.append(a)
            vals.append(b)
    else:
        train_gen = get_data_generator_Nvars_siso_simo(train_df, train_idx, for_training,
                    vars, len(train_idx), greyscale,do_aug, DO_STANDARDIZE, IM_HEIGHT)# CS,
        x_train, vals = next(train_gen)
    # test model (no held-out test set in this evaluation path)
    x_test=None
    test_vals = None
    # BUGFIX: `counter` was undefined on the non-list code paths below,
    # raising NameError; default to 0 (the enumerate loop overwrites it)
    counter = 0
    if type(BATCH_SIZE)==list:
        # NOTE(review): `SMs` is not defined anywhere in this function —
        # this branch will raise NameError if BATCH_SIZE is a list; verify
        # against the training-side code that builds an ensemble of models
        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                                     SMs, weights_path, name, mode, greyscale, #CS,
                                     dropout, DO_AUG, DO_STANDARDIZE,counter)#scale,
    else:
        if type(x_train)==list:
            for counter, x in enumerate(x_train):
                predict_test_train_siso_simo(x, vals[counter], x_test, test_vals, vars,
                                             SM, weights_path, name, mode, greyscale, #CS,
                                             dropout, DO_AUG, DO_STANDARDIZE,counter)#scale,
        else:
            predict_test_train_siso_simo(x_train,vals, x_test, test_vals, vars,
                                         SM, weights_path, name, mode, greyscale,# CS,
                                         dropout,DO_AUG, DO_STANDARDIZE,counter)# scale
    K.clear_session()
    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)