Spaces:
Sleeping
Sleeping
# Written by Dr Daniel Buscombe, Marda Science LLC | |
# for the SandSnap Program | |
# | |
# MIT License | |
# | |
# Copyright (c) 2020-2021, Marda Science LLC | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in all | |
# copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
##> Release v1.4 (Aug 2021) | |
###=================================================== | |
# import libraries | |
from sedinet_models import * | |
###=================================================== | |
def get_data_generator(df, indices, greyscale, tilesize,batch_size=16):
    """
    Yield batches of image arrays, with no associated metric, for "unseen"
    samples.

    Each entry of `indices` selects a row of `df` by position; the row's
    'files' value is opened as an image and converted to an array. When the
    first two dimensions of that array are not both equal to `tilesize`, a
    window of int(tilesize/2) pixels either side of the image centre is cut
    out. With `greyscale` equal to True only the first band of the 'LA'
    conversion is kept and a trailing axis of length 1 is appended.

    A stacked array is yielded each time `batch_size` images have been
    collected. `indices` is traversed exactly once, so any images left over
    after the last full batch are not yielded.
    """
    batch = []

    for idx in indices:
        row = df.iloc[idx]
        path = row['files']
        grey = (greyscale == True)

        if grey:
            # keep only the luminance band of the luminance+alpha image
            pixels = np.array(Image.open(path).convert('LA'))[:, :, 0]
            n_rows, n_cols = np.shape(pixels)
        else:
            pixels = np.array(Image.open(path))
            # a three-way unpack: a 2-D array raises here
            n_rows, n_cols, _ = np.shape(pixels)

        if not (n_rows == tilesize and n_cols == tilesize):
            # central crop about the image midpoint
            half = int(tilesize / 2)
            mid_r = int(n_rows / 2)
            mid_c = int(n_cols / 2)
            pixels = pixels[mid_r - half:mid_r + half,
                            mid_c - half:mid_c + half]

        batch.append(np.expand_dims(pixels, axis=2) if grey else pixels)

        if len(batch) >= batch_size:
            yield np.array(batch)
            batch = []
###=================================================== | |
def get_data_generator_1vars(df, indices, for_training, vars, greyscale,
                             tilesize, batch_size=16):
    """
    Yield batches of image arrays together with one associated metric.

    Each entry of `indices` selects a row of `df` by position. The row's
    'files' value is opened as an image and its `vars[0]` value is taken as
    the metric. Images whose first two dimensions are not both equal to
    `tilesize` are cut to a window of int(tilesize/2) pixels either side of
    the image centre. With `greyscale` equal to True only the first band of
    the 'LA' conversion is kept and a trailing axis of length 1 is appended.

    Every time `batch_size` images have been collected the generator yields
    `(images, [values])`, where `images` is the stacked image array and
    `values` the matching metric array. When `for_training` is truthy the
    pass over `indices` repeats indefinitely; otherwise the generator stops
    after one pass, and a trailing partial batch is not yielded.
    """
    batch_images = []
    batch_values = []

    while True:
        for idx in indices:
            row = df.iloc[idx]
            path = row['files']
            value = row[vars[0]]
            grey = (greyscale == True)

            if grey:
                # keep only the luminance band of the luminance+alpha image
                pixels = np.array(Image.open(path).convert('LA'))[:, :, 0]
                n_rows, n_cols = np.shape(pixels)
            else:
                pixels = np.array(Image.open(path))
                # a three-way unpack: a 2-D array raises here
                n_rows, n_cols, _ = np.shape(pixels)

            if not (n_rows == tilesize and n_cols == tilesize):
                # central crop about the image midpoint
                half = int(tilesize / 2)
                mid_r = int(n_rows / 2)
                mid_c = int(n_cols / 2)
                pixels = pixels[mid_r - half:mid_r + half,
                                mid_c - half:mid_c + half]

            batch_images.append(np.expand_dims(pixels, axis=2) if grey else pixels)
            batch_values.append(value)

            if len(batch_images) >= batch_size:
                yield np.array(batch_images), [np.array(batch_values)]
                batch_images = []
                batch_values = []

        if not for_training:
            # a single pass is enough outside of training
            return
###=================================================== | |
def estimate_categorical(vars, csvfile, res_folder, dropout,
                         numclass, greyscale, name, mode):
    """
    Use a SediNet model for categorical prediction.

    Parameters
    ----------
    vars : sequence of str
        Only vars[0] is used: it is forwarded to the data generator and to
        the prediction routine, and is embedded in the weights file name.
    csvfile
        Passed to get_df(), which supplies the sample indices and dataframe.
    res_folder : str
        Folder prepended to the weights file name when that name does not
        exist as given; also the destination handed to tidy().
    dropout
        Forwarded to make_cat_sedinet().
    numclass : int
        Number of categories; class ids 0..numclass-1 are mapped to their
        string form.
    greyscale
        Forwarded to the data generator and the prediction routine.
    name, mode : str
        Leading components of the weights file name; `name` is also passed
        to the prediction routine and to tidy().

    Returns
    -------
    None. Results are produced by predict_test_train_cat() and tidy().
    """
    ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))

    ##======================================
    ## read the sample table; the third value returned by get_df is not
    ## needed here
    test_idx, test_df, _ = get_df(csvfile,fortrain=True)

    # for 16GB RAM, used maximum of 200 samples to test on
    # need to change batch generator into a better keras one
    # NOTE(review): valid_gen is never consumed below - confirm whether this
    # call can be dropped.
    valid_gen = get_data_generator_1image(test_df, test_idx, True, ID_MAP,
                vars[0], len(test_idx), greyscale, False, IM_HEIGHT)

    ##======================================
    ## weights file name: the four variants differ only in the "shallow"
    ## marker and in the augmentation suffix
    depth_tag = "_shallow_" if SHALLOW is True else "_"
    aug_tag = "_aug.hdf5" if DO_AUG is True else "_noaug.hdf5"
    weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                   "_"+str(IM_WIDTH)+depth_tag+vars[0]+"_"+CAT_LOSS+aug_tag

    # fall back to the results folder when the file is not found as named
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep+ weights_path
    print("Using %s" % (weights_path))

    ##======================================
    ## create a sedinet model to estimate category
    SM = make_cat_sedinet(ID_MAP, dropout)
    classes = list(ID_MAP.keys())

    if isinstance(BATCH_SIZE, list):
        # NOTE(review): SMs is not defined anywhere in this function;
        # presumably an ensemble with one model per batch size is expected
        # here - confirm where it should come from.
        predict_test_train_cat(test_df, None, test_idx, None, vars[0],
                               SMs, classes, weights_path, greyscale,
                               name, DO_AUG, IM_HEIGHT)
    else:
        predict_test_train_cat(test_df, None, test_idx, None, vars[0],
                               SM, classes, weights_path, greyscale,
                               name, DO_AUG, IM_HEIGHT)

    K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)
###=================================================== | |
def estimate_siso_simo(vars, csvfile, greyscale,
                       dropout, numclass, name, mode, res_folder,
                       batch_size, weights_path):
    """
    Use a SediNet model for continuous prediction.

    Parameters
    ----------
    vars
        Variables to estimate; forwarded to make_sedinet_siso_simo(), the
        data generator and the prediction routine.
    csvfile
        Passed to get_df(), which supplies the sample indices and the
        dataframe (or a list of each when the data come in several parts).
    greyscale, dropout
        Forwarded to model construction and prediction.
    numclass, batch_size
        Accepted for interface compatibility; not used in this function.
    name, mode : str
        Forwarded to the prediction routine; `name` is also passed to
        tidy().
    res_folder : str
        Folder prepended to `weights_path` when that path does not exist as
        given; also the destination handed to tidy().
    weights_path : str
        Model weights file.

    Returns
    -------
    None. Results are produced by predict_test_train_siso_simo() and tidy().
    """
    # fall back to the results folder when the file is not found as named
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep+ weights_path
    print("Using %s" % (weights_path))

    ##======================================
    ## read the sample table; the third value returned by get_df is not
    ## needed here
    train_idx, train_df, _ = get_df(csvfile)

    ##==============================================
    ## create a sedinet model to estimate the continuous variable(s)
    SM = make_sedinet_siso_simo(vars, greyscale, dropout)

    do_aug = False
    for_training = False

    if isinstance(train_df, list):
        print('Reading in all files and memory mapping in batches ... takes a while')

        # one generator per part, each asked for its whole part as one batch
        train_gen = [
            get_data_generator_Nvars_siso_simo(part_df, part_idx, for_training,
                    vars, len(part_idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT)
            for part_df, part_idx in zip(train_df, train_idx)
        ]

        x_train = []; vals = []; files = []
        for gen in train_gen:
            a, b = next(gen)
            outfile = TemporaryFile()
            # hold a reference so the temporary file outlives its memmap
            files.append(outfile)
            dt = a.dtype; sh = a.shape
            # write the batch to disk, then reopen it read-only so the
            # in-memory copy can be released
            fp = np.memmap(outfile, dtype=dt, mode='w+', shape=sh)
            fp[:] = a[:]
            fp.flush()
            del a
            del fp
            x_train.append(np.memmap(outfile, dtype=dt, mode='r', shape=sh))
            vals.append(b)
    else:
        train_gen = get_data_generator_Nvars_siso_simo(train_df, train_idx, for_training,
                vars, len(train_idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT)
        x_train, vals = next(train_gen)

    # test model: there is no separate test set in this mode
    x_test = None
    test_vals = None

    # `counter` used to be bound only by the enumerate() loop below, so the
    # other two branches raised NameError. Default to 0, the value the loop
    # uses for its first part.
    # NOTE(review): confirm that 0 is the value predict_test_train_siso_simo
    # expects for a single, unsplit dataset.
    counter = 0

    if isinstance(BATCH_SIZE, list):
        # NOTE(review): SMs is not defined anywhere in this function;
        # presumably an ensemble with one model per batch size is expected
        # here - confirm where it should come from.
        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                                     SMs, weights_path, name, mode, greyscale,
                                     dropout, DO_AUG, DO_STANDARDIZE, counter)
    elif isinstance(x_train, list):
        for counter, x in enumerate(x_train):
            predict_test_train_siso_simo(x, vals[counter], x_test, test_vals, vars,
                                         SM, weights_path, name, mode, greyscale,
                                         dropout, DO_AUG, DO_STANDARDIZE, counter)
    else:
        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                                     SM, weights_path, name, mode, greyscale,
                                     dropout, DO_AUG, DO_STANDARDIZE, counter)

    K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)