# Written by Dr Daniel Buscombe, Marda Science LLC
# for the SandSnap Program
#
# MIT License
#
# Copyright (c) 2020-2021, Marda Science LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

##> Release v1.4 (Aug 2021)

###===================================================
# import libraries
from sedinet_models import *

# explicit imports for the names used directly in this file; these are
# normally supplied by the star import above, in which case they are
# harmless re-imports
import os
import numpy as np
from PIL import Image
from tempfile import TemporaryFile
from tensorflow.keras import backend as K

###===================================================
def get_data_generator(df, indices, greyscale, tilesize, batch_size=16):
    """
    This function generates data for a batch of images and no metric,
    for "unseen" samples
    """
    for_training = False
    images = []
    while True:
        for i in indices:
            r = df.iloc[i]
            file = r['files']
            if greyscale==True:
                # read the image as greyscale and keep the luminance channel
                im = Image.open(file).convert('LA')
                im = np.array(im)[:,:,0]
                nx, ny = np.shape(im)
                if (nx!=tilesize) or (ny!=tilesize):
                    # crop a tilesize x tilesize tile from the image centre
                    im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                            int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]
            else:
                im = Image.open(file)
                im = np.array(im)
                nx, ny, nz = np.shape(im)
                if (nx!=tilesize) or (ny!=tilesize):
                    # crop a tilesize x tilesize tile from the image centre
                    im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                            int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]

            if greyscale==True:
                # add a trailing singleton channel dimension
                images.append(np.expand_dims(im, axis=2))
            else:
                images.append(im)

            if len(images) >= batch_size:
                yield np.array(images)
                images = []
        if not for_training:
            break

###===================================================
def get_data_generator_1vars(df, indices, for_training, vars, greyscale,
                             tilesize, batch_size=16):
    """
    This function generates data for a batch of images and 1 associated metric
    """
    images, p1s = [], []
    while True:
        for i in indices:
            r = df.iloc[i]
            file, p1 = r['files'], r[vars[0]]
            if greyscale==True:
                im = Image.open(file).convert('LA')
                im = np.array(im)[:,:,0]
                nx, ny = np.shape(im)
                if (nx!=tilesize) or (ny!=tilesize):
                    im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                            int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]
            else:
                im = Image.open(file)
                im = np.array(im)
                nx, ny, nz = np.shape(im)
                if (nx!=tilesize) or (ny!=tilesize):
                    im = im[int(nx/2)-int(tilesize/2):int(nx/2)+int(tilesize/2),
                            int(ny/2)-int(tilesize/2):int(ny/2)+int(tilesize/2)]

            if greyscale==True:
                images.append(np.expand_dims(im, axis=2))
            else:
                images.append(im)
            p1s.append(p1)

            if len(images) >= batch_size:
                yield np.array(images), [np.array(p1s)]
                images, p1s = [], []
        if not for_training:
            break
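###===================================================
# Example usage of the two generators above (a minimal sketch; the csv
# name, metric column 'P50', and tile size are illustrative placeholders,
# not part of SediNet):
#
#   df = pd.read_csv('my_samples.csv')   # must contain a 'files' column
#   gen = get_data_generator(df, np.arange(len(df)), greyscale=True,
#                            tilesize=1024, batch_size=16)
#   X = next(gen)          # (16, 1024, 1024, 1) array of image tiles
#
# With get_data_generator_1vars, each batch also carries one metric value
# per image:
#
#   gen = get_data_generator_1vars(df, np.arange(len(df)), False, ['P50'],
#                                  True, 1024, batch_size=16)
#   X, [y] = next(gen)     # y is a (16,) array of the 'P50' values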
###===================================================
def estimate_categorical(vars, csvfile, res_folder, dropout, numclass,
                         greyscale, name, mode):
    """
    This function uses a SediNet model for categorical prediction
    """
    ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))

    ##======================================
    ## this randomly selects imagery for training and testing imagery sets
    ## while also making sure that both training and testing sets have
    ## at least 3 examples of each category
    test_idx, test_df, _ = get_df(csvfile, fortrain=True)

    # build a validation generator over the test set
    # for 16GB RAM, use a maximum of 200 samples to test on
    # need to change this batch generator into a better keras one
    valid_gen = get_data_generator_1image(test_df, test_idx, True, ID_MAP,
                                          vars[0], len(test_idx), greyscale,
                                          False, IM_HEIGHT)
                                          #np.min((200, len(test_idx))),

    if SHALLOW is True:
        if DO_AUG is True:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_shallow_"+vars[0]+"_"+CAT_LOSS+"_aug.hdf5"
        else:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_shallow_"+vars[0]+"_"+CAT_LOSS+"_noaug.hdf5"
    else:
        if DO_AUG is True:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_"+vars[0]+"_"+CAT_LOSS+"_aug.hdf5"
        else:
            weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
                           "_"+str(IM_WIDTH)+"_"+vars[0]+"_"+CAT_LOSS+"_noaug.hdf5"

    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep + weights_path
    print("Using %s" % (weights_path))

    if numclass>0:
        ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))

    SM = make_cat_sedinet(ID_MAP, dropout)

    # the same single model SM is used whether BATCH_SIZE is a list or a scalar
    predict_test_train_cat(test_df, None, test_idx, None, vars[0], SM,
                           [i for i in ID_MAP.keys()], weights_path,
                           greyscale, name, DO_AUG, IM_HEIGHT)

    K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)
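###===================================================
# For reference, the weights filename assembled above follows the pattern
# (square brackets mark optional parts):
#
#   <name>_<mode>_batch<BATCH_SIZE>_im<IM_HEIGHT>_<IM_WIDTH>[_shallow]_<var>_<CAT_LOSS>_[no]aug.hdf5
#
# e.g. with the purely illustrative config values name='sandsnap',
# mode='siso', BATCH_SIZE=8, IM_HEIGHT=IM_WIDTH=768, vars[0]='pop',
# CAT_LOSS='focal', SHALLOW=True, DO_AUG=False, this gives:
#
#   sandsnap_siso_batch8_im768_768_shallow_pop_focal_noaug.hdf5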
###===================================================
def estimate_siso_simo(vars, csvfile, greyscale, dropout, numclass, name,
                       mode, res_folder, batch_size, weights_path):
    """
    This function uses a sedinet model for continuous prediction
    """
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep + weights_path
    print("Using %s" % (weights_path))

    ##======================================
    ## this randomly selects imagery for training and testing imagery sets
    ## while also making sure that both training and testing sets have
    ## at least 3 examples of each category
    train_idx, train_df, split = get_df(csvfile)

    ##==============================================
    ## create a sedinet model to estimate category
    SM = make_sedinet_siso_simo(vars, greyscale, dropout)

    # (per-variable scaling of the response with RobustScaler was removed
    # from this release; outputs are standardized according to
    # DO_STANDARDIZE instead)

    do_aug = False
    for_training = False

    if type(train_df)==list:
        print('Reading in all files and memory mapping in batches ... takes a while')
        train_gen = []
        for df, id in zip(train_df, train_idx):
            train_gen.append(get_data_generator_Nvars_siso_simo(df, id,
                             for_training, vars, len(id), greyscale, do_aug,
                             DO_STANDARDIZE, IM_HEIGHT))
        # read each dataframe's worth of imagery, then park it in a
        # disk-backed memory map so very large datasets do not have to
        # fit in RAM all at once
        x_train = []; vals = []; files = []
        for gen in train_gen:
            a, b = next(gen)
            outfile = TemporaryFile()
            files.append(outfile)
            dt = a.dtype; sh = a.shape
            fp = np.memmap(outfile, dtype=dt, mode='w+', shape=sh)
            fp[:] = a[:]
            fp.flush()
            del a
            del fp
            # re-open read-only; the array now lives on disk, not in memory
            a = np.memmap(outfile, dtype=dt, mode='r', shape=sh)
            x_train.append(a)
            vals.append(b)
    else:
        train_gen = get_data_generator_Nvars_siso_simo(train_df, train_idx,
                        for_training, vars, len(train_idx), greyscale, do_aug,
                        DO_STANDARDIZE, IM_HEIGHT)
        x_train, vals = next(train_gen)

    # test the model (no held-out test set in this mode)
    x_test = None
    test_vals = None

    # the single model SM is used in every branch; a counter of 0 is passed
    # where there is no enumeration over multiple memory-mapped arrays
    if type(BATCH_SIZE)==list:
        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                                     SM, weights_path, name, mode, greyscale,
                                     dropout, DO_AUG, DO_STANDARDIZE, 0)
    else:
        if type(x_train)==list:
            for counter, x in enumerate(x_train):
                predict_test_train_siso_simo(x, vals[counter], x_test,
                                             test_vals, vars, SM, weights_path,
                                             name, mode, greyscale, dropout,
                                             DO_AUG, DO_STANDARDIZE, counter)
        else:
            predict_test_train_siso_simo(x_train, vals, x_test, test_vals,
                                         vars, SM, weights_path, name, mode,
                                         greyscale, dropout, DO_AUG,
                                         DO_STANDARDIZE, 0)

    K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)
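###===================================================
# The TemporaryFile + np.memmap pattern used in estimate_siso_simo keeps
# large image arrays on disk rather than in RAM. A minimal, self-contained
# sketch of the same idea (illustrative only; the shapes and dtype are
# arbitrary):
#
#   from tempfile import TemporaryFile
#   import numpy as np
#
#   a = np.random.rand(1000, 512, 512).astype('float32')
#   outfile = TemporaryFile()
#   fp = np.memmap(outfile, dtype=a.dtype, mode='w+', shape=a.shape)
#   fp[:] = a[:]
#   fp.flush()                 # write the array through to disk
#   dt, sh = a.dtype, a.shape
#   del a, fp                  # free the in-memory copy
#   a = np.memmap(outfile, dtype=dt, mode='r', shape=sh)
#   # 'a' now behaves like a read-only ndarray backed by the temp file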