# Spaces: dbuscombe / SandSnapModelDemo (Sleeping)
# File size: 11,150 Bytes
# d86998c

# Written by Dr Daniel Buscombe, Marda Science LLC
# for the SandSnap Program
#
# MIT License
#
# Copyright (c) 2020-2021, Marda Science LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


##> Release v1.4 (Aug 2021)

###===================================================
# import libraries
from sedinet_models import *

###===================================================
def get_data_generator(df, indices, greyscale, tilesize, batch_size=16):
    """
    Generate batches of images only (no metric), e.g. for "unseen" samples.

    Parameters
    ----------
    df : table supporting ``df.iloc[i]``
        Each selected row must provide a 'files' entry that ``Image.open``
        can read.
    indices : iterable of int
        Row positions in ``df`` to read, in the order given.
    greyscale : bool
        If true, each image is converted to mode 'LA', only its first band
        is kept, and a trailing axis of length 1 is added. Otherwise the
        image array is used as read from the file.
    tilesize : int
        Side length of the square tile. An image whose first two dimensions
        differ from this is cropped about its centre.
    batch_size : int, optional
        Number of images per yielded batch.

    Yields
    ------
    numpy.ndarray
        ``np.array`` built from ``batch_size`` images.

    Raises
    ------
    ValueError
        If ``greyscale`` is false and an image does not load as a 3-D array.

    Notes
    -----
    A single pass is made over ``indices``. Images left over after the last
    full batch are never yielded.
    """
    tile = int(tilesize)
    images = []

    for i in indices:
        file = df.iloc[i]['files']

        # the context manager releases the file handle once the pixel data
        # has been copied into a numpy array
        with Image.open(file) as img:
            if greyscale:
                im = np.array(img.convert('LA'))[:, :, 0]
            else:
                im = np.array(img)

        if not greyscale and im.ndim != 3:
            raise ValueError(
                "expected a 3-D image array for %s, got shape %s"
                % (file, im.shape))

        nx, ny = im.shape[:2]
        if (nx != tilesize) or (ny != tilesize):
            # Anchor on the start index and add the full tile size, so an odd
            # tile size is not returned one pixel short. Clamp at zero so an
            # image smaller than the tile is not sliced with a negative
            # (wrapping) start index.
            x0 = max(nx // 2 - tile // 2, 0)
            y0 = max(ny // 2 - tile // 2, 0)
            im = im[x0:x0 + tile, y0:y0 + tile]

        if greyscale:
            # add the single-band axis
            im = np.expand_dims(im, axis=2)
        images.append(im)

        if len(images) >= batch_size:
            yield np.array(images)
            images = []

###===================================================
def get_data_generator_1vars(df, indices, for_training, vars, greyscale,
                                tilesize, batch_size=16):
    """
    Generate batches of images together with 1 associated metric.

    Parameters
    ----------
    df : table supporting ``df.iloc[i]``
        Each selected row must provide a 'files' entry that ``Image.open``
        can read, and an entry named ``vars[0]`` holding the metric.
    indices : iterable of int
        Row positions in ``df`` to read, in the order given.
    for_training : bool
        If true, ``indices`` is cycled indefinitely and items left over from
        one pass are carried into the next batch. If false, a single pass is
        made and items left over after the last full batch are never yielded.
    vars : sequence of str
        Only ``vars[0]`` is used: the name of the metric to read from a row.
    greyscale : bool
        If true, each image is converted to mode 'LA', only its first band
        is kept, and a trailing axis of length 1 is added. Otherwise the
        image array is used as read from the file.
    tilesize : int
        Side length of the square tile. An image whose first two dimensions
        differ from this is cropped about its centre.
    batch_size : int, optional
        Number of samples per yielded batch.

    Yields
    ------
    tuple
        ``(np.array(images), [np.array(values)])`` for each full batch.

    Raises
    ------
    ValueError
        If ``greyscale`` is false and an image does not load as a 3-D array.
    """
    tile = int(tilesize)
    images, p1s = [], []

    while True:
        visited = False
        for i in indices:
            visited = True
            r = df.iloc[i]
            file, p1 = r['files'], r[vars[0]]

            # the context manager releases the file handle once the pixel
            # data has been copied into a numpy array
            with Image.open(file) as img:
                if greyscale:
                    im = np.array(img.convert('LA'))[:, :, 0]
                else:
                    im = np.array(img)

            if not greyscale and im.ndim != 3:
                raise ValueError(
                    "expected a 3-D image array for %s, got shape %s"
                    % (file, im.shape))

            nx, ny = im.shape[:2]
            if (nx != tilesize) or (ny != tilesize):
                # Anchor on the start index and add the full tile size, so an
                # odd tile size is not returned one pixel short. Clamp at
                # zero so an image smaller than the tile is not sliced with a
                # negative (wrapping) start index.
                x0 = max(nx // 2 - tile // 2, 0)
                y0 = max(ny // 2 - tile // 2, 0)
                im = im[x0:x0 + tile, y0:y0 + tile]

            if greyscale:
                # add the single-band axis
                im = np.expand_dims(im, axis=2)
            images.append(im)
            p1s.append(p1)

            if len(images) >= batch_size:
                yield np.array(images), [np.array(p1s)]
                images, p1s = [], []

        # Stop after one pass when not training. Also stop when the pass saw
        # no indices at all: cycling over nothing would spin forever without
        # ever yielding.
        if not for_training or not visited:
            break

###===================================================
def estimate_categorical(vars, csvfile, res_folder, dropout,
                         numclass, greyscale, name, mode):
    """
    Use a SediNet model for categorical prediction on the samples in csvfile.

    Parameters
    ----------
    vars : sequence of str
        Only ``vars[0]`` is used: it is part of the weights file name and is
        passed on to the generator and prediction helpers.
    csvfile : passed to ``get_df`` to obtain the sample indices and table.
    res_folder : str
        Folder that is prefixed to the weights file name when that name does
        not exist as given; also passed to ``tidy``.
    dropout : passed to ``make_cat_sedinet``.
    numclass : int
        Number of classes; keys 0..numclass-1 are mapped to their string form.
    greyscale : passed on to the generator and prediction helpers.
    name : str
        Prefix of the weights file name; also passed to the prediction helper
        and to ``tidy``.
    mode : str
        Second component of the weights file name.

    Returns
    -------
    None

    Notes
    -----
    Reads the configuration names SHALLOW, DO_AUG, BATCH_SIZE, IM_HEIGHT,
    IM_WIDTH and CAT_LOSS, which must already be in scope (presumably via the
    ``sedinet_models`` star import -- confirm).
    """
    # class index -> class label (the index rendered as a string)
    ID_MAP = dict(zip(np.arange(numclass), [str(k) for k in range(numclass)]))

    ##======================================
    ## get the sample indices and table; the third return value of get_df is
    ## not needed here. (Per the original note, get_df is where imagery is
    ## randomly split into training and testing sets with at least 3 examples
    ## of each category in both -- not verifiable from this file.)
    test_idx, test_df, _ = get_df(csvfile, fortrain=True)

    # for 16GB RAM, used maximum of 200 samples to test on
    # need to change batch generator into a better keras one
    # NOTE(review): valid_gen is built but never consumed below -- confirm
    # whether it can be removed. The batch size is the number of test samples;
    # the original referenced `train_idx`, which does not exist in this
    # function.
    valid_gen = get_data_generator_1image(test_df, test_idx, True, ID_MAP,
                 vars[0], len(test_idx), greyscale, False, IM_HEIGHT)

    # The weights file name encodes the configuration the model was trained
    # with; only the "_shallow" marker and the "_aug"/"_noaug" suffix vary.
    shallow_tag = "_shallow" if SHALLOW is True else ""
    aug_tag = "_aug" if DO_AUG is True else "_noaug"
    weights_path = name+"_"+mode+"_batch"+str(BATCH_SIZE)+"_im"+str(IM_HEIGHT)+\
             "_"+str(IM_WIDTH)+shallow_tag+"_"+vars[0]+"_"+CAT_LOSS+aug_tag+".hdf5"

    # fall back to the copy of the weights file inside the results folder
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep + weights_path
        print("Using %s" % (weights_path))

    SM = make_cat_sedinet(ID_MAP, dropout)

    if isinstance(BATCH_SIZE, list):
        # NOTE(review): `SMs` is not defined anywhere in this function. Unless
        # it is supplied by the star import, this branch raises NameError --
        # confirm how the list-of-batch-sizes case should build its models.
        models = SMs
    else:
        models = SM

    predict_test_train_cat(test_df, None, test_idx, None, vars[0],
                      models, list(ID_MAP), weights_path, greyscale,
                      name, DO_AUG, IM_HEIGHT)

    K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)

###===================================================
def estimate_siso_simo(vars, csvfile, greyscale,
                       dropout, numclass, name, mode, res_folder,
                       batch_size, weights_path):
    """
    Use a SediNet model for continuous prediction on the samples in csvfile.

    Parameters
    ----------
    vars : passed to ``make_sedinet_siso_simo``, the data generator and the
        prediction helper.
    csvfile : passed to ``get_df`` to obtain the sample indices and table(s).
    greyscale : passed on to the model, generator and prediction helpers.
    dropout : passed to ``make_sedinet_siso_simo`` and the prediction helper.
    numclass : accepted for interface compatibility; not used here.
    name : str
        Passed to the prediction helper and to ``tidy``.
    mode : passed to the prediction helper.
    res_folder : str
        Folder that is prefixed to ``weights_path`` when that path does not
        exist as given; also passed to ``tidy``.
    batch_size : accepted for interface compatibility; not used here.
    weights_path : str
        Path of the model weights file.

    Returns
    -------
    None

    Notes
    -----
    Reads the configuration names BATCH_SIZE, DO_AUG, DO_STANDARDIZE and
    IM_HEIGHT, which must already be in scope (presumably via the
    ``sedinet_models`` star import -- confirm).
    """
    # fall back to the copy of the weights file inside the results folder
    if not os.path.exists(weights_path):
        weights_path = res_folder + os.sep + weights_path
        print("Using %s" % (weights_path))

    ##======================================
    ## get the sample indices and table(s); the third return value of get_df
    ## is not needed here
    train_idx, train_df, _ = get_df(csvfile)

    ##==============================================
    ## create a sedinet model for the requested variables
    SM = make_sedinet_siso_simo(vars, greyscale, dropout)

    # the generators are asked for un-augmented data in a single pass
    do_aug = False
    for_training = False

    if isinstance(train_df, list):
        print('Reading in all files and memory mapping in batches ... takes a while')
        # one generator per (table, indices) pair, each sized to return its
        # whole subset as a single batch
        train_gen = [
            get_data_generator_Nvars_siso_simo(df, idx, for_training,
                    vars, len(idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT)
            for df, idx in zip(train_df, train_idx)
        ]

        x_train, vals, files = [], [], []
        for gen in train_gen:
            a, b = next(gen)
            outfile = TemporaryFile()
            # hold a reference to every temporary file for the rest of this
            # call (presumably so the backing file of each memmap stays open)
            files.append(outfile)
            dt, sh = a.dtype, a.shape
            # write the batch to the temporary file through a writable memmap
            fp = np.memmap(outfile, dtype=dt, mode='w+', shape=sh)
            fp[:] = a[:]
            fp.flush()
            # drop the in-memory copy and the writable map before re-opening
            # the same data read-only
            del a
            del fp
            x_train.append(np.memmap(outfile, dtype=dt, mode='r', shape=sh))
            vals.append(b)

    else:
        train_gen = get_data_generator_Nvars_siso_simo(train_df, train_idx, for_training,
                 vars, len(train_idx), greyscale, do_aug, DO_STANDARDIZE, IM_HEIGHT)

        x_train, vals = next(train_gen)

    # no separate test set is evaluated here
    x_test = None
    test_vals = None

    # Index of the data subset being predicted. Only the per-subset loop below
    # changes it; the other paths previously used it without it being defined.
    counter = 0

    if isinstance(BATCH_SIZE, list):
        # NOTE(review): `SMs` is not defined anywhere in this function. Unless
        # it is supplied by the star import, this branch raises NameError --
        # confirm how the list-of-batch-sizes case should build its models.
        predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                             SMs, weights_path, name, mode, greyscale,
                             dropout, DO_AUG, DO_STANDARDIZE, counter)
    else:
        if isinstance(x_train, list):
            for counter, x in enumerate(x_train):
                predict_test_train_siso_simo(x, vals[counter], x_test, test_vals, vars,
                                 SM, weights_path, name, mode, greyscale,
                                 dropout, DO_AUG, DO_STANDARDIZE, counter)
        else:
            predict_test_train_siso_simo(x_train, vals, x_test, test_vals, vars,
                             SM, weights_path, name, mode, greyscale,
                             dropout, DO_AUG, DO_STANDARDIZE, counter)
        K.clear_session()

    ##===================================
    ## move model files and plots to the results folder
    tidy(name, res_folder)