#-------------------------------------------------------------------------------
# Name:        pySaliencyMap
# Purpose:     Extracting a saliency map from a single still image
#
# Author:      Akisato Kimura
#
# Created:     April 24, 2014
# Copyright:   (c) Akisato Kimura 2014-
# Licence:     All rights reserved
#-------------------------------------------------------------------------------

import cv2
import numpy as np
import SaRa.pySaliencyMapDefs as pySaliencyMapDefs


class pySaliencyMap:
    # initialization
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.prev_frame = None
        self.SM = None
        self.GaborKernel0   = np.array(pySaliencyMapDefs.GaborKernel_0)
        self.GaborKernel45  = np.array(pySaliencyMapDefs.GaborKernel_45)
        self.GaborKernel90  = np.array(pySaliencyMapDefs.GaborKernel_90)
        self.GaborKernel135 = np.array(pySaliencyMapDefs.GaborKernel_135)

    # extracting color channels
    def SMExtractRGBI(self, inputImage):
        # convert the array elements to the range [0, 1]
        src = np.float32(inputImage) * 1./255
        # split
        (B, G, R) = cv2.split(src)
        # extract an intensity image
        I = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # return
        return R, G, B, I

    # feature maps
    ## constructing a Gaussian pyramid
    def FMCreateGaussianPyr(self, src):
        dst = list()
        dst.append(src)
        for i in range(1, 9):
            nowdst = cv2.pyrDown(dst[i-1])
            dst.append(nowdst)
        return dst

    ## taking center-surround differences
    def FMCenterSurroundDiff(self, GaussianMaps):
        dst = list()
        for s in range(2, 5):
            now_size = GaussianMaps[s].shape
            now_size = (now_size[1], now_size[0])   # (width, height)
            tmp = cv2.resize(GaussianMaps[s+3], now_size, interpolation=cv2.INTER_LINEAR)
            nowdst = cv2.absdiff(GaussianMaps[s], tmp)
            dst.append(nowdst)
            tmp = cv2.resize(GaussianMaps[s+4], now_size, interpolation=cv2.INTER_LINEAR)
            nowdst = cv2.absdiff(GaussianMaps[s], tmp)
            dst.append(nowdst)
        return dst

    ## constructing a Gaussian pyramid + taking center-surround differences
    def FMGaussianPyrCSD(self, src):
        GaussianMaps = self.FMCreateGaussianPyr(src)
        dst = self.FMCenterSurroundDiff(GaussianMaps)
        return dst

    ## intensity feature maps
    def IFMGetFM(self, I):
        return self.FMGaussianPyrCSD(I)

    ## color feature maps
    def CFMGetFM(self, R, G, B):
        # max(R,G,B)
        tmp1 = cv2.max(R, G)
        RGBMax = cv2.max(B, tmp1)
        RGBMax[RGBMax <= 0] = 0.0001    # prevent dividing by 0
        # min(R,G)
        RGMin = cv2.min(R, G)
        # RG = (R-G)/max(R,G,B)
        RG = (R - G) / RGBMax
        # BY = (B-min(R,G))/max(R,G,B)
        BY = (B - RGMin) / RGBMax
        # clamp negative values to 0
        RG[RG < 0] = 0
        BY[BY < 0] = 0
        # obtain feature maps in the same way as intensity
        RGFM = self.FMGaussianPyrCSD(RG)
        BYFM = self.FMGaussianPyrCSD(BY)
        # return
        return RGFM, BYFM
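
    # Worked example (added for illustration, not part of the original code):
    # for a pure-red pixel R=1, G=0, B=0 the opponent channels above give
    #   RG = (1 - 0) / max(1, 0, 0) = 1.0   (strong red-green response)
    #   BY = (0 - min(1, 0)) / 1    = 0.0   (no blue-yellow response)
    # while for a grey pixel R=G=B=v both channels evaluate to 0, so the
    # opponent maps respond only to chromatic contrast.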

    ## orientation feature maps
    def OFMGetFM(self, src):
        # creating a Gaussian pyramid
        GaussianI = self.FMCreateGaussianPyr(src)
        # convolving a Gabor filter with an intensity image to extract orientation features
        GaborOutput0   = [ np.empty((1,1)), np.empty((1,1)) ]   # dummy data: any kind of np.array() is OK
        GaborOutput45  = [ np.empty((1,1)), np.empty((1,1)) ]
        GaborOutput90  = [ np.empty((1,1)), np.empty((1,1)) ]
        GaborOutput135 = [ np.empty((1,1)), np.empty((1,1)) ]
        for j in range(2, 9):
            GaborOutput0.append(   cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel0) )
            GaborOutput45.append(  cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel45) )
            GaborOutput90.append(  cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel90) )
            GaborOutput135.append( cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel135) )
        # calculating center-surround differences for every orientation
        CSD0   = self.FMCenterSurroundDiff(GaborOutput0)
        CSD45  = self.FMCenterSurroundDiff(GaborOutput45)
        CSD90  = self.FMCenterSurroundDiff(GaborOutput90)
        CSD135 = self.FMCenterSurroundDiff(GaborOutput135)
        # concatenate
        dst = list(CSD0)
        dst.extend(CSD45)
        dst.extend(CSD90)
        dst.extend(CSD135)
        # return
        return dst

    ## motion feature maps
    def MFMGetFM(self, src):
        # convert scale
        I8U = np.uint8(255 * src)
        # calculating optical flows
        if self.prev_frame is not None:
            farne_pyr_scale  = pySaliencyMapDefs.farne_pyr_scale
            farne_levels     = pySaliencyMapDefs.farne_levels
            farne_winsize    = pySaliencyMapDefs.farne_winsize
            farne_iterations = pySaliencyMapDefs.farne_iterations
            farne_poly_n     = pySaliencyMapDefs.farne_poly_n
            farne_poly_sigma = pySaliencyMapDefs.farne_poly_sigma
            farne_flags      = pySaliencyMapDefs.farne_flags
            flow = cv2.calcOpticalFlowFarneback(
                prev       = self.prev_frame,
                next       = I8U,
                pyr_scale  = farne_pyr_scale,
                levels     = farne_levels,
                winsize    = farne_winsize,
                iterations = farne_iterations,
                poly_n     = farne_poly_n,
                poly_sigma = farne_poly_sigma,
                flags      = farne_flags,
                flow       = None
            )
            flowx = flow[..., 0]
            flowy = flow[..., 1]
        else:
            flowx = np.zeros(I8U.shape)
            flowy = np.zeros(I8U.shape)
        # create Gaussian pyramids
        dst_x = self.FMGaussianPyrCSD(flowx)
        dst_y = self.FMGaussianPyrCSD(flowy)
        # update the current frame
        self.prev_frame = np.uint8(I8U)
        # return
        return dst_x, dst_y

    # conspicuity maps
    ## standard range normalization
    def SMRangeNormalize(self, src):
        minn, maxx, dummy1, dummy2 = cv2.minMaxLoc(src)
        if maxx != minn:
            # equivalent to (src - minn) / (maxx - minn)
            dst = src/(maxx-minn) + minn/(minn-maxx)
        else:
            dst = src - minn
        return dst

    ## computing an average of local maxima
    def SMAvgLocalMax(self, src):
        # size
        stepsize = pySaliencyMapDefs.default_step_local
        width = src.shape[1]
        height = src.shape[0]
        # find local maxima
        numlocal = 0
        lmaxmean = 0
        for y in range(0, height-stepsize, stepsize):
            for x in range(0, width-stepsize, stepsize):
                localimg = src[y:y+stepsize, x:x+stepsize]
                lmin, lmax, dummy1, dummy2 = cv2.minMaxLoc(localimg)
                lmaxmean += lmax
                numlocal += 1
        # averaging over all the local regions (error checking for numlocal)
        if numlocal == 0:
            return 0
        else:
            return lmaxmean / numlocal

    ## normalization specific for the saliency map model
    def SMNormalization(self, src):
        dst = self.SMRangeNormalize(src)
        lmaxmean = self.SMAvgLocalMax(dst)
        normcoeff = (1 - lmaxmean) * (1 - lmaxmean)
        return dst * normcoeff

    ## normalizing feature maps
    def normalizeFeatureMaps(self, FM):
        NFM = list()
        for i in range(0, 6):
            normalizedImage = self.SMNormalization(FM[i])
            nownfm = cv2.resize(normalizedImage, (self.width, self.height), interpolation=cv2.INTER_LINEAR)
            NFM.append(nownfm)
        return NFM

    ## intensity conspicuity map
    def ICMGetCM(self, IFM):
        NIFM = self.normalizeFeatureMaps(IFM)
        ICM = sum(NIFM)
        return ICM

    ## color conspicuity map
    def CCMGetCM(self, CFM_RG, CFM_BY):
        # extracting a conspicuity map for every color opponent pair
        CCM_RG = self.ICMGetCM(CFM_RG)
        CCM_BY = self.ICMGetCM(CFM_BY)
        # merge
        CCM = CCM_RG + CCM_BY
        # return
        return CCM

    ## orientation conspicuity map
    def OCMGetCM(self, OFM):
        OCM = np.zeros((self.height, self.width))
        for i in range(0, 4):
            # slicing
            nowofm = OFM[i*6:(i+1)*6]   # angle = i*45
            # extracting a conspicuity map for every angle
            NOFM = self.ICMGetCM(nowofm)
            # normalize
            NOFM2 = self.SMNormalization(NOFM)
            # accumulate
            OCM += NOFM2
        return OCM

    ## motion conspicuity map
    def MCMGetCM(self, MFM_X, MFM_Y):
        return self.CCMGetCM(MFM_X, MFM_Y)
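
    # Worked example (added for illustration, not part of the original code):
    # SMNormalization follows the spirit of the N(.) operator of Itti et al.:
    # after range normalization to [0, 1], the map is scaled by (1 - m)^2,
    # where m is the mean of its local maxima. A map with one dominant peak
    # (e.g. m = 0.1) keeps a weight of (1 - 0.1)^2 = 0.81, while a map with
    # many comparable peaks (e.g. m = 0.8) is suppressed to (1 - 0.8)^2 = 0.04,
    # so maps with a single strong response dominate the final sum.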

    # core
    def SMGetSM(self, src):
        # definitions
        size = src.shape
        width = size[1]
        height = size[0]
        # check
        # if width != self.width or height != self.height:
        #     sys.exit("size mismatch")
        # extracting individual color channels
        R, G, B, I = self.SMExtractRGBI(src)
        # extracting feature maps
        IFM = self.IFMGetFM(I)
        CFM_RG, CFM_BY = self.CFMGetFM(R, G, B)
        OFM = self.OFMGetFM(I)
        MFM_X, MFM_Y = self.MFMGetFM(I)
        # extracting conspicuity maps
        ICM = self.ICMGetCM(IFM)
        CCM = self.CCMGetCM(CFM_RG, CFM_BY)
        OCM = self.OCMGetCM(OFM)
        MCM = self.MCMGetCM(MFM_X, MFM_Y)
        # adding all the conspicuity maps to form a saliency map
        wi = pySaliencyMapDefs.weight_intensity
        wc = pySaliencyMapDefs.weight_color
        wo = pySaliencyMapDefs.weight_orientation
        wm = pySaliencyMapDefs.weight_motion
        SMMat = wi*ICM + wc*CCM + wo*OCM + wm*MCM
        # normalize
        normalizedSM = self.SMRangeNormalize(SMMat)
        normalizedSM2 = normalizedSM.astype(np.float32)
        smoothedSM = cv2.bilateralFilter(normalizedSM2, 7, 3, 1.55)
        self.SM = cv2.resize(smoothedSM, (width, height), interpolation=cv2.INTER_NEAREST)
        # return
        return self.SM

    def SMGetBinarizedSM(self, src):
        # get a saliency map
        if self.SM is None:
            self.SM = self.SMGetSM(src)
        # convert scale
        SM_I8U = np.uint8(255 * self.SM)
        # binarize with Otsu's method
        thresh, binarized_SM = cv2.threshold(SM_I8U, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        return binarized_SM

    def SMGetSalientRegion(self, src):
        # get a binarized saliency map
        binarized_SM = self.SMGetBinarizedSM(src)
        # GrabCut
        img = src.copy()
        mask = np.where((binarized_SM != 0), cv2.GC_PR_FGD, cv2.GC_PR_BGD).astype('uint8')
        bgdmodel = np.zeros((1, 65), np.float64)
        fgdmodel = np.zeros((1, 65), np.float64)
        rect = (0, 0, 1, 1)   # dummy
        iterCount = 1
        cv2.grabCut(img, mask=mask, rect=rect, bgdModel=bgdmodel, fgdModel=fgdmodel, iterCount=iterCount, mode=cv2.GC_INIT_WITH_MASK)
        # post-processing
        mask_out = np.where((mask == cv2.GC_FGD) + (mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')
        output = cv2.bitwise_and(img, img, mask=mask_out)
        return output
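

# Minimal usage sketch (added for illustration; the image path and window
# names below are assumptions, not part of the original module).
if __name__ == '__main__':
    # load a test image; 'test.png' is a hypothetical file name
    img = cv2.imread('test.png')
    if img is None:
        raise SystemExit("could not read 'test.png'")
    height, width = img.shape[:2]
    # the class is initialized with the image size
    sm = pySaliencyMap(width, height)
    # compute the saliency map, its Otsu binarization, and the GrabCut region
    saliency_map = sm.SMGetSM(img)
    binarized_map = sm.SMGetBinarizedSM(img)
    salient_region = sm.SMGetSalientRegion(img)
    # display the results
    cv2.imshow('input', img)
    cv2.imshow('saliency map', saliency_map)
    cv2.imshow('binarized saliency map', binarized_map)
    cv2.imshow('salient region', salient_region)
    cv2.waitKey(0)
    cv2.destroyAllWindows()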