#-------------------------------------------------------------------------------
# Name: pySaliencyMap
# Purpose: Extracting a saliency map from a single still image
#
# Author: Akisato Kimura <akisato@ieee.org>
#
# Created: April 24, 2014
# Copyright: (c) Akisato Kimura 2014-
# Licence: All rights reserved
#-------------------------------------------------------------------------------
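# Note: the pipeline below follows an Itti-Koch-Niebur style saliency
# architecture: intensity, color-opponency, orientation, and motion channels
# are decomposed into center-surround feature maps, normalized, and combined
# into conspicuity maps and a final weighted saliency map.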
import cv2
import numpy as np
import SaRa.pySaliencyMapDefs as pySaliencyMapDefs
class pySaliencyMap:
# initialization
def __init__(self, width, height):
self.width = width
self.height = height
self.prev_frame = None
self.SM = None
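        # fixed 2-D Gabor kernels at 0/45/90/135 degrees, defined as constant
        # arrays in pySaliencyMapDefs and used for the orientation channel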
self.GaborKernel0 = np.array(pySaliencyMapDefs.GaborKernel_0)
self.GaborKernel45 = np.array(pySaliencyMapDefs.GaborKernel_45)
self.GaborKernel90 = np.array(pySaliencyMapDefs.GaborKernel_90)
self.GaborKernel135 = np.array(pySaliencyMapDefs.GaborKernel_135)
# extracting color channels
def SMExtractRGBI(self, inputImage):
        # scale 8-bit pixel values to float32 in [0, 1]
        src = np.float32(inputImage) * 1./255
# split
(B, G, R) = cv2.split(src)
# extract an intensity image
I = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
# return
return R, G, B, I
# feature maps
## constructing a Gaussian pyramid
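    ## e.g., a 640x480 input yields 9 levels via repeated cv2.pyrDown halving:
    ## 640x480, 320x240, 160x120, 80x60, 40x30, 20x15, 10x8, 5x4, 3x2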
def FMCreateGaussianPyr(self, src):
dst = list()
dst.append(src)
for i in range(1,9):
nowdst = cv2.pyrDown(dst[i-1])
dst.append(nowdst)
return dst
## taking center-surround differences
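    ## center scales c = 2..4 are compared with surround scales s = c+3 and
    ## s = c+4 (resized back to the center scale), giving 6 difference maps
    ## per feature channel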
def FMCenterSurroundDiff(self, GaussianMaps):
dst = list()
for s in range(2,5):
now_size = GaussianMaps[s].shape
now_size = (now_size[1], now_size[0]) ## (width, height)
tmp = cv2.resize(GaussianMaps[s+3], now_size, interpolation=cv2.INTER_LINEAR)
nowdst = cv2.absdiff(GaussianMaps[s], tmp)
dst.append(nowdst)
tmp = cv2.resize(GaussianMaps[s+4], now_size, interpolation=cv2.INTER_LINEAR)
nowdst = cv2.absdiff(GaussianMaps[s], tmp)
dst.append(nowdst)
return dst
## constructing a Gaussian pyramid + taking center-surround differences
def FMGaussianPyrCSD(self, src):
GaussianMaps = self.FMCreateGaussianPyr(src)
dst = self.FMCenterSurroundDiff(GaussianMaps)
return dst
## intensity feature maps
def IFMGetFM(self, I):
return self.FMGaussianPyrCSD(I)
## color feature maps
def CFMGetFM(self, R, G, B):
# max(R,G,B)
tmp1 = cv2.max(R, G)
RGBMax = cv2.max(B, tmp1)
        RGBMax[RGBMax <= 0] = 0.0001  # avoid division by zero
# min(R,G)
RGMin = cv2.min(R, G)
# RG = (R-G)/max(R,G,B)
RG = (R - G) / RGBMax
        # BY = (B - min(R,G)) / max(R,G,B)
        BY = (B - RGMin) / RGBMax
        # clamp negative values to 0
RG[RG < 0] = 0
BY[BY < 0] = 0
# obtain feature maps in the same way as intensity
RGFM = self.FMGaussianPyrCSD(RG)
BYFM = self.FMGaussianPyrCSD(BY)
# return
return RGFM, BYFM
## orientation feature maps
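    ## 4 orientations x 6 center-surround maps = 24 orientation feature maps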
def OFMGetFM(self, src):
# creating a Gaussian pyramid
GaussianI = self.FMCreateGaussianPyr(src)
        # convolving Gabor filters with the intensity pyramid to extract orientation features
        GaborOutput0   = [ np.empty((1,1)), np.empty((1,1)) ]  # dummy entries so list indices align with pyramid levels 0-1 (never used)
        GaborOutput45  = [ np.empty((1,1)), np.empty((1,1)) ]
        GaborOutput90  = [ np.empty((1,1)), np.empty((1,1)) ]
        GaborOutput135 = [ np.empty((1,1)), np.empty((1,1)) ]
for j in range(2,9):
GaborOutput0.append( cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel0) )
GaborOutput45.append( cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel45) )
GaborOutput90.append( cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel90) )
GaborOutput135.append( cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel135) )
        # calculating center-surround differences for every orientation
CSD0 = self.FMCenterSurroundDiff(GaborOutput0)
CSD45 = self.FMCenterSurroundDiff(GaborOutput45)
CSD90 = self.FMCenterSurroundDiff(GaborOutput90)
CSD135 = self.FMCenterSurroundDiff(GaborOutput135)
# concatenate
dst = list(CSD0)
dst.extend(CSD45)
dst.extend(CSD90)
dst.extend(CSD135)
# return
return dst
## motion feature maps
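    ## dense optical flow (Farneback) between consecutive frames; the x and y
    ## flow components are treated as two separate feature channels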
def MFMGetFM(self, src):
# convert scale
I8U = np.uint8(255 * src)
# calculating optical flows
if self.prev_frame is not None:
farne_pyr_scale= pySaliencyMapDefs.farne_pyr_scale
farne_levels = pySaliencyMapDefs.farne_levels
farne_winsize = pySaliencyMapDefs.farne_winsize
farne_iterations = pySaliencyMapDefs.farne_iterations
farne_poly_n = pySaliencyMapDefs.farne_poly_n
farne_poly_sigma = pySaliencyMapDefs.farne_poly_sigma
farne_flags = pySaliencyMapDefs.farne_flags
            flow = cv2.calcOpticalFlowFarneback(
                prev=self.prev_frame,
                next=I8U,
                flow=None,
                pyr_scale=farne_pyr_scale,
                levels=farne_levels,
                winsize=farne_winsize,
                iterations=farne_iterations,
                poly_n=farne_poly_n,
                poly_sigma=farne_poly_sigma,
                flags=farne_flags
            )
flowx = flow[...,0]
flowy = flow[...,1]
else:
flowx = np.zeros(I8U.shape)
flowy = np.zeros(I8U.shape)
# create Gaussian pyramids
dst_x = self.FMGaussianPyrCSD(flowx)
dst_y = self.FMGaussianPyrCSD(flowy)
        # store the current frame for the next optical-flow computation
        self.prev_frame = I8U
# return
return dst_x, dst_y
# conspicuity maps
## standard range normalization
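    ## note: src/(max-min) + min/(min-max) == (src-min)/(max-min),
    ## i.e. a linear rescaling of src to the range [0, 1]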
def SMRangeNormalize(self, src):
minn, maxx, dummy1, dummy2 = cv2.minMaxLoc(src)
if maxx!=minn:
dst = src/(maxx-minn) + minn/(minn-maxx)
else:
dst = src - minn
return dst
## computing an average of local maxima
def SMAvgLocalMax(self, src):
# size
stepsize = pySaliencyMapDefs.default_step_local
width = src.shape[1]
height = src.shape[0]
# find local maxima
numlocal = 0
lmaxmean = 0
for y in range(0, height-stepsize, stepsize):
for x in range(0, width-stepsize, stepsize):
localimg = src[y:y+stepsize, x:x+stepsize]
lmin, lmax, dummy1, dummy2 = cv2.minMaxLoc(localimg)
lmaxmean += lmax
numlocal += 1
# averaging over all the local regions (error checking for numlocal)
if numlocal==0:
return 0
else:
return lmaxmean / numlocal
## normalization specific for the saliency map model
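    ## after range normalization the global maximum is 1, so scaling by
    ## (1 - mean_local_max)^2 promotes maps with a few strong peaks and
    ## suppresses maps with many comparable peaks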
def SMNormalization(self, src):
dst = self.SMRangeNormalize(src)
lmaxmean = self.SMAvgLocalMax(dst)
normcoeff = (1-lmaxmean)*(1-lmaxmean)
return dst * normcoeff
## normalizing feature maps
def normalizeFeatureMaps(self, FM):
NFM = list()
for i in range(0,6):
normalizedImage = self.SMNormalization(FM[i])
nownfm = cv2.resize(normalizedImage, (self.width, self.height), interpolation=cv2.INTER_LINEAR)
NFM.append(nownfm)
return NFM
## intensity conspicuity map
def ICMGetCM(self, IFM):
NIFM = self.normalizeFeatureMaps(IFM)
ICM = sum(NIFM)
return ICM
## color conspicuity map
def CCMGetCM(self, CFM_RG, CFM_BY):
# extracting a conspicuity map for every color opponent pair
CCM_RG = self.ICMGetCM(CFM_RG)
CCM_BY = self.ICMGetCM(CFM_BY)
# merge
CCM = CCM_RG + CCM_BY
# return
return CCM
## orientation conspicuity map
def OCMGetCM(self, OFM):
OCM = np.zeros((self.height, self.width))
        for i in range(0,4):
# slicing
nowofm = OFM[i*6:(i+1)*6] # angle = i*45
# extracting a conspicuity map for every angle
NOFM = self.ICMGetCM(nowofm)
# normalize
NOFM2 = self.SMNormalization(NOFM)
# accumulate
OCM += NOFM2
return OCM
## motion conspicuity map
def MCMGetCM(self, MFM_X, MFM_Y):
return self.CCMGetCM(MFM_X, MFM_Y)
# core
def SMGetSM(self, src):
# definitions
size = src.shape
width = size[1]
height = size[0]
# check
# if(width != self.width or height != self.height):
# sys.exit("size mismatch")
# extracting individual color channels
R, G, B, I = self.SMExtractRGBI(src)
# extracting feature maps
IFM = self.IFMGetFM(I)
CFM_RG, CFM_BY = self.CFMGetFM(R, G, B)
OFM = self.OFMGetFM(I)
MFM_X, MFM_Y = self.MFMGetFM(I)
# extracting conspicuity maps
ICM = self.ICMGetCM(IFM)
CCM = self.CCMGetCM(CFM_RG, CFM_BY)
OCM = self.OCMGetCM(OFM)
MCM = self.MCMGetCM(MFM_X, MFM_Y)
# adding all the conspicuity maps to form a saliency map
wi = pySaliencyMapDefs.weight_intensity
wc = pySaliencyMapDefs.weight_color
wo = pySaliencyMapDefs.weight_orientation
wm = pySaliencyMapDefs.weight_motion
SMMat = wi*ICM + wc*CCM + wo*OCM + wm*MCM
# normalize
normalizedSM = self.SMRangeNormalize(SMMat)
normalizedSM2 = normalizedSM.astype(np.float32)
smoothedSM = cv2.bilateralFilter(normalizedSM2, 7, 3, 1.55)
self.SM = cv2.resize(smoothedSM, (width,height), interpolation=cv2.INTER_NEAREST)
# return
return self.SM
def SMGetBinarizedSM(self, src):
# get a saliency map
if self.SM is None:
self.SM = self.SMGetSM(src)
# convert scale
SM_I8U = np.uint8(255 * self.SM)
# binarize
thresh, binarized_SM = cv2.threshold(SM_I8U, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
return binarized_SM
def SMGetSalientRegion(self, src):
# get a binarized saliency map
binarized_SM = self.SMGetBinarizedSM(src)
# GrabCut
img = src.copy()
mask = np.where((binarized_SM!=0), cv2.GC_PR_FGD, cv2.GC_PR_BGD).astype('uint8')
bgdmodel = np.zeros((1,65),np.float64)
fgdmodel = np.zeros((1,65),np.float64)
rect = (0,0,1,1) # dummy
iterCount = 1
cv2.grabCut(img, mask=mask, rect=rect, bgdModel=bgdmodel, fgdModel=fgdmodel, iterCount=iterCount, mode=cv2.GC_INIT_WITH_MASK)
# post-processing
mask_out = np.where((mask==cv2.GC_FGD) + (mask==cv2.GC_PR_FGD), 255, 0).astype('uint8')
output = cv2.bitwise_and(img,img,mask=mask_out)
return output
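
#-------------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). Assumes an image
# file readable by cv2.imread; the path 'input.png' is a placeholder.
#-------------------------------------------------------------------------------
if __name__ == '__main__':
    img = cv2.imread('input.png')                   # BGR uint8 image
    if img is None:
        raise SystemExit('could not read input.png')
    h, w = img.shape[:2]
    sm = pySaliencyMap(w, h)
    saliency_map = sm.SMGetSM(img)                  # float map, roughly in [0, 1]
    binarized = sm.SMGetBinarizedSM(img)            # Otsu-thresholded uint8 mask
    salient_region = sm.SMGetSalientRegion(img)     # GrabCut-refined salient region
    cv2.imshow('saliency', saliency_map)
    cv2.imshow('binarized', binarized)
    cv2.imshow('salient region', salient_region)
    cv2.waitKey(0)
    cv2.destroyAllWindows()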