Spaces:

mbar0075
/

Saliency-Ranking

Running

App Files Files Community

Saliency-Ranking / SaRa /pySaliencyMap.py

mbar0075

Testing Commit

c9baa67 4 months ago

raw

history blame

11.3 kB

	#-------------------------------------------------------------------------------
	# Name: pySaliencyMap
	# Purpose: Extracting a saliency map from a single still image
	#
	# Author: Akisato Kimura <akisato@ieee.org>
	#
	# Created: April 24, 2014
	# Copyright: (c) Akisato Kimura 2014-
	# Licence: All rights reserved
	#-------------------------------------------------------------------------------

	import cv2
	import numpy as np
	import SaRa.pySaliencyMapDefs as pySaliencyMapDefs
	import time

	class pySaliencyMap:
	    """Compute a saliency map for an image or a sequence of video frames.

	    Intensity, colour, orientation and motion feature maps are derived from
	    Gaussian pyramids, turned into conspicuity maps and finally merged into
	    one saliency map using the weights defined in ``pySaliencyMapDefs``.

	    The instance is stateful: ``prev_frame`` keeps the last 8-bit intensity
	    image (input to the optical-flow step) and ``SM`` keeps the last
	    saliency map that was computed.
	    """

	    # initialization
	    def __init__(self, width, height):
	        """Store the working resolution and load the four Gabor kernels.

	        Args:
	            width: width that every normalised feature map is resized to
	                before the maps are summed.
	            height: height used for the same resizing.
	        """
	        self.width = width
	        self.height = height
	        self.prev_frame = None  # last 8-bit intensity frame, None before the first call
	        self.SM = None  # last saliency map returned by SMGetSM
	        self.GaborKernel0 = np.array(pySaliencyMapDefs.GaborKernel_0)
	        self.GaborKernel45 = np.array(pySaliencyMapDefs.GaborKernel_45)
	        self.GaborKernel90 = np.array(pySaliencyMapDefs.GaborKernel_90)
	        self.GaborKernel135 = np.array(pySaliencyMapDefs.GaborKernel_135)

	    # extracting color channels
	    def SMExtractRGBI(self, inputImage):
	        """Split a BGR image into float R, G, B and intensity planes.

	        Args:
	            inputImage: three-channel image in BGR order. Values are scaled
	                by 1/255, so 8-bit input is presumably expected (confirm
	                against callers).

	        Returns:
	            Tuple ``(R, G, B, I)`` where ``I`` is the grayscale conversion
	            of the scaled image.
	        """
	        # convert scale of array elements
	        src = np.float32(inputImage) * 1. / 255
	        (B, G, R) = cv2.split(src)
	        # extract an intensity image
	        I = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
	        return R, G, B, I

	    # feature maps
	    ## constructing a Gaussian pyramid
	    def FMCreateGaussianPyr(self, src):
	        """Return a 9-level pyramid: ``src`` followed by eight ``pyrDown`` steps."""
	        dst = [src]
	        for _ in range(1, 9):
	            dst.append(cv2.pyrDown(dst[-1]))
	        return dst

	    ## taking center-surround differences
	    def FMCenterSurroundDiff(self, GaussianMaps):
	        """Compute six center-surround difference maps from a pyramid.

	        For every center level ``s`` in (2, 3, 4) the levels ``s+3`` and
	        ``s+4`` are resized to the size of level ``s`` and the absolute
	        difference to level ``s`` is taken.

	        Args:
	            GaussianMaps: sequence with at least 9 entries; only indices
	                2..8 are read.

	        Returns:
	            List of 6 maps ordered (2,5), (2,6), (3,6), (3,7), (4,7), (4,8).
	        """
	        dst = []
	        for s in range(2, 5):
	            center = GaussianMaps[s]
	            now_size = (center.shape[1], center.shape[0])  # (width, height)
	            for delta in (3, 4):
	                surround = cv2.resize(
	                    GaussianMaps[s + delta], now_size, interpolation=cv2.INTER_LINEAR
	                )
	                dst.append(cv2.absdiff(center, surround))
	        return dst

	    ## constructing a Gaussian pyramid + taking center-surround differences
	    def FMGaussianPyrCSD(self, src):
	        """Build the pyramid of ``src`` and return its 6 center-surround maps."""
	        GaussianMaps = self.FMCreateGaussianPyr(src)
	        return self.FMCenterSurroundDiff(GaussianMaps)

	    ## intensity feature maps
	    def IFMGetFM(self, I):
	        """Return the 6 intensity feature maps of intensity image ``I``."""
	        return self.FMGaussianPyrCSD(I)

	    ## color feature maps
	    def CFMGetFM(self, R, G, B):
	        """Return the red/green and blue/yellow opponency feature maps.

	        Args:
	            R, G, B: single-channel float planes of identical shape.

	        Returns:
	            Tuple ``(RGFM, BYFM)``, each a list of 6 center-surround maps.
	        """
	        # max(R,G,B)
	        RGBMax = cv2.max(B, cv2.max(R, G))
	        RGBMax[RGBMax <= 0] = 0.0001  # prevent dividing by 0
	        # min(R,G)
	        RGMin = cv2.min(R, G)
	        # RG = (R-G)/max(R,G,B)
	        RG = (R - G) / RGBMax
	        # BY = (B-min(R,G))/max(R,G,B)
	        BY = (B - RGMin) / RGBMax
	        # clamp negative values to 0
	        RG[RG < 0] = 0
	        BY[BY < 0] = 0
	        # obtain feature maps in the same way as intensity
	        RGFM = self.FMGaussianPyrCSD(RG)
	        BYFM = self.FMGaussianPyrCSD(BY)
	        return RGFM, BYFM

	    ## orientation feature maps
	    def OFMGetFM(self, src):
	        """Return 24 orientation feature maps (6 per Gabor kernel).

	        Pyramid levels 2..8 of ``src`` are filtered with each of the four
	        Gabor kernels and center-surround differences are taken per kernel.

	        Returns:
	            Flat list ordered as 6 maps for kernel 0, then 45, 90 and 135.
	        """
	        # creating a Gaussian pyramid
	        GaussianI = self.FMCreateGaussianPyr(src)
	        kernels = (
	            self.GaborKernel0,
	            self.GaborKernel45,
	            self.GaborKernel90,
	            self.GaborKernel135,
	        )
	        dst = []
	        for kernel in kernels:
	            # Levels 0 and 1 are never read by FMCenterSurroundDiff; the two
	            # placeholders only keep list indices equal to pyramid levels.
	            GaborOutput = [np.empty((1, 1)), np.empty((1, 1))]
	            # convolving a Gabor filter with the intensity pyramid to
	            # extract orientation features
	            for j in range(2, 9):
	                GaborOutput.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, kernel))
	            # center-surround differences for this orientation
	            dst.extend(self.FMCenterSurroundDiff(GaborOutput))
	        return dst

	    ## motion feature maps
	    def MFMGetFM(self, src):
	        """Return motion feature maps from optical flow against the last frame.

	        On the first call (no previous frame) the flow is all zeros. The
	        8-bit version of ``src`` is stored in ``self.prev_frame`` afterwards.

	        Args:
	            src: intensity image; it is multiplied by 255 and cast to uint8,
	                so values in [0, 1] are presumably expected.

	        Returns:
	            Tuple ``(dst_x, dst_y)``: 6 center-surround maps for the
	            horizontal and vertical flow components respectively.
	        """
	        # convert scale
	        I8U = np.uint8(255 * src)
	        # calculating optical flows
	        if self.prev_frame is not None:
	            flow = cv2.calcOpticalFlowFarneback(
	                prev=self.prev_frame,
	                next=I8U,
	                pyr_scale=pySaliencyMapDefs.farne_pyr_scale,
	                levels=pySaliencyMapDefs.farne_levels,
	                winsize=pySaliencyMapDefs.farne_winsize,
	                iterations=pySaliencyMapDefs.farne_iterations,
	                poly_n=pySaliencyMapDefs.farne_poly_n,
	                poly_sigma=pySaliencyMapDefs.farne_poly_sigma,
	                flags=pySaliencyMapDefs.farne_flags,
	                flow=None,
	            )
	            flowx = flow[..., 0]
	            flowy = flow[..., 1]
	        else:
	            flowx = np.zeros(I8U.shape)
	            flowy = np.zeros(I8U.shape)
	        # create Gaussian pyramids
	        dst_x = self.FMGaussianPyrCSD(flowx)
	        dst_y = self.FMGaussianPyrCSD(flowy)
	        # remember the current frame for the next call
	        self.prev_frame = I8U
	        return dst_x, dst_y

	    # conspicuity maps
	    ## standard range normalization
	    def SMRangeNormalize(self, src):
	        """Rescale ``src`` so that its minimum maps to 0 and its maximum to 1.

	        A constant image cannot be rescaled; it is returned shifted to 0.
	        """
	        minn, maxx, _, _ = cv2.minMaxLoc(src)
	        if maxx != minn:
	            # algebraically (src - minn) / (maxx - minn)
	            return src / (maxx - minn) + minn / (minn - maxx)
	        return src - minn

	    ## computing an average of local maxima
	    def SMAvgLocalMax(self, src):
	        """Return the mean of the maxima of non-overlapping square tiles.

	        The tile side is ``pySaliencyMapDefs.default_step_local``. Tiles
	        start at multiples of that step strictly below ``size - step``.

	        Returns:
	            The mean of the tile maxima, or 0 when the image is too small
	            to hold a single tile.
	        """
	        stepsize = pySaliencyMapDefs.default_step_local
	        height = src.shape[0]
	        width = src.shape[1]
	        # find local maxima
	        numlocal = 0
	        lmaxsum = 0
	        for y in range(0, height - stepsize, stepsize):
	            for x in range(0, width - stepsize, stepsize):
	                localimg = src[y:y + stepsize, x:x + stepsize]
	                _, lmax, _, _ = cv2.minMaxLoc(localimg)
	                lmaxsum += lmax
	                numlocal += 1
	        # averaging over all the local regions (guard against no tiles)
	        if numlocal == 0:
	            return 0
	        return lmaxsum / numlocal

	    ## normalization specific for the saliency map model
	    def SMNormalization(self, src):
	        """Range-normalise ``src`` and scale it by ``(1 - mean local max)**2``."""
	        dst = self.SMRangeNormalize(src)
	        lmaxmean = self.SMAvgLocalMax(dst)
	        normcoeff = (1 - lmaxmean) * (1 - lmaxmean)
	        return dst * normcoeff

	    ## normalizing feature maps
	    def normalizeFeatureMaps(self, FM):
	        """Normalise the first 6 maps of ``FM`` and resize them to the working size.

	        Args:
	            FM: sequence holding at least 6 feature maps.

	        Returns:
	            List of 6 maps, each of size ``(self.height, self.width)``.
	        """
	        NFM = []
	        for i in range(0, 6):
	            normalizedImage = self.SMNormalization(FM[i])
	            NFM.append(
	                cv2.resize(
	                    normalizedImage,
	                    (self.width, self.height),
	                    interpolation=cv2.INTER_LINEAR,
	                )
	            )
	        return NFM

	    ## intensity conspicuity map
	    def ICMGetCM(self, IFM):
	        """Sum the 6 normalised feature maps into one conspicuity map.

	        Also reused by the colour, orientation and motion conspicuity maps.
	        """
	        NIFM = self.normalizeFeatureMaps(IFM)
	        return sum(NIFM)

	    ## color conspicuity map
	    def CCMGetCM(self, CFM_RG, CFM_BY):
	        """Return the sum of the conspicuity maps of both colour opponent pairs."""
	        # extracting a conspicuity map for every color opponent pair
	        CCM_RG = self.ICMGetCM(CFM_RG)
	        CCM_BY = self.ICMGetCM(CFM_BY)
	        # merge
	        return CCM_RG + CCM_BY

	    ## orientation conspicuity map
	    def OCMGetCM(self, OFM):
	        """Return the orientation conspicuity map.

	        Args:
	            OFM: flat list of 24 maps as produced by ``OFMGetFM`` (6
	                consecutive maps per orientation).

	        Returns:
	            Array of shape ``(self.height, self.width)``: the sum over the
	            four orientations of the normalised per-orientation maps.
	        """
	        OCM = np.zeros((self.height, self.width))
	        for i in range(0, 4):
	            # slicing: the six maps that belong to angle = i*45
	            nowofm = OFM[i * 6:(i + 1) * 6]
	            # extracting a conspicuity map for every angle
	            NOFM = self.ICMGetCM(nowofm)
	            # normalize
	            NOFM2 = self.SMNormalization(NOFM)
	            # accumulate
	            OCM += NOFM2
	        return OCM

	    ## motion conspicuity map
	    def MCMGetCM(self, MFM_X, MFM_Y):
	        """Return the motion conspicuity map (same merge as the colour map)."""
	        return self.CCMGetCM(MFM_X, MFM_Y)

	    # core
	    def SMGetSM(self, src):
	        """Compute, store and return the saliency map of ``src``.

	        Feature maps are combined at the working size given to
	        ``__init__``. The final map is resized back to the size of ``src``,
	        so the two sizes are not required to match.

	        Side effects: updates ``self.prev_frame`` (through ``MFMGetFM``)
	        and ``self.SM``.

	        Args:
	            src: three-channel BGR image.

	        Returns:
	            float32 saliency map with the same height and width as ``src``.
	        """
	        # definitions
	        size = src.shape
	        width = size[1]
	        height = size[0]
	        # extracting individual color channels
	        R, G, B, I = self.SMExtractRGBI(src)
	        # extracting feature maps
	        IFM = self.IFMGetFM(I)
	        CFM_RG, CFM_BY = self.CFMGetFM(R, G, B)
	        OFM = self.OFMGetFM(I)
	        MFM_X, MFM_Y = self.MFMGetFM(I)
	        # extracting conspicuity maps
	        ICM = self.ICMGetCM(IFM)
	        CCM = self.CCMGetCM(CFM_RG, CFM_BY)
	        OCM = self.OCMGetCM(OFM)
	        MCM = self.MCMGetCM(MFM_X, MFM_Y)
	        # adding all the conspicuity maps to form a saliency map
	        wi = pySaliencyMapDefs.weight_intensity
	        wc = pySaliencyMapDefs.weight_color
	        wo = pySaliencyMapDefs.weight_orientation
	        wm = pySaliencyMapDefs.weight_motion
	        SMMat = wi * ICM + wc * CCM + wo * OCM + wm * MCM
	        # normalize
	        normalizedSM = self.SMRangeNormalize(SMMat)
	        normalizedSM2 = normalizedSM.astype(np.float32)
	        # bilateralFilter(src, d=7, sigmaColor=3, sigmaSpace=1.55)
	        smoothedSM = cv2.bilateralFilter(normalizedSM2, 7, 3, 1.55)
	        self.SM = cv2.resize(
	            smoothedSM, (width, height), interpolation=cv2.INTER_NEAREST
	        )
	        return self.SM

	    def SMGetBinarizedSM(self, src):
	        """Return the Otsu-binarised saliency map as a uint8 image (0 or 255).

	        NOTE: the saliency map is only computed from ``src`` when no map is
	        cached in ``self.SM``. If ``SMGetSM`` was called before, its cached
	        result is binarised and ``src`` is ignored.
	        """
	        # get a saliency map
	        if self.SM is None:
	            self.SM = self.SMGetSM(src)
	        # convert scale
	        SM_I8U = np.uint8(255 * self.SM)
	        # binarize
	        _, binarized_SM = cv2.threshold(
	            SM_I8U, thresh=0, maxval=255, type=cv2.THRESH_BINARY + cv2.THRESH_OTSU
	        )
	        return binarized_SM

	    def SMGetSalientRegion(self, src):
	        """Return a copy of ``src`` with everything but the salient region zeroed.

	        The binarised saliency map seeds GrabCut (probable foreground where
	        the map is non-zero, probable background elsewhere). Pixels GrabCut
	        labels as foreground or probable foreground are kept.
	        """
	        # get a binarized saliency map
	        binarized_SM = self.SMGetBinarizedSM(src)
	        # GrabCut
	        img = src.copy()
	        mask = np.where(
	            (binarized_SM != 0), cv2.GC_PR_FGD, cv2.GC_PR_BGD
	        ).astype('uint8')
	        bgdmodel = np.zeros((1, 65), np.float64)
	        fgdmodel = np.zeros((1, 65), np.float64)
	        rect = (0, 0, 1, 1)  # dummy: initialisation comes from the mask
	        iterCount = 1
	        cv2.grabCut(
	            img,
	            mask=mask,
	            rect=rect,
	            bgdModel=bgdmodel,
	            fgdModel=fgdmodel,
	            iterCount=iterCount,
	            mode=cv2.GC_INIT_WITH_MASK,
	        )
	        # post-processing
	        keep = (mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD)
	        mask_out = np.where(keep, 255, 0).astype('uint8')
	        output = cv2.bitwise_and(img, img, mask=mask_out)
	        return output