import cv2
import numpy as np
import math
import scipy.stats as st
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import operator
import time
import os
from enum import Enum
import pandas as pd

# Akisato Kimura <akisato@ieee.org> implementation of Itti's Saliency Map Generator -- https://github.com/akisatok/pySaliencyMap
from SaRa.pySaliencyMap import pySaliencyMap

# Global Variables

# Entropy, sum, depth, centre-bias
WEIGHTS = (1, 1, 1, 1)

# segments_entropies = []
segments_scores = []
segments_coords = []
seg_dim = 0
segments = []
gt_segments = []
dws = []
sara_list = []

eval_list = []
labels_eval_list = ['Image', 'Index', 'Rank', 'Quartile', 'isGT', 'Outcome']
outcome_list = []
labels_outcome_list = ['Image', 'FN', 'FP', 'TN', 'TP']
dataframe_collection = {}

error_count = 0


# SaRa Initial Functions
def generate_segments(img, seg_count) -> list:
    '''
    Given an image img and the desired number of segments seg_count, this
    function divides the image into seg_count x seg_count segments and
    returns a list of segments.
    '''
    segments = []
    segment_count = seg_count
    index = 0

    w_interval = int(img.shape[1] / segment_count)
    h_interval = int(img.shape[0] / segment_count)

    for i in range(segment_count):
        for j in range(segment_count):
            temp_segment = img[int(h_interval * i):int(h_interval * (i + 1)),
                               int(w_interval * j):int(w_interval * (j + 1))]
            segments.append(temp_segment)
            coord_tup = (index, int(w_interval * j), int(h_interval * i),
                         int(w_interval * (j + 1)), int(h_interval * (i + 1)))
            segments_coords.append(coord_tup)
            index += 1

    return segments
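
# Illustrative example (a sketch, not part of the pipeline): with
# seg_count = 9, a 512x512 image is divided into 81 segments of roughly
# 56x56 pixels each, appended row by row:
#   segs = generate_segments(img, 9)
#   assert len(segs) == 81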

def return_saliency(img, generator='itti', deepgaze_model=None, emlnet_models=None, DEVICE='cpu'):
    '''
    Takes an image img as input and calculates its saliency map using the
    selected generator ('itti', 'deepgaze', 'fpn' or 'emlnet'). It returns
    the saliency map.
    '''
    img_width, img_height = img.shape[1], img.shape[0]

    if generator == 'itti':
        sm = pySaliencyMap(img_width, img_height)
        saliency_map = sm.SMGetSM(img)

        # Scale pixel values to 0-255 instead of float (approx 0, hence black image)
        # https://stackoverflow.com/questions/48331211/how-to-use-cv2-imshow-correctly-for-the-float-image-returned-by-cv2-distancet/48333272
        saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)

    elif generator == 'deepgaze':
        from scipy.ndimage import zoom
        from scipy.special import logsumexp
        import torch
        import deepgaze_pytorch

        # you can use DeepGazeI or DeepGazeIIE
        if deepgaze_model is None:
            model = deepgaze_pytorch.DeepGazeIIE(pretrained=True).to(DEVICE)
        else:
            model = deepgaze_model

        image = img

        # load precomputed centerbias log density (from MIT1003) over a 1024x1024 image
        # you can download the centerbias from https://github.com/matthias-k/DeepGaze/releases/download/v1.0.0/centerbias_mit1003.npy
        # centerbias_template = np.load('centerbias_mit1003.npy')
        # alternatively, use a uniform centerbias, as below
        centerbias_template = np.zeros((1024, 1024))

        # rescale to match image size
        centerbias = zoom(centerbias_template, (image.shape[0] / centerbias_template.shape[0], image.shape[1] / centerbias_template.shape[1]), order=0, mode='nearest')
        # renormalize log density
        centerbias -= logsumexp(centerbias)

        image_tensor = torch.tensor([image.transpose(2, 0, 1)]).to(DEVICE)
        centerbias_tensor = torch.tensor([centerbias]).to(DEVICE)

        log_density_prediction = model(image_tensor, centerbias_tensor)

        saliency_map = cv2.resize(log_density_prediction.detach().cpu().numpy()[0, 0], (img_width, img_height))

    elif generator == 'fpn':
        # Add ./fpn to the system path
        import sys
        sys.path.append('./fpn')
        import inference as inf

        results_dict = {}
        rt_args = inf.parse_arguments(img)

        # Call the run_inference function and capture the results
        pred_masks_raw_list, pred_masks_round_list = inf.run_inference(rt_args)

        # Store the results in the dictionary
        results_dict['pred_masks_raw'] = pred_masks_raw_list
        results_dict['pred_masks_round'] = pred_masks_round_list

        saliency_map = results_dict['pred_masks_raw']

        # The FPN output is square: resize to the longer image side, then
        # crop the padding back out to recover the original aspect ratio
        if img_width > img_height:
            saliency_map = cv2.resize(saliency_map, (img_width, img_width))
            diff = (img_width - img_height) // 2
            saliency_map = saliency_map[diff:img_width - diff, 0:img_width]
        else:
            saliency_map = cv2.resize(saliency_map, (img_height, img_height))
            diff = (img_height - img_width) // 2
            saliency_map = saliency_map[0:img_height, diff:img_height - diff]

    elif generator == 'emlnet':
        from emlnet.eval_combined import main as eval_combined
        saliency_map = eval_combined(img, emlnet_models)

        # Resize to image size
        saliency_map = cv2.resize(saliency_map, (img_width, img_height))

        # Normalize saliency map
        saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)
        saliency_map = cv2.GaussianBlur(saliency_map, (31, 31), 10)
        return saliency_map

    # Reduce the number of intensity levels (e.g. 0-255 -> 0-15) for the
    # entropy calculation
    saliency_map = saliency_map // 16
    return saliency_map
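
# Illustrative usage sketch (assumes 'image.png' exists and the chosen
# generator's dependencies are installed); with the 'itti' generator the
# returned map is quantised to 16 grey levels:
#   img = cv2.imread('image.png')
#   sal = return_saliency(img, generator='itti')   # values in 0..15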

def return_saliency_batch(images, generator='deepgaze', deepgaze_model=None, emlnet_models=None, DEVICE='cuda', BATCH_SIZE=1):
    '''
    Calculates saliency maps for a batch of equally-sized images in a
    single forward pass. Currently only the 'deepgaze' generator is
    supported.
    '''
    img_widths, img_heights = [], []

    if generator == 'deepgaze':
        from scipy.ndimage import zoom
        from scipy.special import logsumexp
        import torch
        import deepgaze_pytorch

        # you can use DeepGazeI or DeepGazeIIE
        if deepgaze_model is None:
            model = deepgaze_pytorch.DeepGazeIIE(pretrained=True).to(DEVICE)
        else:
            model = deepgaze_model

        image_batch = torch.tensor([img.transpose(2, 0, 1) for img in images]).to(DEVICE)

        # Uniform centerbias; see return_saliency for the precomputed
        # MIT1003 alternative
        centerbias_template = np.zeros((1024, 1024))
        centerbias_tensors = []

        for img in images:
            # rescale to match image size
            centerbias = zoom(centerbias_template, (img.shape[0] / centerbias_template.shape[0], img.shape[1] / centerbias_template.shape[1]), order=0, mode='nearest')
            # renormalize log density
            centerbias -= logsumexp(centerbias)
            centerbias_tensors.append(torch.tensor(centerbias).to(DEVICE))

            # Record each image's dimensions for the final resize
            img_widths.append(img.shape[1])
            img_heights.append(img.shape[0])

        with torch.no_grad():
            # Process the batch of images in one forward pass
            log_density_predictions = model(image_batch, torch.stack(centerbias_tensors))

        saliency_maps = []
        for i in range(len(images)):
            saliency_map = cv2.resize(log_density_predictions[i, 0].cpu().numpy(), (img_widths[i], img_heights[i]))
            saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)
            saliency_map = cv2.GaussianBlur(saliency_map, (31, 31), 10)
            saliency_map = saliency_map // 16
            saliency_maps.append(saliency_map)

        return saliency_maps

# Saliency Ranking

def calculate_pixel_frequency(img) -> dict:
    '''
    Calculates the frequency of each pixel value in the image img and
    returns a dictionary containing the pixel frequencies.
    '''
    flt = img.flatten()
    unique, counts = np.unique(flt, return_counts=True)
    pixels_frequency = dict(zip(unique, counts))

    return pixels_frequency
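
# Illustrative example: a 2x2 image with pixels [[0, 0], [0, 255]] gives
#   calculate_pixel_frequency(np.array([[0, 0], [0, 255]]))   # {0: 3, 255: 1}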

def calculate_score(H, seg_sum, ds, cb, w):
    '''
    Calculates the saliency score of a segment from its entropy H, pixel
    sum seg_sum, depth score ds and centre-bias cb, combined using the
    weights w. It returns the saliency score.
    '''
    # Normalise H
    # H = (H - 0) / (math.log(2, 256) - 0)

    # H = wth root of H
    H = H ** w[0]

    if seg_sum > 0:
        seg_sum = np.log(seg_sum)
    seg_sum = seg_sum ** w[1]

    ds = ds ** w[2]
    cb = (cb + 1) ** w[3]

    return H + seg_sum + ds + cb
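
# Worked example with the default WEIGHTS = (1, 1, 1, 1): a segment with
# entropy H = 3.2, pixel sum 1e6, depth score ds = 1 and centre-bias
# cb = 0.5 scores (np.log is the natural logarithm)
#   3.2**1 + np.log(1e6)**1 + 1**1 + (0.5 + 1)**1
#   = 3.2 + 13.82 + 1 + 1.5 ~ 19.5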

def calculate_entropy(img, w, dw) -> float:
    '''
    Calculates the Shannon entropy (in bits) of an image img. The weight w
    and depth weight dw are accepted for the weighted variant (currently
    commented out) and do not affect the returned value.
    '''
    flt = img.flatten()

    entropy = 0
    wt = w * 10

    # If a depth map is available, calculate its frequency, find the max,
    # and use that value as a weight in the entropy (see the commented
    # line below)
    pixels_frequency = calculate_pixel_frequency(flt)

    total_pixels = sum(pixels_frequency.values())

    for px in pixels_frequency:
        t_prob = pixels_frequency[px] / total_pixels
        if t_prob != 0:
            entropy += (t_prob * math.log((1 / t_prob), 2))

    # entropy = entropy * wt * dw
    return entropy
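
# Illustrative example: a uniform segment has entropy 0, while a segment
# whose pixels take two values with equal frequency has entropy 1 bit:
#   calculate_entropy(np.zeros((8, 8)), 1, 1)          # 0.0
#   calculate_entropy(np.array([0, 255] * 32), 1, 1)   # 1.0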

def find_most_salient_segment(segments, kernel, dws):
    '''
    Finds the most salient segment among the provided segments using a
    given centre-bias kernel and depth weights. It returns the maximum
    saliency score and the index of the most salient segment.
    '''
    max_score = 0
    index = 0
    i = 0

    for segment in segments:
        temp_entropy = calculate_entropy(segment, kernel[i], dws[i])

        # Normalise segment between 0 and 255
        segment = cv2.normalize(segment, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)
        temp_sum = np.sum(segment)

        w = WEIGHTS
        temp_score = calculate_score(temp_entropy, temp_sum, dws[i], kernel[i], w)
        # Index, score, entropy term, sum term, centre-bias term, depth term
        # (weight indices match calculate_score: w[2] for depth, w[3] for
        # centre-bias)
        temp_tup = (i, temp_score, temp_entropy ** w[0], temp_sum ** w[1], (kernel[i] + 1) ** w[3], dws[i] ** w[2])
        segments_scores.append(temp_tup)

        if temp_score > max_score:
            max_score = temp_score
            index = i
        i += 1

    return max_score, index

def make_gaussian(size, fwhm=10, center=None):
    '''
    Generates a 2D Gaussian kernel with the specified size and
    full-width-half-maximum (fwhm). It returns the Gaussian kernel.

    size: length of a side of the square
    fwhm: full-width-half-maximum, which can be thought of as an effective
        radius.

    https://gist.github.com/andrewgiessel/4635563
    '''
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]

    if center is None:
        x0 = y0 = size // 2
    else:
        x0 = center[0]
        y0 = center[1]

    return np.exp(-4 * np.log(2) * ((x - x0) ** 2 + (y - y0) ** 2) / fwhm ** 2)
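
# Illustrative example: the kernel is flattened into the per-segment
# centre-bias weights used by find_most_salient_segment, so central
# segments receive higher cb values:
#   kernel = make_gaussian(9)   # one weight per cell of a 9x9 grid
#   kernel.ravel()[40]          # centre segment: 1.0
#   kernel.ravel()[0]           # corner segment: ~0.41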

def gen_depth_weights(d_segments, depth_map) -> list:
    '''
    Generates depth weights for the segments based on the depth map. It
    returns a list of depth weights.
    '''
    hist_d, _ = np.histogram(depth_map, 256, [0, 256])

    # Get first non-zero index
    first_nz = next((i for i, x in enumerate(hist_d) if x), None)

    # Get last non-zero index
    rev = (len(hist_d) - idx for idx, item in enumerate(reversed(hist_d), 1) if item)
    last_nz = next(rev, None)

    mid = (first_nz + last_nz) / 2

    for seg in d_segments:
        hist, _ = np.histogram(seg, 256, [0, 256])
        dw = 0
        ind = 0
        # Weight each segment by the number of pixels deeper than the midpoint
        for s in hist:
            if ind > mid:
                dw = dw + (s * 1)
            ind = ind + 1
        dws.append(dw)

    return dws

def gen_blank_depth_weight(d_segments):
    '''
    Generates blank depth weights for the segments. It returns a list of
    depth weights.
    '''
    for _ in d_segments:
        dw = 1
        dws.append(dw)

    return dws

def generate_heatmap(img, sorted_seg_scores, segments_coords, mode=1) -> tuple:
    '''
    Generates a heatmap overlay on the input image img based on the
    provided sorted segment scores. It returns the image with the heatmap
    overlay and a list of segment scores with quartile information.

    mode: 0 for white grid, 1 for colour-coded grid, 2 for a more vibrant
        colormap heatmap to be used as a feature
    '''
    alpha = 0.3

    if mode == 2:
        print_index = len(sorted_seg_scores) - 1
        set_value = int(0.25 * len(sorted_seg_scores))
        max_x = 0
        max_y = 0

        overlay = np.zeros_like(img, dtype=np.uint8)
        text_overlay = np.zeros_like(img, dtype=np.uint8)

        sara_list_out = []

        scores = [score[1] for score in sorted_seg_scores]
        min_score = min(scores)
        max_score = max(scores)
        # Guard against division by zero when all scores are equal
        score_range = max_score - min_score if max_score > min_score else 1

        # Choose a colormap from matplotlib
        colormap = plt.get_cmap('jet')  # 'jet', 'viridis', 'plasma', 'magma', 'cividis', or their '_r' variants

        for ent in reversed(sorted_seg_scores):
            score = ent[1]
            normalized_score = (score - min_score) / score_range
            # The colormap returns RGB in [0, 1]; convert to a BGR uint8
            # tuple for OpenCV
            rgb = np.array(colormap(normalized_score)[:3]) * 255
            color = tuple(int(c) for c in rgb[::-1])

            x1 = segments_coords[ent[0]][1]
            y1 = segments_coords[ent[0]][2]
            x2 = segments_coords[ent[0]][3]
            y2 = segments_coords[ent[0]][4]

            if x2 > max_x:
                max_x = x2
            if y2 > max_y:
                max_y = y2

            # fill rectangle
            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
            # black border (optional)
            # cv2.rectangle(overlay, (x1, y1), (x2, y2), (0, 0, 0), 1)

            # Determine quartile based on print_index
            if print_index + 1 <= set_value:
                quartile = 1
            elif print_index + 1 <= set_value * 2:
                quartile = 2
            elif print_index + 1 <= set_value * 3:
                quartile = 3
            else:
                quartile = 4

            sara_tuple = (ent[0], print_index, ent[1], ent[2], ent[3], ent[4], ent[5], quartile)
            sara_list_out.append(sara_tuple)
            print_index -= 1

        overlay = overlay[0:max_y, 0:max_x]
        text_overlay = text_overlay[0:max_y, 0:max_x]
        img = img[0:max_y, 0:max_x]

        # Blend the overlay onto a grayscale version of the original image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.merge([gray, gray, gray])
        gray = cv2.addWeighted(overlay, alpha, gray, 1 - alpha, 0, gray)
        gray[text_overlay > 128] = text_overlay[text_overlay > 128]

        return gray, sara_list_out

    else:
        font = cv2.FONT_HERSHEY_SIMPLEX
        print_index = len(sorted_seg_scores) - 1
        set_value = int(0.25 * len(sorted_seg_scores))
        color = (0, 0, 0)
        max_x = 0
        max_y = 0

        overlay = np.zeros_like(img, dtype=np.uint8)
        text_overlay = np.zeros_like(img, dtype=np.uint8)

        sara_list_out = []

        for ent in reversed(sorted_seg_scores):
            quartile = 0
            if mode == 0:
                color = (255, 255, 255)
            elif mode == 1:
                if print_index + 1 <= set_value:
                    color = (0, 0, 255, 255)
                    quartile = 1
                elif print_index + 1 <= set_value * 2:
                    color = (0, 128, 255, 192)
                    quartile = 2
                elif print_index + 1 <= set_value * 3:
                    color = (0, 255, 255, 128)
                    quartile = 3
                else:
                    color = (0, 250, 0, 64)
                    quartile = 4

            x1 = segments_coords[ent[0]][1]
            y1 = segments_coords[ent[0]][2]
            x2 = segments_coords[ent[0]][3]
            y2 = segments_coords[ent[0]][4]

            if x2 > max_x:
                max_x = x2
            if y2 > max_y:
                max_y = y2

            x = int((x1 + x2) / 2)
            y = int((y1 + y2) / 2)

            # fill rectangle with a black border
            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
            cv2.rectangle(overlay, (x1, y1), (x2, y2), (0, 0, 0), 1)

            # put white rank text in the middle of the rectangle
            cv2.putText(text_overlay, str(print_index), (x - 5, y),
                        font, .4, (255, 255, 255), 1, cv2.LINE_AA)

            # Index, rank, score, entropy, entropy_sum, centre_bias, depth, quartile
            sara_tuple = (ent[0], print_index, ent[1], ent[2], ent[3], ent[4], ent[5], quartile)
            sara_list_out.append(sara_tuple)
            print_index -= 1

        # crop the overlay to up to x2 and y2
        overlay = overlay[0:max_y, 0:max_x]
        text_overlay = text_overlay[0:max_y, 0:max_x]
        img = img[0:max_y, 0:max_x]

        img = cv2.addWeighted(overlay, 0.3, img, 0.7, 0, img)
        img[text_overlay > 128] = text_overlay[text_overlay > 128]

        return img, sara_list_out

def generate_sara(tex, tex_segments, mode=2):
    '''
    Generates the SaRa (Salient Region Annotation) output by calculating
    saliency scores for the segments of the given texture image tex. It
    returns the texture image with the heatmap overlay and a list of
    segment scores.
    '''
    gaussian_kernel_array = make_gaussian(seg_dim)
    gaussian1d = gaussian_kernel_array.ravel()

    dws = gen_blank_depth_weight(tex_segments)
    max_score, index = find_most_salient_segment(tex_segments, gaussian1d, dws)

    # segments_scores holds (index, score, entropy, sum, centre-bias, depth)
    # tuples; turn it into a dict keyed by segment index
    dict_scores = {}
    for segment in segments_scores:
        # Index: score, entropy, sum, centre-bias, depth
        dict_scores[segment[0]] = [segment[1], segment[2], segment[3], segment[4], segment[5]]

    # Sort by score (the first value in each value list)
    sorted_scores = sorted(dict_scores.items(), key=lambda x: x[1][0], reverse=True)

    # flatten
    sorted_scores = [[i[0], i[1][0], i[1][1], i[1][2], i[1][3], i[1][4]] for i in sorted_scores]

    tex_out, sara_list_out = generate_heatmap(
        tex, sorted_scores, segments_coords, mode=mode)
    sara_list_out = list(reversed(sara_list_out))

    return tex_out, sara_list_out

def return_sara(input_img, grid, generator='itti', saliency_map=None, mode=2):
    '''
    Computes the SaRa output for the given input image. It uses the
    generate_sara function internally. It returns the SaRa output image and
    a list of segment scores.
    '''
    global seg_dim
    seg_dim = grid

    if saliency_map is None:
        saliency_map = return_saliency(input_img, generator)

    tex_segments = generate_segments(saliency_map, seg_dim)
    sara_output, sara_list_output = generate_sara(input_img, tex_segments, mode=mode)

    return sara_output, sara_list_output
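
# Illustrative end-to-end sketch (assumes the Itti generator's dependencies
# are available): rank a 9x9 grid and read off the most salient segment.
# Note the module keeps per-image state in globals, so call reset() before
# processing another image.
#   img = cv2.imread('image.png')
#   heatmap, ranks = return_sara(img.copy(), 9)
#   best = max(ranks, key=lambda r: r[2])   # (index, rank, score, ...)
#   reset()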

def mean_squared_error(image_a, image_b) -> float:
    '''
    Calculates the Mean Squared Error (MSE), i.e. the sum of squared
    differences between two images image_a and image_b, divided by the
    number of pixels. It returns the MSE value.

    NOTE: The two images must have the same dimensions.
    '''
    err = np.sum((image_a.astype('float') - image_b.astype('float')) ** 2)
    err /= float(image_a.shape[0] * image_a.shape[1])

    return err
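
# Illustrative example: identical images give an MSE of 0, and two constant
# grayscale images differing by 10 everywhere give 100:
#   a = np.zeros((4, 4)); b = np.full((4, 4), 10)
#   mean_squared_error(a, b)   # 100.0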

def reset():
    '''
    Resets all global variables to their default values.
    '''
    global segments_scores, segments_coords, seg_dim, segments, gt_segments, dws, sara_list

    # segments_entropies = []
    segments_scores = []
    segments_coords = []
    seg_dim = 0
    segments = []
    gt_segments = []
    dws = []
    sara_list = []

def resize_based_on_important_ranks(img, sara_info, grid_size, rate=0.3):
    '''
    Crops an image to the bounding box of its most important SaRa ranks.
    Returns the cropped image, the binary rank mask, and the crop
    coordinates.
    '''

    def generate_segments(image, seg_count) -> dict:
        """
        Function to generate segments of an image

        Args:
            image: input image
            seg_count: number of segments to generate

        Returns:
            segments: dictionary of segment coordinates keyed by index
        """
        # Initializing segments dictionary
        segments = {}

        # Initializing segment index and segment count
        segment_count = seg_count
        index = 0

        # Retrieving image width and height
        h, w = image.shape[:2]

        # Calculating width and height intervals for segments from the segment count
        w_interval = w // segment_count
        h_interval = h // segment_count

        # Iterating through the image and generating segments
        for i in range(segment_count):
            for j in range(segment_count):
                # Calculating segment coordinates
                x1, y1 = j * w_interval, i * h_interval
                x2, y2 = x1 + w_interval, y1 + h_interval

                # Adding segment coordinates to segments dictionary
                segments[index] = (x1, y1, x2, y2)

                # Incrementing segment index
                index += 1

        # Returning segments dictionary
        return segments

    # Retrieving important ranks from SaRa
    sara_dict = {
        info[0]: {
            'score': info[2],
            'index': info[1]
        }
        for info in sara_info[1]
    }

    # Sorting important ranks by score
    sorted_sara_dict = sorted(sara_dict.items(), key=lambda item: item[1]['score'], reverse=True)

    # Generating segments
    index_info = generate_segments(img, grid_size)

    # Initializing most important ranks image
    most_imp_ranks = np.zeros_like(img)

    # Calculating maximum rank
    max_rank = int(grid_size * grid_size * rate)
    count = 0

    # Iterating through important ranks and adding them to most important ranks image
    for rank, info in sorted_sara_dict:
        # Checking if rank is within maximum rank
        if count <= max_rank:
            # Retrieving segment coordinates
            coords = index_info[rank]

            # Adding segment to most important ranks image by making it white
            most_imp_ranks[coords[1]:coords[3], coords[0]:coords[2]] = 255

            # Incrementing count
            count += 1
        else:
            break

    # Retrieving coordinates of most important ranks
    coords = np.argwhere(most_imp_ranks == 255)

    # Checking if no important ranks were found and returning original image
    if coords.size == 0:
        return img, most_imp_ranks, [0, 0, img.shape[0], img.shape[1]]

    # Cropping image based on most important ranks
    # (np.argwhere returns (row, col, channel) indices, so x0/x1 here index
    # the vertical axis and y0/y1 the horizontal axis)
    x0, y0 = coords.min(axis=0)[:2]
    x1, y1 = coords.max(axis=0)[:2] + 1
    cropped_img = img[x0:x1, y0:y1]

    return cropped_img, most_imp_ranks, [x0, y0, x1, y1]

def sara_resize(img, sara_info, grid_size, rate=0.3, iterations=2):
    """
    Function to resize an image based on SaRa

    Args:
        img: input image
        sara_info: SaRa information
        grid_size: size of the grid
        rate: rate of important ranks
        iterations: number of iterations to resize

    Returns:
        img: resized image
    """
    # Iterating through iterations
    for _ in range(iterations):
        # Resizing image based on important ranks
        img, most_imp_ranks, coords = resize_based_on_important_ranks(img, sara_info, grid_size, rate=rate)

    # Returning resized image
    return img, most_imp_ranks, coords
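
# Illustrative usage sketch: crop an image to its top ~30% of ranked
# segments, where sara_info is the (heatmap, ranks) tuple returned by
# return_sara:
#   sara_info = return_sara(img.copy(), 9)
#   cropped, mask, box = sara_resize(img, sara_info, 9, rate=0.3, iterations=1)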

def plot_3D(img, sara_info, grid_size, rate=0.3):
    '''
    Plots the SaRa heatmap, the per-rank Gaussian distributions and the
    threshold hyperplane in a single 3D figure.
    '''

    def generate_segments(image, seg_count) -> dict:
        """
        Function to generate segments of an image

        Args:
            image: input image
            seg_count: number of segments to generate

        Returns:
            segments: dictionary of segment coordinates keyed by index
        """
        # Initializing segments dictionary
        segments = {}

        # Initializing segment index and segment count
        segment_count = seg_count
        index = 0

        # Retrieving image width and height
        h, w = image.shape[:2]

        # Calculating width and height intervals for segments from the segment count
        w_interval = w // segment_count
        h_interval = h // segment_count

        # Iterating through the image and generating segments
        for i in range(segment_count):
            for j in range(segment_count):
                # Calculating segment coordinates
                x1, y1 = j * w_interval, i * h_interval
                x2, y2 = x1 + w_interval, y1 + h_interval

                # Adding segment coordinates to segments dictionary
                segments[index] = (x1, y1, x2, y2)

                # Incrementing segment index
                index += 1

        # Returning segments dictionary
        return segments

    # Extracting heatmap from SaRa information
    heatmap = sara_info[0]
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)

    # Retrieving important ranks from SaRa
    sara_dict = {
        info[0]: {
            'score': info[2],
            'index': info[1]
        }
        for info in sara_info[1]
    }

    # Sorting important ranks by score
    sorted_sara_dict = sorted(sara_dict.items(), key=lambda item: item[1]['score'], reverse=True)

    # Generating segments
    index_info = generate_segments(img, grid_size)

    # Calculating maximum rank
    max_rank = int(grid_size * grid_size * rate)

    # Normalizing heatmap
    heatmap = heatmap.astype(float) / 255.0

    # Creating a figure
    fig = plt.figure(figsize=(20, 10))

    # Creating a 3D plot
    ax = fig.add_subplot(111, projection='3d')

    # Defining the x and y coordinates for the heatmap
    x_coords = np.linspace(0, 1, heatmap.shape[1])
    y_coords = np.linspace(0, 1, heatmap.shape[0])
    x, y = np.meshgrid(x_coords, y_coords)

    # Defining the z-coordinate for the heatmap (a constant, here -10)
    z = np.asarray([[-10] * heatmap.shape[1]] * heatmap.shape[0])

    # Plotting the heatmap as a texture on the xy-plane
    ax.plot_surface(x, y, z, facecolors=heatmap, rstride=1, cstride=1, shade=False)

    # Initializing the single distribution array
    single_distribution = np.asarray([[1e-6] * heatmap.shape[1]] * heatmap.shape[0], dtype=float)

    importance = 0

    # Creating the single distribution by summing up Gaussian distributions for each segment
    for rank, info in sorted_sara_dict:
        # Retrieving segment coordinates
        coords = index_info[rank]

        # Creating a Gaussian distribution for the whole segment, i.e.,
        # arranging all the pixels in the segment in a 3D Gaussian distribution
        x_temp = np.linspace(0, 1, coords[2] - coords[0])
        y_temp = np.linspace(0, 1, coords[3] - coords[1])

        # Creating a meshgrid
        x_temp, y_temp = np.meshgrid(x_temp, y_temp)

        # Calculating the Gaussian distribution, scaled so that more
        # important ranks (lower importance) get taller peaks
        distribution = np.exp(-((x_temp - 0.5) ** 2 + (y_temp - 0.5) ** 2) / 0.1) * ((grid_size ** 2 - importance) / grid_size ** 2)

        # Adding the Gaussian distribution to the single distribution
        single_distribution[coords[1]:coords[3], coords[0]:coords[2]] += distribution

        # Incrementing importance
        importance += 1

    # Based on the rate, calculating the minimum number for the most important ranks
    min_rank = int(grid_size * grid_size * rate)

    # Calculating the scale factor for the single distribution
    scale_factor = ((grid_size ** 2 - min_rank) / grid_size ** 2) * 5

    # Scaling the distribution
    single_distribution *= scale_factor

    # Retrieving the max and min values of the single distribution
    max_value = np.max(single_distribution)
    min_value = np.min(single_distribution)

    # Calculating the hyperplane
    hyperplane = np.asarray([[(max_value - min_value) * (1 - rate) + min_value] * heatmap.shape[1]] * heatmap.shape[0])

    # Plotting a horizontal plane at the minimum rank level (hyperplane)
    ax.plot_surface(x, y, hyperplane, rstride=1, cstride=1, color='red', alpha=0.3, shade=False)

    # Plotting the single distribution as a surface on the xy-plane
    ax.plot_surface(x, y, single_distribution, rstride=1, cstride=1, color='blue', shade=False)

    # Setting the title
    ax.set_title('SaRa 3D Heatmap Plot', fontsize=20)

    # Setting the labels
    ax.set_xlabel('X', fontsize=16)
    ax.set_ylabel('Y', fontsize=16)
    ax.set_zlabel('Z', fontsize=16)

    # Setting the viewing angle to look from the x, y diagonal position
    ax.view_init(elev=30, azim=45)  # Adjust the elevation (elev) and azimuth (azim) angles as needed
    # ax.view_init(elev=0, azim=0)  # View from the top

    # Creating Line2D objects for the legend
    legend_elements = [Line2D([0], [0], color='blue', lw=4, label='Rank Distribution'),
                       Line2D([0], [0], color='red', lw=4, label='Threshold Hyperplane ({}%)'.format(rate * 100)),
                       Line2D([0], [0], color='green', lw=4, label='SaRa Heatmap')]

    # Creating the legend
    plt.subplots_adjust(right=0.5)
    ax.legend(handles=legend_elements, fontsize=16, loc='center left', bbox_to_anchor=(1, 0.5))

    # Inverting the x axis
    ax.invert_xaxis()

    # Removing tick labels
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticks([])

    # Showing the plot
    plt.show()
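
if __name__ == '__main__':
    # Minimal smoke-test sketch, not part of the library: runs SaRa on a
    # synthetic image. This assumes the Itti generator (SaRa.pySaliencyMap)
    # is importable and uses random noise instead of a real photograph.
    demo_img = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)

    reset()
    heatmap, ranks = return_sara(demo_img.copy(), 9)

    # Print the five most salient segments: (index, rank, score, ...)
    for seg in sorted(ranks, key=lambda r: r[2], reverse=True)[:5]:
        print('segment', seg[0], 'rank', seg[1], 'score', round(seg[2], 3))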