climateGAN / figures /bootstrap_ablation.py

initial commit from cc-ai/climateGAN

448ebbd over 2 years ago

15.5 kB

	"""
	This script evaluates the contribution of a technique from the ablation study for
	improving the masker evaluation metrics. The differences in the metrics are computed
	for all images of paired models, that is those which only differ in the inclusion or
	not of the given technique. Then, statistical inference is performed through the
	percentile bootstrap to obtain robust estimates of the differences in the metrics and
	confidence intervals. The script plots the distribution of the bootstrapped estimates.
	"""
	print("Imports...", end="")
	from argparse import ArgumentParser
	import yaml
	import os
	import numpy as np
	import pandas as pd
	import seaborn as sns
	from scipy.stats import trim_mean
	from tqdm import tqdm
	from pathlib import Path
	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatches


	# -----------------------
	# ----- Constants -----
	# -----------------------

	# Metric metadata: "names" maps each metric key to a human-readable label and
	# "key_metrics" lists the metrics for which the bootstrap is run and plotted.
	dict_metrics = {
	"names": {
	"tpr": "TPR, Recall, Sensitivity",
	"tnr": "TNR, Specificity, Selectivity",
	"fpr": "FPR",
	"fpt": "False positives relative to image size",
	"fnr": "FNR, Miss rate",
	"fnt": "False negatives relative to image size",
	"mpr": "May positive rate (MPR)",
	"mnr": "May negative rate (MNR)",
	"accuracy": "Accuracy (ignoring may)",
	"error": "Error",
	"f05": "F05 score",
	"precision": "Precision",
	"edge_coherence": "Edge coherence",
	"accuracy_must_may": "Accuracy (ignoring cannot)",
	},
	"key_metrics": ["f05", "error", "edge_coherence"],
	}
	# Accepted spellings of the --technique keyword (looked up lower-cased), mapped to
	# the canonical technique name, which is also used as a column of the input CSV.
	dict_techniques = {
	"depth": "depth",
	"segmentation": "seg",
	"seg": "seg",
	"dada_s": "dada_seg",
	"dada_seg": "dada_seg",
	"dada_segmentation": "dada_seg",
	"dada_m": "dada_masker",
	"dada_masker": "dada_masker",
	"spade": "spade",
	"pseudo": "pseudo",
	"pseudo-labels": "pseudo",
	"pseudo_labels": "pseudo",
	}

	# Model features
	# CSV columns compared between two models when searching for pairs that differ
	# only in the studied technique.
	model_feats = [
	"masker",
	"seg",
	"depth",
	"dada_seg",
	"dada_masker",
	"spade",
	"pseudo",
	"ground",
	"instagan",
	]

	# Colors
	# Two base hues (one per category) taken from seaborn's "colorblind" palette
	palette_colorblind = sns.color_palette("colorblind")
	color_cat1, color_cat2 = palette_colorblind[0], palette_colorblind[1]

	# One shade per category, from lightest to darkest: index 0 is category 1,
	# index 1 is category 2
	palette_lightest = [
	    sns.light_palette(base, n_colors=20)[3] for base in (color_cat1, color_cat2)
	]
	palette_light = [
	    sns.light_palette(base, n_colors=3)[1] for base in (color_cat1, color_cat2)
	]
	palette_medium = [color_cat1, color_cat2]
	palette_dark = [
	    sns.dark_palette(base, n_colors=3)[1] for base in (color_cat1, color_cat2)
	]

	# Per-category ramps: lightest, light, medium, dark
	palette_cat1 = [
	    shades[0]
	    for shades in (palette_lightest, palette_light, palette_medium, palette_dark)
	]
	palette_cat2 = [
	    shades[1]
	    for shades in (palette_lightest, palette_light, palette_medium, palette_dark)
	]

	# Shortcuts to the light shade of each category
	color_cat1_light, color_cat2_light = palette_light


	def parsed_args():
	    """
	    Parse and return the command-line arguments of the script.

	    Returns:
	        argparse.Namespace: the parsed arguments, with attributes input_csv,
	            output_dir, technique, dpi, n_bs, alpha and bs_seed
	    """
	    parser = ArgumentParser()
	    parser.add_argument(
	        "--input_csv",
	        default="ablations_metrics_20210311.csv",
	        type=str,
	        help="CSV containing the results of the ablation study",
	    )
	    parser.add_argument(
	        "--output_dir",
	        default=None,
	        type=str,
	        help="Output directory",
	    )
	    parser.add_argument(
	        "--technique",
	        default=None,
	        type=str,
	        help="Keyword specifying the technique. One of: pseudo, depth, segmentation, "
	        "dada_seg, dada_masker, spade",
	    )
	    parser.add_argument(
	        "--dpi",
	        default=200,
	        type=int,
	        help="DPI for the output images",
	    )
	    parser.add_argument(
	        "--n_bs",
	        # argparse only applies `type` to string defaults, so the default must
	        # already be an int: a float (1e6) breaks range() and np.zeros()
	        default=1000000,
	        type=int,
	        help="Number of bootstrap samples",
	    )
	    parser.add_argument(
	        "--alpha",
	        default=0.99,
	        type=float,
	        help="Confidence level",
	    )
	    parser.add_argument(
	        "--bs_seed",
	        default=17,
	        type=int,
	        help="Bootstrap random seed, for reproducibility",
	    )

	    return parser.parse_args()


	def add_ci_mean(
	    ax, sample_measure, bs_mean, bs_std, ci, color, alpha, fontsize, invert=False
	):
	    """
	    Annotate a density curve with the confidence interval, the bootstrap mean,
	    the sample measure and the bootstrap standard deviation.

	    The curve is read from the first line drawn on `ax` (ax.lines[0]).

	    Args:
	        ax: axes whose first line is the density curve to annotate
	        sample_measure: x position of the dotted vertical line
	        bs_mean: x position of the solid vertical line, also printed in the
	            "Bootstrap mean" box
	        bs_std: value printed in the "SD" box
	        ci: pair (lower, upper) with the bounds of the confidence interval
	        color: color of the shaded CI area and of its two boundary lines
	        alpha: opacity of the shaded CI area
	        fontsize: font size of all the text boxes
	        invert (bool): swap the horizontal alignment of the "L" and "U" boxes
	    """
	    curve = ax.lines[0]
	    xs, ys = curve.get_xdata(), curve.get_ydata()
	    lw = curve.get_linewidth()

	    def nearest(value):
	        # Index of the curve sample whose x coordinate is closest to `value`
	        return np.argmin(np.abs(xs - value))

	    lower, upper = ci[0], ci[1]

	    # Shade the area under the curve between the CI bounds
	    inside = slice(nearest(lower), nearest(upper))
	    xs_ci, ys_ci = xs[inside], ys[inside]
	    ax.fill_between(xs_ci, 0, ys_ci, facecolor=color, alpha=alpha)

	    # Vertical lines closing the shaded area at both CI bounds
	    for bound, height, tag in (
	        (lower, ys_ci[0], "ci_low"),
	        (upper, ys_ci[-1], "ci_high"),
	    ):
	        ax.vlines(
	            x=bound, ymin=0.0, ymax=height, color=color, linewidth=lw, label=tag
	        )

	    # Boxes with the values of the CI bounds, sitting on the x axis
	    round_box = {"boxstyle": "round, pad=0.4", "fc": "w", "ec": "k", "lw": 2}
	    ha_lower, ha_upper = ("right", "left") if invert else ("left", "right")
	    for template, bound, ha in (
	        ("L = {:.4f}", lower, ha_lower),
	        ("U = {:.4f}", upper, ha_upper),
	    ):
	        ax.text(
	            bound,
	            0.0,
	            s=template.format(bound),
	            ha=ha,
	            va="bottom",
	            fontsize=fontsize,
	            bbox=round_box,
	        )

	    # Solid vertical line and box for the bootstrap mean
	    peak = ys[nearest(bs_mean)]
	    ax.vlines(x=bs_mean, ymin=0.0, ymax=peak, color="k", linewidth=lw)
	    ax.text(
	        bs_mean,
	        0.6 * peak,
	        s="Bootstrap mean = {:.4f}".format(bs_mean),
	        ha="center",
	        va="center",
	        fontsize=fontsize,
	        bbox=round_box,
	    )

	    # Dotted vertical line for the measure computed on the original sample
	    ax.vlines(
	        x=sample_measure,
	        ymin=0.0,
	        ymax=ys[nearest(sample_measure)],
	        color="k",
	        linewidth=lw,
	        linestyles="dotted",
	    )

	    # Double-arrow box with the bootstrap standard deviation
	    arrow_box = {"boxstyle": "darrow, pad=0.4", "fc": "w", "ec": "k", "lw": 2}
	    ax.text(
	        bs_mean,
	        0.4 * peak,
	        s="SD = {:.4f} = SE".format(bs_std),
	        ha="center",
	        va="center",
	        fontsize=fontsize,
	        bbox=arrow_box,
	    )


	def add_null_pval(ax, null, color, alpha, fontsize):
	    """
	    Shade the tail of a density curve beyond the null-hypothesis value and mark
	    that value with a dotted vertical line and a text box.

	    The curve is read from the first line drawn on `ax` (ax.lines[0]).

	    Args:
	        ax: axes whose first line is the density curve to annotate
	        null: value of the null hypothesis on the x axis
	        color: color of the shaded tail
	        alpha: opacity of the shaded tail
	        fontsize: font size of the text box
	    """
	    curve = ax.lines[0]
	    xs, ys = curve.get_xdata(), curve.get_ydata()
	    lw = curve.get_linewidth()

	    # Shade the side of the curve away from its centre: from the null to the
	    # right end when the null lies in the right half of the curve samples, and
	    # from the left end up to the null otherwise
	    null_idx = np.argmin(np.abs(xs - null))
	    if null_idx >= (len(xs) / 2.0):
	        tail = slice(null_idx, None)
	    else:
	        tail = slice(None, null_idx)
	    ax.fill_between(xs[tail], 0, ys[tail], facecolor=color, alpha=alpha)

	    # Dotted vertical line at the null, spanning up to the current upper y limit
	    top = ax.get_ylim()[1]
	    ax.vlines(
	        x=null, ymin=0.0, ymax=top, color="k", linewidth=lw, linestyles="dotted"
	    )

	    # Box with the value of the null hypothesis
	    ax.text(
	        null,
	        0.75 * top,
	        s="Null hypothesis = {:.1f}".format(null),
	        ha="center",
	        va="center",
	        fontsize=fontsize,
	        bbox={"boxstyle": "round, pad=0.4", "fc": "w", "ec": "k", "lw": 2},
	    )


	def plot_bootstrap_distr(
	    sample_measure,
	    bs_samples,
	    alpha,
	    color_ci,
	    color_pval=None,
	    null=None,
	    dpi=None,
	):
	    """
	    Plot the distribution of bootstrap estimates with its percentile confidence
	    interval and, optionally, the tail beyond a null-hypothesis value.

	    Args:
	        sample_measure: measure computed on the original sample, drawn as a
	            dotted vertical line
	        bs_samples: array with the bootstrap estimates
	        alpha (float): confidence level of the interval, e.g. 0.99
	        color_ci: color of the shaded confidence interval
	        color_pval: color of the shaded tail beyond `null`. The null test is
	            only performed when both `color_pval` and `null` are given
	        null: value of the null hypothesis
	        dpi: resolution of the figure. When None, the module-level `args.dpi`
	            set by the script entry point is used

	    Returns:
	        tuple: (fig, bs_mean, bs_std, ci, pval) where `ci` holds the lower and
	            upper percentile bounds and `pval` is the two-sided bootstrap p-value,
	            or None when no null test was requested
	    """
	    # Compute results from bootstrap
	    q_low = (1.0 - alpha) / 2.0
	    q_high = 1.0 - q_low
	    ci = np.quantile(bs_samples, [q_low, q_high])
	    bs_mean = np.mean(bs_samples)
	    bs_std = np.std(bs_samples)

	    pval_flag = null is not None and color_pval is not None
	    if pval_flag:
	        # Two-sided p-value: twice the smaller tail mass beyond the null
	        pval = 2 * np.min([np.mean(bs_samples > null), np.mean(bs_samples < null)])
	    else:
	        # Always bind pval so that the return statement cannot fail
	        pval = None

	    if dpi is None:
	        # Backward compatibility: callers that do not pass dpi rely on the
	        # command-line namespace stored at module level by the script
	        dpi = args.dpi

	    # Set up plot
	    sns.set(style="whitegrid")
	    fontsize = 24
	    font = {"family": "DejaVu Sans", "weight": "normal", "size": fontsize}
	    plt.rc("font", **font)
	    alpha_plot = 0.5

	    # Initialize the matplotlib figure
	    fig, ax = plt.subplots(figsize=(30, 12), dpi=dpi)

	    # Plot distribution of bootstrap estimates
	    sns.kdeplot(bs_samples, color="b", linewidth=5, gridsize=1000, ax=ax)

	    # Remember the limits of the bare curve: they are restored at the end, after
	    # the annotations have been added
	    y_lim = ax.get_ylim()

	    # Change spines
	    sns.despine(left=True, bottom=True)

	    # Annotations
	    add_ci_mean(
	        ax,
	        sample_measure,
	        bs_mean,
	        bs_std,
	        ci,
	        color=color_ci,
	        alpha=alpha_plot,
	        fontsize=fontsize,
	    )

	    if pval_flag:
	        add_null_pval(ax, null, color=color_pval, alpha=alpha_plot, fontsize=fontsize)

	    # Legend
	    ci_patch = mpatches.Patch(
	        facecolor=color_ci,
	        edgecolor=None,
	        alpha=alpha_plot,
	        # Round instead of truncating: int(100 * 0.29) is 28, and a level such
	        # as 0.995 must read 99.5, not 99
	        label="{:g} % confidence interval".format(round(100 * alpha, 6)),
	    )
	    handles = [ci_patch]

	    if pval_flag:
	        half_pval = pval / 2.0
	        if pval == 0.0:
	            pval_label = "P value / 2 = {:.1f}".format(half_pval)
	        elif np.around(half_pval, decimals=4) > 0.0:
	            pval_label = "P value / 2 = {:.4f}".format(half_pval)
	        else:
	            # Too small for 4 decimals: show an upper bound as a power of ten.
	            # The exponent is braced so that mathtext superscripts all of it
	            exponent = int(np.ceil(np.log10(half_pval)))
	            pval_label = "P value / 2 < $10^{{{:d}}}$".format(exponent)
	        handles.append(
	            mpatches.Patch(
	                facecolor=color_pval,
	                edgecolor=None,
	                alpha=alpha_plot,
	                label=pval_label,
	            )
	        )

	    leg = ax.legend(
	        handles=handles,
	        ncol=1,
	        loc="upper right",
	        frameon=True,
	        framealpha=1.0,
	        title="",
	        fontsize=fontsize,
	        columnspacing=1.0,
	        labelspacing=0.2,
	        markerfirst=True,
	    )

	    plt.setp(leg.get_title(), fontsize=fontsize, horizontalalignment="left")

	    # Set X-label
	    ax.set_xlabel("Bootstrap estimates", rotation=0, fontsize=fontsize, labelpad=10.0)

	    # Set Y-label
	    ax.set_ylabel("Density", rotation=90, fontsize=fontsize, labelpad=10.0)

	    # Ticks
	    plt.setp(ax.get_xticklabels(), fontsize=0.8 * fontsize, verticalalignment="top")
	    plt.setp(ax.get_yticklabels(), fontsize=0.8 * fontsize)

	    ax.set_ylim(y_lim)

	    return fig, bs_mean, bs_std, ci, pval


	if __name__ == "__main__":
	    # -----------------------------
	    # ----- Parse arguments -----
	    # -----------------------------
	    # NOTE: `args` is deliberately a module-level name, plot_bootstrap_distr
	    # reads `args.dpi`
	    args = parsed_args()
	    print("Args:\n" + "\n".join([f" {k:20}: {v}" for k, v in vars(args).items()]))

	    # Determine technique. Done before anything is written to disk, and tolerant
	    # of a missing --technique (None has no .lower())
	    technique_key = (args.technique or "").lower()
	    if technique_key not in dict_techniques:
	        raise ValueError("{} is not a valid technique".format(args.technique))
	    technique = dict_techniques[technique_key]

	    # Bootstrap size as a true int: np.zeros() and range() reject floats
	    n_bs = int(args.n_bs)

	    # Determine output dir
	    if args.output_dir is None:
	        slurm_tmpdir = os.environ.get("SLURM_TMPDIR")
	        if slurm_tmpdir is None:
	            raise ValueError(
	                "--output_dir was not provided and SLURM_TMPDIR is not set"
	            )
	        output_dir = Path(slurm_tmpdir)
	    else:
	        output_dir = Path(args.output_dir)
	    output_dir.mkdir(parents=True, exist_ok=True)

	    # Store args
	    output_yml = output_dir / "{}_bootstrap.yml".format(args.technique)
	    with open(output_yml, "w") as f:
	        yaml.dump(vars(args), f)

	    # Read CSV
	    df = pd.read_csv(args.input_csv, index_col="model_img_idx")

	    # Find relevant model pairs: for every model that uses the technique, the
	    # first model that does not use it and agrees on every other feature
	    model_pairs = []
	    for mi in df.loc[df[technique]].model_feats.unique():
	        for mj in df.model_feats.unique():
	            if mj == mi:
	                continue

	            if df.loc[df.model_feats == mj, technique].unique()[0]:
	                continue

	            is_pair = all(
	                df.loc[df.model_feats == mj, feat].unique()[0]
	                == df.loc[df.model_feats == mi, feat].unique()[0]
	                for feat in model_feats
	                if feat != technique
	            )
	            if is_pair:
	                model_pairs.append((mi, mj))
	                break

	    print("\nModel pairs identified:\n")
	    for pair in model_pairs:
	        print("{} & {}".format(pair[0], pair[1]))

	    if not model_pairs:
	        raise ValueError(
	            "No model pairs differing only in '{}' were found in {}".format(
	                technique, args.input_csv
	            )
	        )

	    # Tag the rows of paired models with the name of their baseline (the model
	    # without the technique); rows of unpaired models keep "N/A"
	    df["base"] = "N/A"
	    for pair in model_pairs:
	        df.loc[df.model_feats.isin(pair), "base"] = pair[1]

	    # Build bootstrap data: per-image metric differences (with - without)
	    data = {m: [] for m in dict_metrics["key_metrics"]}
	    for m_with, m_without in model_pairs:
	        # Sort once per model so that images are aligned by img_idx
	        df_with = df.loc[df.model_feats == m_with].sort_values(by="img_idx")
	        df_without = df.loc[df.model_feats == m_without].sort_values(by="img_idx")
	        if len(df_with) != len(df_without):
	            raise ValueError(
	                "Models {} and {} have a different number of images".format(
	                    m_with, m_without
	                )
	            )
	        for metric in data:
	            diff = df_with[metric].values - df_without[metric].values
	            data[metric].extend(diff.tolist())

	    # Run bootstrap
	    measures = ["mean", "median", "20_trimmed_mean"]
	    bs_data = {meas: {m: np.zeros(n_bs) for m in data} for meas in measures}

	    np.random.seed(args.bs_seed)
	    for m, data_m in data.items():
	        # Convert once instead of at every np.random.choice call
	        values = np.asarray(data_m)
	        for idx in tqdm(range(n_bs)):
	            # Sample with replacement
	            bs_sample = np.random.choice(values, size=len(values), replace=True)

	            # Store mean
	            bs_data["mean"][m][idx] = np.mean(bs_sample)

	            # Store median
	            bs_data["median"][m][idx] = np.median(bs_sample)

	            # Store 20 % trimmed mean
	            bs_data["20_trimmed_mean"][m][idx] = trim_mean(bs_sample, 0.2)

	    # Only the 20 % trimmed mean is plotted and stored
	    measure = "20_trimmed_mean"
	    for metric in dict_metrics["key_metrics"]:
	        sample_measure = trim_mean(data[metric], 0.2)
	        fig, bs_mean, bs_std, ci, pval = plot_bootstrap_distr(
	            sample_measure,
	            bs_data[measure][metric],
	            alpha=args.alpha,
	            color_ci=color_cat1_light,
	            color_pval=color_cat2_light,
	            null=0.0,
	        )

	        # Save figure
	        output_fig = output_dir / "{}_bootstrap_{}_{}.png".format(
	            args.technique, metric, measure
	        )
	        fig.savefig(output_fig, dpi=fig.dpi, bbox_inches="tight")

	        # Store results
	        output_results = output_dir / "{}_bootstrap_{}_{}.yml".format(
	            args.technique, metric, measure
	        )
	        results_dict = {
	            "measure": measure,
	            "sample_measure": float(sample_measure),
	            "bs_mean": float(bs_mean),
	            "bs_std": float(bs_std),
	            "ci_left": float(ci[0]),
	            "ci_right": float(ci[1]),
	            "pval": float(pval),
	        }
	        with open(output_results, "w") as f:
	            yaml.dump(results_dict, f)