# %% Imports and path setup
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import seaborn as sns
import matplotlib.pyplot as plt

# Project root: two directories above the current working directory.
HEAD = Path(os.getcwd()).parent.parent

# Training-set sizes swept by every experiment.
DATASIZES = [1, 3, 6, 12, 24, 49, 98, 197]

# Prediction files a run directory must contain to be analysed.  The first
# entry (JSRT val split) is only used for the quick console summary; the
# remaining three are the evaluation sets collected into the DataFrames.
FILES_NEEDED = [
    "JSRT_val_predictions.pt",
    "JSRT_test_predictions.pt",
    "NIH_predictions.pt",
    "Montgomery_predictions.pt",
]

METRIC_KEYS = ["dice", "precision", "recall", "exp", "datasize", "dataset"]


def _new_store():
    """Return an empty accumulator dict with one list per output column."""
    return {key: [] for key in METRIC_KEYS}


def _has_all_files(run_dir):
    """True when ``run_dir`` contains every file listed in FILES_NEEDED."""
    return len(set(FILES_NEEDED) - set(os.listdir(run_dir))) == 0


def _append_metrics(store, output, exp_label, datasize, dataset_name):
    """Append one prediction file's per-sample metrics to ``store``.

    ``output`` is a dict of tensors keyed by metric name; the experiment
    label, training-set size and dataset name are replicated so that every
    sample becomes one tidy row when the lists are concatenated.
    """
    n = len(output["dice"])
    store["dice"].append(output["dice"].numpy())
    store["precision"].append(output["precision"].numpy())
    store["recall"].append(output["recall"].numpy())
    store["exp"].append(np.array([exp_label] * n))
    store["datasize"].append(np.array([int(datasize)] * n))
    store["dataset"].append(np.array([dataset_name] * n))


def _finalise(store, verbose=False):
    """Concatenate the accumulated arrays and return a tidy DataFrame."""
    for key in store:
        # Squeeze each chunk first so (n, 1)- and (n,)-shaped metric arrays
        # concatenate to the same 1-D result.
        store[key] = np.concatenate([np.asarray(el).squeeze() for el in store[key]])
        if verbose:
            print(key, store[key].shape)
    return pd.DataFrame(store)


def _print_val_summary(run_dir):
    """Print mean/std dice of the JSRT validation split for one run."""
    output = torch.load(run_dir / "JSRT_val_predictions.pt")
    print(f"{output['dice'].mean()}\t{output['dice'].std()}")


if __name__ == "__main__":
    head = HEAD / "logs"

    # %% Load baseline and LEDM data
    metrics = _new_store()
    for exp in ["baseline", "LEDM"]:
        for datasize in DATASIZES:
            run_dir = head / exp / str(datasize)
            if not _has_all_files(run_dir):
                print(f"Experiment {exp} is missing files")
                continue
            print(f"Experiment {exp} {datasize}")
            _print_val_summary(run_dir)
            for file in FILES_NEEDED[1:]:
                output = torch.load(run_dir / file)
                _append_metrics(metrics, output, exp, datasize, file.split("_")[0])
    df = _finalise(metrics)
    df.head()  # only displays anything when run interactively (cell mode)

    # %% Load TEDM data
    metrics3 = _new_store()
    exp = "TEDM"
    for datasize in DATASIZES:
        run_dir = head / exp / str(datasize)
        if not _has_all_files(run_dir):
            print(f"Experiment {datasize} is missing files")
            continue
        print(f"Experiment {datasize}")
        _print_val_summary(run_dir)
        for file in FILES_NEEDED[1:]:
            output = torch.load(run_dir / file)
            _append_metrics(metrics3, output, "TEDM", datasize, file.split("_")[0])
    df3 = _finalise(metrics3, verbose=True)

    # %% Boxplot of TEDM vs LEDM and baseline
    df_plot = pd.concat([df, df3])
    df_plot.datasize = df_plot.datasize.astype(int)
    fig, axs = plt.subplots(3, 3, figsize=(20, 20))
    for j, m in enumerate(["dice", "precision", "recall"]):
        for i, dataset in enumerate(["JSRT", "NIH", "Montgomery"]):
            temp_df = df_plot[df_plot.dataset == dataset]
            sns.boxplot(data=temp_df, x="datasize", y=m, ax=axs[i, j], hue="exp",
                        showfliers=False, saturation=1,
                        hue_order=["baseline", "LEDM", "TEDM"])
            axs[i, j].set_title(f"{dataset} {m}")
            axs[i, j].set_xlabel("Training dataset size")
            handles, _labels = axs[i, j].get_legend_handles_labels()
            axs[i, j].legend(handles, ["Baseline", "LEDM", "TEDM (ours)"],
                             title="", loc="lower right")
    plt.tight_layout()
    # NOTE(review): the five-experiment figure below is saved under the SAME
    # filename and overwrites this one — confirm whether that is intended.
    plt.savefig("results_shared_weights.pdf")
    plt.show()

    # %% Load LEDMe and Step 1
    metrics2 = _new_store()
    for exp in ["LEDMe", "Step_1"]:
        for datasize in DATASIZES:
            run_dir = head / exp / str(datasize)
            if not _has_all_files(run_dir):
                print(f"Experiment {exp} is missing files")
                continue
            print(f"Experiment {exp} {datasize}")
            _print_val_summary(run_dir)
            for file in FILES_NEEDED[1:]:
                output = torch.load(run_dir / file)
                _append_metrics(metrics2, output, exp, datasize, file.split("_")[0])
    df2 = _finalise(metrics2, verbose=True)

    # %% Boxplot of TEDM vs LEDM and baseline, Step 1 and LEDMe
    df_plot = pd.concat([df, df3, df2])
    df_plot.datasize = df_plot.datasize.astype(int)
    fig, axs = plt.subplots(3, 3, figsize=(20, 20))
    for j, m in enumerate(["dice", "precision", "recall"]):
        for i, dataset in enumerate(["JSRT", "NIH", "Montgomery"]):
            temp_df = df_plot[df_plot.dataset == dataset]
            sns.boxplot(data=temp_df, x="datasize", y=m, ax=axs[i, j], hue="exp",
                        showfliers=False, saturation=1,
                        hue_order=["baseline", "LEDM", "Step_1", "LEDMe", "TEDM"])
            axs[i, j].set_title(f"{dataset} {m}")
            axs[i, j].set_xlabel("Training dataset size")
            handles, _labels = axs[i, j].get_legend_handles_labels()
            axs[i, j].legend(handles, ["Baseline", "LEDM", "Step 1", "LEDMe", "TEDM"],
                             title="", loc="lower right")
    plt.tight_layout()
    plt.savefig("results_shared_weights.pdf")
    plt.show()

    # %% Load TEDM ablation studies (per-timestep MLP heads)
    metrics4 = _new_store()
    exp = "TEDM"
    for datasize in DATASIZES:
        run_dir = head / exp / str(datasize)
        if not _has_all_files(run_dir):
            print(f"Experiment {datasize} is missing files")
            continue
        print(f"Experiment {datasize} ")
        for step in [1, 10, 25]:
            for file in FILES_NEEDED[1:]:
                output = torch.load(
                    run_dir / file.replace("predictions", f"timestep{step}_predictions"))
                _append_metrics(metrics4, output, f"Step {step} (MLP)",
                                datasize, file.split("_")[0])
    # Bug fix: the original iterated `metrics3`'s keys here to finalise
    # `metrics4`; it only worked because both dicts happen to share keys.
    df_ablation = _finalise(metrics4, verbose=True)

    # %% Print inputs to paper table (dice)
    df_all = pd.concat([df, df3, df2, df_ablation])
    df_all.datasize = df_all.datasize.astype(int)
    table_sizes = [1, 3, 6, 12, 197]
    for dataset in ["JSRT", "NIH", "Montgomery"]:
        temp_df = df_all.loc[
            (df_all.dataset == dataset) & (df_all.datasize.isin(table_sizes)),
            ["exp", "datasize", "dice"]]
        print(dataset)
        mean = temp_df.groupby(["exp", "datasize"]).mean().unstack() * 100
        std = temp_df.groupby(["exp", "datasize"]).std().unstack() * 100
        for exp, exp_name in zip(
                ["baseline", "LEDM", "Step_1", "Step 1 (MLP)", "Step 10 (MLP)",
                 "Step 25 (MLP)", "LEDMe", "TEDM"],
                ["Baseline", "DatasetDDPM", "Step 1 (linear)", "Step 1 (MLP)",
                 "Step 10 (MLP)", "Step 25 (MLP)", "DatasetDDPMe", "Ours"]):
            print(exp_name, end="&\t")
            for size in table_sizes:
                # Raw f-string so `\p` in the LaTeX `$\pm$` is not treated
                # as a (deprecated) escape sequence.
                cell = (rf"{round(mean.loc[exp, ('dice', size)], 2):.3}"
                        rf" $\pm$ {round(std.loc[exp, ('dice', size)], 1)}")
                print(cell, end="&\t" if size != table_sizes[-1] else "\\\\")
            print()

    # %% Print inputs to paper appendix table (precision / recall)
    for dataset in ["JSRT", "NIH", "Montgomery"]:
        print("\n" + dataset)
        for m in ["precision", "recall"]:
            temp_df = df_all.loc[
                (df_all.dataset == dataset) & (df_all.datasize.isin(DATASIZES)),
                ["exp", "datasize", m]]
            print("\n" + m)
            mean = temp_df.groupby(["exp", "datasize"]).mean().unstack() * 100
            std = temp_df.groupby(["exp", "datasize"]).std().unstack() * 100
            for exp, exp_name in zip(
                    ["baseline", "LEDM", "Step_1", "LEDMe", "TEDM"],
                    ["Baseline", "LEDM", "Step 1 (linear)", "LEDMe", "TEDM (ours)"]):
                print(exp_name, end="&\t")
                for size in table_sizes:
                    cell = (rf"{round(mean.loc[exp, (m, size)], 2):.3}"
                            rf" $\pm$ {round(std.loc[exp, (m, size)], 1)}")
                    print(cell, end="&\t" if size != table_sizes[-1] else "\\\\")
                print()

    # %% Wilcoxon tests - to use interactively
    # The repeated assignments below are intentional: this cell is meant to be
    # edited/re-run with different metric, dataset, size and experiment pairs;
    # only the last assignment of each variable takes effect.
    from scipy.stats import wilcoxon
    m = "precision"
    m = "recall"
    dataset = "Montgomery"
    dssize = 12
    exp = "baseline"
    exp = "Step_1"
    exp = "LEDM"
    exp = "TEDM"
    exp_2 = "LEDMe"
    x = df_all.loc[(df_all.dataset == dataset) & (df_all.exp == exp_2)
                   & (df_all.datasize == dssize), m].to_numpy()
    y = df_all.loc[(df_all.dataset == dataset) & (df_all.exp == exp)
                   & (df_all.datasize == dssize), m].to_numpy()
    print(f"{m} - {dataset} - {dssize} - {exp_2}: {x.mean():.4}+/-{x.std():.3} ")
    print(f"{m} - {dataset} - {dssize} - {exp}: {y.mean():.4}+/-{y.std():.3} ")
    print(f"{m} - {dataset} - {dssize}: "
          f"{wilcoxon(x, y=y, zero_method='wilcox', correction=False, alternative='two-sided',).pvalue:.3}"
          f" obs given equal ")
    print(f"{m} - {dataset} - {dssize}: "
          f"{wilcoxon(x, y=y, zero_method='wilcox', correction=False, alternative='greater',).pvalue:.3}"
          f" obs given {exp_2} < {exp} ")
    print(f"{m} - {dataset} - {dssize}: "
          f"{wilcoxon(x, y=y, zero_method='wilcox', correction=False, alternative='less',).pvalue:.3}"
          f" obs given {exp_2} > {exp} ")