#!/usr/bin/env python
"""Plot training curves (mean +/- shaded std over seeds) for experiments.

Usage: script.py FIGURE_ID
where FIGURE_ID is either an int (0-3: predefined train/arch comparison
figures) or a string pattern used to select experiment directories under
"storage/". Figures are written to graphics/<FIGURE_ID>_results.{svg,png}.
"""
import json
import os
import sys
from collections import OrderedDict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn  # noqa: F401  kept: importing seaborn restyles matplotlib plots
from termcolor import cprint  # noqa: F401

# Global vars for tracking and labeling data at load time.
exp_idx = 0
label_parser_dict = None
smooth_factor = 10
leg_size = 30
subsample_step = 1
load_subsample_step = 50
default_colors = ["blue", "orange", "green", "magenta", "brown", "red",
                  'black', "grey", u'#ff7f0e', "cyan", "pink", 'purple',
                  u'#1f77b4', "darkorchid", "sienna", "lightpink", "indigo",
                  "mediumseagreen", 'aqua', 'deeppink', 'silver', 'khaki',
                  'goldenrod'] + ['y'] * 62  # 'y' padding for many curves


def get_all_runs(logdir, load_subsample_step=1):
    """Recursively collect run data below *logdir*.

    Any directory containing a "log.csv" file is treated as one run.

    Returns a list with one dict per run, mapping CSV column name -> list
    of values (subsampled by *load_subsample_step*), plus a 'config' key
    holding the parsed config.json of that run (None if unavailable).
    """
    global exp_idx
    datasets = []
    for root, _, files in os.walk(logdir):
        if 'log.csv' not in files:
            continue
        run_name = root[8:]  # strips the leading "storage/" prefix
        # Try to load a config file containing hyperparameters.
        # NOTE: the original code called json.load without importing json,
        # so this always failed silently; fixed by importing json above.
        config = None
        try:
            with open(os.path.join(root, 'config.json')) as config_file:
                config = json.load(config_file)
        except (OSError, ValueError):
            # missing file or malformed JSON — keep going without a config
            print('No file named config.json')
        exp_idx += 1
        # load progress data
        csv_path = os.path.join(root, 'log.csv')
        try:
            print(csv_path)
            exp_data = pd.read_csv(csv_path)
        except Exception:
            raise ValueError("CSV {} faulty".format(csv_path))
        exp_data = exp_data[::load_subsample_step]  # subsample at load time
        data_dict = exp_data.to_dict("list")
        data_dict['config'] = config
        nb_epochs = len(data_dict['frames'])
        print('{} -> {}'.format(run_name, nb_epochs))
        datasets.append(data_dict)
    return datasets


def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_pattern="ignore"):
    """Load every model directory in *rootdir* whose name contains *load_only*.

    Fills the module-level `models_saves`, `labels` and `colors` dicts.
    Directories whose name contains *ignore_pattern* are skipped (add
    "ignore" to a directory name to exclude it from loading).
    """
    _, models_list, _ = next(os.walk(rootdir))
    print(models_list)
    for dir_name in models_list.copy():
        if ignore_pattern in dir_name or load_only not in dir_name:
            models_list.remove(dir_name)
    # drop labels of experiments that were filtered out
    for expe_name in list(labels.keys()):
        if expe_name not in models_list:
            del labels[expe_name]
    # setting per-model type colors
    for m_name in models_list:
        for m_type, m_color in per_model_colors.items():
            if m_type in m_name:
                colors[m_name] = m_color
        print("extracting data for {}...".format(m_name))
        m_id = m_name
        models_saves[m_id] = OrderedDict()
        models_saves[m_id]['data'] = get_all_runs(
            rootdir + m_name, load_subsample_step=load_subsample_step)
        print("done")
        if m_name not in labels:
            labels[m_name] = m_name


# ------------------------------------------------------------------ LOAD DATA
labels = OrderedDict()
per_model_colors = OrderedDict()  # e.g. OrderedDict([('ALP-GMM', u'#1f77b4')])
models_saves = OrderedDict()
colors = OrderedDict()
static_lines = {}

# Figure id may be an int (predefined figures) or a directory-name pattern.
# int() replaces the original eval() call: it is safe on untrusted argv and
# yields the same result for every figure id actually in use.
try:
    figure_id = int(sys.argv[1])
except ValueError:
    figure_id = sys.argv[1]
print("fig:", figure_id)

if figure_id == 0:  # train change
    env_type = "No_NPC_environment"
    fig_type = "train"
    get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_no_explo", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_curr_dial", load_subsample_step=load_subsample_step)
    top_n = 16
elif figure_id == 1:  # arch change
    env_type = "No_NPC_environment"
    fig_type = "arch"
    get_datasets("storage/", "RERUN_WizardGuide_lang64_mm", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_bow", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_no_mem", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_bigru", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardGuide_lang64_attgru", load_subsample_step=load_subsample_step)
    top_n = 16
elif figure_id == 2:  # train change FULL
    env_type = "FULL_environment"
    fig_type = "train"
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_deaf_no_explo", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_explo", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_curr_dial", load_subsample_step=load_subsample_step)
    top_n = 16
elif figure_id == 3:  # arch change FULL
    env_type = "FULL_environment"
    fig_type = "arch"
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_mm", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bow", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_no_mem", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_bigru", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "RERUN_WizardTwoGuides_lang64_attgru", load_subsample_step=load_subsample_step)
    top_n = 16
elif str(figure_id) == "ShowMe":
    get_datasets("storage/", "20-05_NeurIPS_ShowMe_ABL_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
    get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_ABL", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "20-05_NeurIPS_ShowMe_CEB", load_subsample_step=load_subsample_step, ignore_pattern="tanh_0.3")
    get_datasets("storage/", "20-05_NeurIPS_ShowMe_NO_BONUS_env", load_subsample_step=load_subsample_step)
    label_parser_dict = {
        "20-05_NeurIPS_ShowMe_ABL_CEB": "ShowMe_exp_bonus_no_social_skills_required",
        "20-05_NeurIPS_ShowMe_NO_BONUS_ABL": "ShowMe_no_bonus_no_social_skills_required",
        "20-05_NeurIPS_ShowMe_CEB": "ShowMe_exp_bonus",
        "20-05_NeurIPS_ShowMe_NO_BONUS_env": "ShowMe_no_bonus",
    }
    env_type = str(figure_id)
    fig_type = "test"
    top_n = 16
elif str(figure_id) == "Help":
    get_datasets("storage/", "24-05_NeurIPS_Help", load_subsample_step=load_subsample_step, ignore_pattern="ABL")
    get_datasets("storage/", "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)
    label_parser_dict = {
        "Help_NO_BONUS_env": "PPO",
        "Help_BONUS_env": "PPO+Explo",
        "26-05_NeurIPS_gpu_Help_NoSocial_NO_BONUS_env": "Unsocial PPO",
    }
    # horizontal reference lines: label -> (mean, std, color)
    static_lines = {
        "PPO (helper)": (0.12, 0.05, "#1f77b4"),
        "PPO+Explo (helper)": (0.11, 0.04, "indianred"),
        "Unsocial PPO (helper)": (0.15, 0.06, "grey"),
    }
    env_type = str(figure_id)
    fig_type = "test"
    top_n = 16
elif str(figure_id) == "TalkItOut":
    print("You mean Polite")
    exit()
elif str(figure_id) == "TalkItOutPolite":
    get_datasets("storage/", "20-05_NeurIPS_TalkItOutPolite", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar", load_subsample_step=load_subsample_step)
    label_parser_dict = {
        "TalkItOutPolite_NO_BONUS_env": "PPO",
        "TalkItOutPolite_e": "PPO+Explo",
        "TalkItOutPolite_NO_BONUS_NoLiar": "PPO (no liar)",
        "TalkItOutPolite_NoLiar_e": "PPO+Explo (no liar)",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_env": "Unsocial PPO",
        "26-05_NeurIPS_gpu_TalkItOutPolite_NoSocial_NO_BONUS_NoLiar": "Unsocial PPO (no liar)",
    }
    env_type = str(figure_id)
    fig_type = "test"
    top_n = 16
elif str(figure_id) == "DiverseExit":
    get_datasets("storage/", "24-05_NeurIPS_DiverseExit", load_subsample_step=load_subsample_step)
    get_datasets("storage/", "26-05_NeurIPS_gpu_DiverseExit", load_subsample_step=load_subsample_step)
    label_parser_dict = {
        "DiverseExit_NO_BONUS": "No_bonus",
        "DiverseExit_BONUS": "BOnus",
        "gpu_DiverseExit_NoSocial": "No_social",
    }
    env_type = str(figure_id)
    fig_type = "test"
    top_n = 16
else:
    # any other string is treated as a directory-name pattern
    get_datasets("storage/", str(figure_id), load_subsample_step=load_subsample_step)
    env_type = str(figure_id)
    fig_type = "test"
    top_n = 8

if per_model_colors:
    # order runs for legend order as in per_model_colors, with matching colors
    ordered_labels = OrderedDict()
    for teacher_type in per_model_colors.keys():
        for k, v in labels.items():
            if teacher_type in k:
                ordered_labels[k] = v
    labels = ordered_labels
else:
    print('not using per_model_color')
    for k in models_saves.keys():
        labels[k] = k


# ------------------------------------------------------------------ Plot utils
def plot_with_shade(subplot_nb, ax, x, y, err, color, shade_color, label,
                    y_min=None, y_max=None, legend=False, leg_size=30,
                    leg_loc='best', title=None, ylim=(0, 100), xlim=(0, 40),
                    leg_args={}, leg_linewidth=13.0, linewidth=10.0,
                    ticksize=20, zorder=None, xlabel='perf',
                    ylabel='env steps', smooth_factor=None):
    """Plot y on *ax* with a shaded y +/- err band.

    `smooth_factor` is accepted because the per-seed plotting path passes
    it; smoothing itself is done by the caller, so it is ignored here.
    (The original signature lacked it, making that call a TypeError.)
    """
    ax.locator_params(axis='x', nbins=4)
    ax.locator_params(axis='y', nbins=3)
    ax.tick_params(axis='both', which='major', labelsize=ticksize)
    ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder)
    ax.fill_between(x, y - err, y + err, color=shade_color, alpha=0.2)
    if legend:
        leg = ax.legend(loc=leg_loc, **leg_args)
        # legendHandles was renamed legend_handles in matplotlib 3.7
        for legobj in getattr(leg, 'legend_handles', None) or leg.legendHandles:
            legobj.set_linewidth(leg_linewidth)
    ax.set_xlabel(xlabel, fontsize=30)
    if subplot_nb == 0:
        ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
    # xmin/xmax kwargs were removed from Axes.set_xlim in modern matplotlib
    ax.set_xlim(left=xlim[0], right=xlim[1])
    ax.set_ylim(bottom=ylim[0], top=ylim[1])
    if title:
        ax.set_title(title, fontsize=22)


def plot_with_shade_grg(subplot_nb, ax, x, y, err, color, shade_color, label,
                        y_min=None, y_max=None, legend=False, leg_size=30,
                        leg_loc='best', title=None, ylim=(0, 100), xlim=(0, 40),
                        leg_args={}, leg_linewidth=13.0, linewidth=10.0,
                        ticksize=20, zorder=None, xlabel='perf',
                        ylabel='env steps', linestyle="-"):
    """Like plot_with_shade, with a configurable line style (used for the
    dotted static reference lines)."""
    ax.locator_params(axis='x', nbins=4)
    ax.locator_params(axis='y', nbins=3)
    ax.tick_params(axis='both', which='major', labelsize=ticksize)
    ax.plot(x, y, color=color, label=label, linewidth=linewidth,
            zorder=zorder, linestyle=linestyle)
    ax.fill_between(x, y - err, y + err, color=shade_color, alpha=0.2)
    if legend:
        leg = ax.legend(loc=leg_loc, **leg_args)
        # legendHandles was renamed legend_handles in matplotlib 3.7
        for legobj in getattr(leg, 'legend_handles', None) or leg.legendHandles:
            legobj.set_linewidth(leg_linewidth)
    ax.set_xlabel(xlabel, fontsize=30)
    if subplot_nb == 0:
        ax.set_ylabel(ylabel, fontsize=30, labelpad=-4)
    ax.set_xlim(left=xlim[0], right=xlim[1])
    ax.set_ylim(bottom=ylim[0], top=ylim[1])
    if title:
        ax.set_title(title, fontsize=22)


# ----------------------------------------------------------------- Metric plot
# other options: 'mission_string_observed_mean', 'extrinsic_return_mean',
# 'extrinsic_return_max', 'rreturn_mean', 'rreturn_max', 'FPS', 'duration'
metric = 'bin_extrinsic_return_mean'

f, ax = plt.subplots(1, 1, figsize=(10.0, 6.0))
ax = [ax]
max_y = -np.inf
min_y = np.inf
# hardcoded y-range (success rate)
min_y, max_y = 0.0, 1.0
max_steps = 0
exclude_patterns = []
include_patterns = []


def label_parser(label, figure_id, label_parser_dict=None):
    """Map a raw model-directory name to a human-readable curve label.

    Uses *label_parser_dict* when given; otherwise falls back to a built-in
    mapping for the predefined arch/train figures. Exits the program if a
    label matches several dictionary keys and none exactly.
    """
    if label_parser_dict:
        if sum([1 for k in label_parser_dict if k in label]) != 1:
            if label in label_parser_dict:  # see if there is an exact match
                return label_parser_dict[label]
            else:
                print("ERROR multiple curves match a label and there is no exact match")
                print(label)
                exit()
        for k, v in label_parser_dict.items():
            if k in label:
                return v
    else:
        # NOTE(review): figure 0 uses these run names too but is not in
        # [1, 2, 3, 4] and therefore keeps raw labels — confirm intended.
        if figure_id not in [1, 2, 3, 4]:
            return label
        else:
            label_parser_dict = {
                "RERUN_WizardGuide_lang64_no_explo": "MH-BabyAI",
                "RERUN_WizardTwoGuides_lang64_no_explo": "MH-BabyAI",
                "RERUN_WizardGuide_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
                "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_env": "MH-BabyAI-ExpBonus",
                "RERUN_WizardGuide_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
                "RERUN_WizardTwoGuides_lang64_deaf_no_explo": "Deaf-MH-BabyAI",
                "RERUN_WizardGuide_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
                "RERUN_WizardTwoGuides_lang64_bow": "MH-BabyAI-ExpBonus-BOW",
                "RERUN_WizardGuide_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
                "RERUN_WizardTwoGuides_lang64_no_mem": "MH-BabyAI-ExpBonus-no-mem",
                "RERUN_WizardGuide_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
                "RERUN_WizardTwoGuides_lang64_bigru": "MH-BabyAI-ExpBonus-bigru",
                "RERUN_WizardGuide_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
                "RERUN_WizardTwoGuides_lang64_attgru": "MH-BabyAI-ExpBonus-attgru",
                "RERUN_WizardGuide_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
                "RERUN_WizardTwoGuides_lang64_curr_dial": "MH-BabyAI-ExpBonus-current-dialogue",
                "RERUN_WizardTwoGuides_lang64_mm_baby_short_rec_100M": "MH-BabyAI-ExpBonus-100M",
            }
            if sum([1 for k in label_parser_dict if k in label]) != 1:
                print("ERROR multiple curves match a label")
                print(label)
                exit()
            for k, v in label_parser_dict.items():
                if k in label:
                    return v
    return label


def smooth(x_, n=50):
    """Trailing moving average with window *n* (window shrinks at the start)."""
    if isinstance(x_, list):
        x_ = np.array(x_)
    return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])


per_seed = False
for i, m_id in enumerate(models_saves.keys()):
    # excluding some experiments
    if any([ex_pat in m_id for ex_pat in exclude_patterns]):
        continue
    if len(include_patterns) > 0:
        if not any([in_pat in m_id for in_pat in include_patterns]):
            continue

    runs_data = models_saves[m_id]['data']
    ys = []
    print("m_id:", m_id)
    # DIRTY FIX FOR FAULTY LOGGING: drop repeated CSV header rows in 'frames'
    if runs_data[0]['frames'][1] == 'frames':
        runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))

    # determine minimal run length across the top_n longest seeds
    # (raises IndexError if there are fewer than top_n runs)
    minimum = sorted([len(run['frames']) for run in runs_data if len(run['frames'])])[-top_n]
    min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])
    print("min_len:", min_len)

    # compute env steps (x axis), in millions
    # (dtype=int: np.int was removed in NumPy 1.24)
    longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
    steps = np.array(runs_data[longest_id]['frames'], dtype=int) / 1000000
    steps = steps[:min_len]

    for run in runs_data:
        data = run[metric]
        # DIRTY FIX FOR FAULTY LOGGING (headers in data)
        if data[1] == metric:
            data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)
        if len(data) >= min_len:
            if len(data) > min_len:
                print("run has too many {} datapoints ({}). Discarding {}".format(
                    m_id, len(data), len(data) - min_len))
                data = data[0:min_len]
            ys.append(data)
    ys_same_len = ys  # RUNS MUST HAVE SAME LEN

    # compute stats over seeds
    n_seeds = len(ys_same_len)
    sems = np.std(ys_same_len, axis=0) / np.sqrt(len(ys_same_len))  # sem
    stds = np.std(ys_same_len, axis=0)  # std
    means = np.mean(ys_same_len, axis=0)
    color = default_colors[i]

    # per-metric adjustments
    ylabel = metric
    if metric == 'bin_extrinsic_return_mean':
        ylabel = "success rate"
    if metric == 'duration':
        ylabel = "time (hours)"
        means = means / 3600
        sems = sems / 3600
        stds = stds / 3600

    # track global plot bounds
    curr_max_y = np.max(means)
    curr_min_y = np.min(means)
    curr_max_steps = np.max(steps)
    if curr_max_y > max_y:
        max_y = curr_max_y
    if curr_min_y < min_y:
        min_y = curr_min_y
    if curr_max_steps > max_steps:
        max_steps = curr_max_steps

    if subsample_step:
        steps = steps[0::subsample_step]
        means = means[0::subsample_step]
        stds = stds[0::subsample_step]
        sems = sems[0::subsample_step]
        ys_same_len = [y[0::subsample_step] for y in ys_same_len]

    if per_seed:
        # display seeds separately
        for s_i, seed_ys in enumerate(ys_same_len):
            seed_c = default_colors[i + s_i]
            label = m_id
            plot_with_shade(0, ax[0], steps, seed_ys, stds * 0, seed_c, seed_c, label,
                            legend=False, xlim=[0, max_steps], ylim=[min_y, max_y],
                            leg_size=leg_size, xlabel="env steps (millions)",
                            ylabel=ylabel, smooth_factor=smooth_factor)
    else:
        label = label_parser(m_id, figure_id, label_parser_dict=label_parser_dict)
        if smooth_factor:
            means = smooth(means, smooth_factor)
            stds = smooth(stds, smooth_factor)
        x_lim = 30
        if figure_id == "TalkItOutPolite":
            leg_args = {
                'ncol': 1,
                'columnspacing': 1.0,
                'handlelength': 1.0,
                'frameon': False,
                'bbox_to_anchor': (0.55, 0.35, 0.10, .102),
                'labelspacing': 0.2,
                'fontsize': 27,
            }
        elif figure_id == "Help":
            leg_args = {
                'ncol': 1,
                'columnspacing': 1.0,
                'handlelength': 1.0,
                'frameon': False,
                'bbox_to_anchor': (0.39, 0.20, 0.10, .102),
                'labelspacing': 0.2,
                'fontsize': 27,
            }
        else:
            leg_args = {}
        # fixed colors for known labels; random fallback otherwise
        color_code = dict([
            ('PPO+Explo', 'indianred'),
            ('PPO', "#1f77b4"),
            ('Unsocial PPO', "grey"),
            ('PPO (no liar)', "#043252"),
            ('PPO+Explo (no liar)', "darkred"),
            ('Unsocial PPO (no liar)', "black"),
            ('PPO+Explo (helper)', 'indianred'),
            ('PPO (helper)', "#1f77b4"),
            ('Unsocial PPO (helper)', "grey"),
        ])
        color = color_code.get(label, np.random.choice(default_colors))
        print("C:", color)
        plot_with_shade_grg(0, ax[0], steps, means, stds, color, color, label,
                            legend=True,
                            xlim=[0, steps[-1] if not x_lim else x_lim],
                            ylim=[0, 1.0], xlabel="env steps (millions)",
                            ylabel=ylabel, title=None, leg_args=leg_args)

# Dotted horizontal reference lines (only set for the "Help" figure).
# NOTE: `steps`, `ylabel` and `leg_args` intentionally come from the last
# loop iteration above.
for label, (mean, std, color) in static_lines.items():
    plot_with_shade_grg(0, ax[0], steps,
                        np.array([mean] * len(steps)),
                        np.array([std] * len(steps)),
                        color, color, label, legend=True,
                        xlim=[0, max_steps], ylim=[0, 1.0],
                        xlabel="env steps (millions)", ylabel=ylabel,
                        linestyle=":", leg_args=leg_args)

plt.tight_layout()
os.makedirs('graphics', exist_ok=True)  # savefig fails if the dir is missing
f.savefig('graphics/{}_results.svg'.format(str(figure_id)))
f.savefig('graphics/{}_results.png'.format(str(figure_id)))
plt.show()