#!/usr/bin/env python
import re
import itertools
import math
from itertools import chain
import time

# import seaborn
import numpy as np
import os
from collections import OrderedDict, defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import sys
from termcolor import cprint, colored
from pathlib import Path
import pickle
from scipy import stats

save = True
show_plot = False

metrics = [
    'success_rate_mean',
    # 'FPS',
    # 'extrinsic_return_mean',
    # 'exploration_bonus_mean',
    # 'NPC_intro',
    # 'curriculum_param_mean',
    # 'curriculum_max_success_rate_mean',
    # 'rreturn_mean'
]


eval_metric = "test_success_rates"
# eval_metric = "exploration_bonus_mean"

super_title = ""
# super_title = "PPO - No exploration bonus"
# super_title = "Count Based exploration bonus (Grid Search)"
# super_title = "PPO + RND"
# super_title = "PPO + RIDE"

# statistical evaluation p-value
test_p = 0.05

agg_title = ""

color_dict = None
eval_filename = None

max_frames = 20_000_000

legend_show_n_seeds = False
draw_legend = True
per_seed = False

study_train = False
study_eval = True

plot_test = True

plot_aggregated_test = True
plot_only_aggregated_test = True


xnbins = 4
ynbins = 3

steps_denom = 1e6

# Global vas for tracking and labeling data at load time.
exp_idx = 0
label_parser_dict = None
label_parser = lambda l, _, label_parser_dict: l

smooth_factor = 10 # used
# smooth_factor = 0
print("smooth factor:", smooth_factor)
eval_smooth_factor = None
leg_size = 30

def smooth(x_, n=50):
    if n is None:
        return x_

    if type(x_) == list:
        x_ = np.array(x_)
    return np.array([x_[max(i - n, 0):i + 1].mean() for i in range(len(x_))])

sort_test = False

def sort_test_set(env_name):
    helps = [
        "LanguageFeedback",
        "LanguageColor",
        "Pointing",
        "Emulation",
    ]
    problems = [
        "Boxes",
        "Switches",
        "Generators",
        "Marble",
        "Doors",
        "Levers",
    ]

    env_names = []
    for p in problems:
        for h in helps:
            env_names.append(h+p)

    env_names.extend([
        "LeverDoorColl",
        "MarblePushColl",
        "MarblePassColl",
        "AppleStealing"
    ])

    for i, en in enumerate(env_names):
        if en in env_name:
            return i

    raise ValueError(f"Test env {env_name} not known")


subsample_step = 1
load_subsample_step = 1

x_lim = 0
max_x_lim = np.inf

summary_dict = {}
summary_dict_colors = {}
to_plot_dict = {}


default_colors_ = ["blue","orange","green","magenta", "brown", "red",'black',"grey",u'#ff7f0e',
                  "cyan", "pink",'purple', u'#1f77b4',
                  "darkorchid","sienna","lightpink", "indigo","mediumseagreen",'aqua',
                  'deeppink','silver','khaki','goldenrod'] * 100


def get_eval_data(logdir, eval_metric):
    eval_data = defaultdict(lambda :defaultdict(list))

    for root, _, files in os.walk(logdir):
        for file in files:
            if 'testing_' in file:
                assert ".pkl" in file
                test_env_name = file.lstrip("testing_").rstrip(".pkl")
                try:
                    with open(root+"/"+file, "rb") as f:
                        seed_eval_data = pickle.load(f)
                except:
                    print("Pickle not loaded: ", root+"/"+file)
                    time.sleep(1)
                    continue

                eval_data[test_env_name]["values"].append(seed_eval_data[eval_metric])
                eval_data[test_env_name]["steps"].append(seed_eval_data["test_step_nb"])

    for test_env, seed_data in eval_data.items():
        min_len_seed = min([len(s) for s in seed_data['steps']])
        eval_data[test_env]["values"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["values"]])
        eval_data[test_env]["steps"] = np.array([s[:min_len_seed] for s in eval_data[test_env]["steps"]])

    return eval_data

def get_all_runs(logdir, load_subsample_step=1):
    """
    Recursively look through logdir for output files produced by
    Assumes that any file "log.csv" is a valid hit.
    """
    global exp_idx
    global units
    datasets = []
    for root, _, files in os.walk(logdir):
        if 'log.csv' in files:
            if (Path(root) / 'log.csv').stat().st_size == 0:
                print("CSV {} empty".format(os.path.join(root, 'log.csv')))
                continue

            run_name = root[8:]

            exp_name = None

            config = None
            exp_idx += 1

            # load progress data
            try:
                exp_data = pd.read_csv(os.path.join(root, 'log.csv'))
                print("Loaded:", os.path.join(root, 'log.csv'))
            except:
                raise ValueError("CSV {} faulty".format(os.path.join(root, 'log.csv')))

            exp_data = exp_data[::load_subsample_step]
            data_dict = exp_data.to_dict("list")

            data_dict['config'] = config
            nb_epochs = len(data_dict['frames'])
            if nb_epochs == 1:
                print(f'{run_name} -> {colored(f"nb_epochs {nb_epochs}", "red")}')
            else:
                print('{} -> nb_epochs {}'.format(run_name, nb_epochs))

            datasets.append(data_dict)

    return datasets


def get_datasets(rootdir, load_only="", load_subsample_step=1, ignore_patterns=("ignore"), require_patterns=()):
    _, models_list, _ = next(os.walk(rootdir))
    for dir_name in models_list.copy():
        # add "ignore" in a directory name to avoid loading its content
        for ignore_pattern in ignore_patterns:
            if ignore_pattern in dir_name or load_only not in dir_name:
                if dir_name in models_list:
                    models_list.remove(dir_name)

        if len(require_patterns) > 0:
            if not any([require_pattern in dir_name for require_pattern in require_patterns]):
                if dir_name in models_list:
                    models_list.remove(dir_name)

    for expe_name in list(labels.keys()):
        if expe_name not in models_list:
            del labels[expe_name]

    # setting per-model type colors
    for i, m_name in enumerate(models_list):
        for m_type, m_color in per_model_colors.items():
            if m_type in m_name:
                colors[m_name] = m_color
        print("extracting data for {}...".format(m_name))
        m_id = m_name
        models_saves[m_id] = OrderedDict()
        models_saves[m_id]['data'] = get_all_runs(rootdir+m_name, load_subsample_step=load_subsample_step)
        print("done")

        if m_name not in labels:
            labels[m_name] = m_name

        model_eval_data[m_id] = get_eval_data(logdir=rootdir+m_name, eval_metric=eval_metric)

    """
    retrieve all experiences located in "data to vizu" folder
    """
labels = OrderedDict()
per_model_colors = OrderedDict()

# LOAD DATA
models_saves = OrderedDict()
colors = OrderedDict()
model_eval_data = OrderedDict()

static_lines = {}

ignore_patterns = ["_ignore_"]

to_compare = None
load_pattern = sys.argv[1]

test_envs_to_plot = None  # plot all

min_y, max_y = 0.0, 1.1


def label_parser(label):
    label = label.replace("04-01_Pointing_CB_heldout_doors", "PPO_CB")
    label = label.replace("19-01_Color_CB_heldout_doors", "PPO_CBL")
    label = label.replace("19-01_Feedback_CB_heldout_doors_20M", "PPO_CBL")

    label = label.replace("20-01_JA_Color_CB_heldout_doors", "JA_PPO_CBL")

    label = label.replace("05-01_scaffolding_50M_no_acl", "PPO_no_scaf")
    label = label.replace("05-01_scaffolding_50M_acl_4_acl-type_intro_seq", "PPO_scaf_4")
    label = label.replace("05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf", "PPO_scaf_8")


    label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
    label = label.replace("03-01_RR_ft_single_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")

    label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp", "PPO_CB_role_B")
    label = label.replace("05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr", "PPO_CB_asocial")

    label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
                          "PPO_CB_0.25")
    label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
                          "PPO_CB_0.5")
    label = label.replace("20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
                          "PPO_CB_1")

    return label

color_dict = {
    'PPO_CB': "blue",
    'PPO_CB(train)': "blue",
    "PPO_CB(test)": "orange",

    'PPO_no_bonus': "orange",

    'PPO_CBL': "blue",
    'PPO_CBL(train)': "blue",
    "PPO_CBL(test)": "orange",
    'JA_PPO_CBL': "green",

    "PPO_CB_role_B": "blue",
    "PPO_CB_asocial": "orange",

    'PPO_CB_0.25': "blue",
    'PPO_CB_0.5': "green",
    'PPO_CB_1': "orange",

}

if load_pattern == "RR_single":
    save = False
    show_plot = True
    load_pattern = "_"

    plot_path = "../case_studies_final_figures/RR_dummy_single"

    require_patterns = [
        "03-01_RR_ft_single_CB_marble_pass_A_asoc_contr",
        "03-01_RR_ft_single_CB_marble_pass_A_soc_exp",
    ]

    plot_aggregated_test = False
    plot_only_aggregated_test = False
    study_train = True
    study_eval = False

elif load_pattern == "RR_group":

    load_pattern = "_"

    plot_path = "../case_studies_final_figures/RR_dummy_group"

    require_patterns = [
        "05-01_RR_ft_group_50M_CB_marble_pass_A_asoc_contr",
        "05-01_RR_ft_group_50M_CB_marble_pass_A_soc_exp",
    ]

    plot_aggregated_test = False
    plot_only_aggregated_test = False
    study_train = True
    study_eval = False


elif load_pattern == "scaffolding":
    load_pattern = "_"

    plot_path = "../case_studies_final_figures/Scaffolding_test"

    require_patterns = [
        "05-01_scaffolding_50M_no_acl",
        "05-01_scaffolding_50M_acl_4_acl-type_intro_seq",
        "05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf",
    ]

    test_envs_to_plot = None  # aggregate all of them
    plot_aggregated_test = True
    plot_only_aggregated_test = True
    study_train = False
    study_eval = True

    to_compare = [
        ("05-01_scaffolding_50M_acl_4_acl-type_intro_seq_agg_test", "05-01_scaffolding_50M_no_acl_agg_test", "auto_color"),
        ("05-01_scaffolding_50M_acl_8_acl-type_intro_seq_scaf_agg_test", "05-01_scaffolding_50M_no_acl_agg_test", "auto_color"),
    ]

elif load_pattern == "pointing":
    study_train = True
    study_eval = True

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    load_pattern = "_"

    test_envs_to_plot = [
        "SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1",
    ]

    plot_path = "../case_studies_final_figures/Pointing_train_test"

    require_patterns = [
        "04-01_Pointing_CB_heldout_doors",
    ]

    to_compare = [
        ("04-01_Pointing_CB_heldout_doors", "04-01_Pointing_CB_heldout_doors_SocialAI-EPointingDoorsTestInformationSeekingParamEnv-v1", "black")
    ]

elif load_pattern == "color":
    study_train = True
    study_eval = True

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = [
        "SocialAI-ELangColorDoorsTestInformationSeekingParamEnv-v1",
    ]

    plot_path = "../case_studies_final_figures/Color_train_test"

    require_patterns = [
        "19-01_Color_CB_heldout_doors",
    ]

    to_compare = [
        ("19-01_Color_CB_heldout_doors", "19-01_Color_CB_heldout_doors_SocialAI-ELangColorDoorsTestInformationSeekingParamEnv-v1", "black")
    ]

elif load_pattern == "ja_color":

    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/JA_Color_train"

    require_patterns = [
        "19-01_Color_CB_heldout_doors",
        "20-01_JA_Color_CB_heldout_doors",
    ]

    to_compare = [
        ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
    ]

elif load_pattern == "feedback_per_seed":
    study_train = True
    study_eval = False
    per_seed = True
    draw_legend = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False
    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = [
        "SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1",
    ]

    plot_path = "../case_studies_final_figures/Feedback_train_per_seed"

    require_patterns = [
        "19-01_Feedback_CB_heldout_doors",
    ]

    to_compare = None

elif load_pattern == "feedback":
    study_train = True
    study_eval = True

    plot_aggregated_test = False
    plot_only_aggregated_test = False
    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = [
        "SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1",
    ]

    plot_path = "../case_studies_final_figures/Feedback_train_test"

    require_patterns = [
        "19-01_Feedback_CB_heldout_doors",
    ]

    to_compare = [
        ("19-01_Feedback_CB_heldout_doors_20M", "19-01_Feedback_CB_heldout_doors_20M_SocialAI-ELangFeedbackDoorsTestInformationSeekingParamEnv-v1", "black")
    ]

elif load_pattern == "imitation_train":

    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/Imitation_train"

    require_patterns = [
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
    ]

    # to_compare = [
    #     ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
    # ]
    to_compare = None

elif load_pattern == "imitation_train_intro":

    metrics = ["NPC_intro"]

    show_plot = False
    save = True

    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/Imitation_train_intro"

    require_patterns = [
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
    ]

    # to_compare = [
    #     ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
    # ]
    to_compare = None

elif load_pattern == "imitation_test":

    study_train = False
    study_eval = True

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 18

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/Imitation_test"

    require_patterns = [
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.25_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__0.5_50",
        "20-01_Imitation_PPO_CB_exploration-bonus-type_cell_exploration-bonus-params__1_50",
    ]

    # to_compare = [
    #     ("19-01_Color_CB_heldout_doors", "20-01_JA_Color_CB_heldout_doors", "black")
    # ]
    to_compare = None

elif load_pattern == "pilot_pointing":

    study_train = True
    study_eval = False

    show_plot = False
    save = True
    plot_path = "../case_studies_final_figures/pilot_pointing"

    load_pattern = "29-10_SAI_Pointing_CS_PPO_"

    require_patterns = [
        "29-10_SAI_Pointing_CS_PPO_CB_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_cell_exploration-bonus-params__2_50_exploration-bonus-tanh_0.6",
        "29-10_SAI_Pointing_CS_PPO_CBL_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_lang_exploration-bonus-params__10_50_exploration-bonus-tanh_0.6",
        "29-10_SAI_Pointing_CS_PPO_no_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4",
        "29-10_SAI_Pointing_CS_PPO_RIDE_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_ride_intrinsic-reward-coef_0.01",
        "29-10_SAI_Pointing_CS_PPO_RND_env_SocialAI-EPointingInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_rnd_intrinsic-reward-coef_0.005",
    ]

    color_dict = {
        "PPO_RIDE": "orange",
        "PPO_RND": "magenta",
        "PPO_no": "maroon",
        "PPO_CBL": "green",
        "PPO_CB": "blue",
    }

    def label_parser(label):
        label = label.split("_env_")[0].split("SAI_")[1]
        label=label.replace("Pointing_CS_", "")
        return label

    to_compare = None

elif load_pattern == "pilot_color":

    study_train = True
    study_eval = False

    show_plot = False
    save = True
    plot_path = "../case_studies_final_figures/pilot_color"

    load_pattern = "29-10_SAI_LangColor_CS"

    require_patterns = [
        "29-10_SAI_LangColor_CS_PPO_CB_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_cell_exploration-bonus-params__2_50_exploration-bonus-tanh_0.6",
        "29-10_SAI_LangColor_CS_PPO_CBL_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_lang_exploration-bonus-params__10_50_exploration-bonus-tanh_0.6",
        "29-10_SAI_LangColor_CS_PPO_no_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4",
        "29-10_SAI_LangColor_CS_PPO_RIDE_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_ride_intrinsic-reward-coef_0.01",
        "29-10_SAI_LangColor_CS_PPO_RND_env_SocialAI-ELangColorInformationSeekingParamEnv-v1_recurrence_5_lr_1e-4_exploration-bonus-type_rnd_intrinsic-reward-coef_0.005"
    ]
    color_dict = {
        "PPO_RIDE": "orange",
        "PPO_RND": "magenta",
        "PPO_no": "maroon",
        "PPO_CBL": "green",
        "PPO_CB": "blue",
    }

    def label_parser(label):
        label = label.split("_env_")[0].split("SAI_")[1]
        label=label.replace("LangColor_CS_", "")
        return label

    to_compare = None

elif load_pattern == "formats_train":

    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    max_x_lim = 45

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/Formats_train"

    require_patterns = [
        "21-01_formats_50M_CBL",
        "05-01_scaffolding_50M_no_acl",
    ]

    to_compare = [
        ("21-01_formats_50M_CBL", "05-01_scaffolding_50M_no_acl", "black")
    ]


    def label_parser(label):
        label = label.replace("05-01_scaffolding_50M_no_acl", "PPO_no_bonus")
        label = label.replace("21-01_formats_50M_CBL", "PPO_CBL")
        return label

elif load_pattern == "adversarial":

    show_plot = False
    save = True

    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    # max_x_lim = 45

    smooth_factor = 0

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/adversarial"

    require_patterns = [
        "26-01_Adversarial_2M_PPO_CB_hidden_npc",
        "26-01_Adversarial_2M_PPO_CB_asoc",
        "26-01_Adversarial_2M_PPO_CB",
    ]

    to_compare = [
        ("26-01_Adversarial_2M_PPO_CB", "26-01_Adversarial_2M_PPO_CB_hidden_npc", "orange"),
        ("26-01_Adversarial_2M_PPO_CB", "26-01_Adversarial_2M_PPO_CB_asoc", "green")
    ]

    def label_parser(label):
        label = label.replace("26-01_Adversarial_2M_PPO_CB_hidden_npc", "PPO_CB_invisible_peer")
        label = label.replace("26-01_Adversarial_2M_PPO_CB_asoc", "PPO_CB_no_peer")
        label = label.replace("26-01_Adversarial_2M_PPO_CB", "PPO_CB")
        return label

    color_dict = {
        "PPO_CB": "blue",
        "PPO_CB_invisible_peer": "orange",
        "PPO_CB_no_peer": "green",
    }

elif load_pattern == "adversarial_stumps":


    study_train = True
    study_eval = False

    plot_aggregated_test = False
    plot_only_aggregated_test = False

    # max_x_lim = 45

    smooth_factor = 0

    load_pattern = "_"

    test_envs_to_plot = None
    plot_path = "../case_studies_final_figures/adversarial_stumps"

    require_patterns = [
        "26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc",
        "26-01_Adversarial_5M_Stumps_PPO_CB_asoc",
        "26-01_Adversarial_5M_Stumps_PPO_CB",
    ]

    to_compare = [
        ("26-01_Adversarial_5M_Stumps_PPO_CB", "26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc", "orange"),
        ("26-01_Adversarial_5M_Stumps_PPO_CB", "26-01_Adversarial_5M_Stumps_PPO_CB_asoc", "green")
    ]

    def label_parser(label):
        label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB_hidden_npc", "PPO_CB_invisible_peer")
        label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB_asoc", "PPO_CB_no_peer")
        label = label.replace("26-01_Adversarial_5M_Stumps_PPO_CB", "PPO_CB")
        return label

    color_dict = {
        "PPO_CB": "blue",
        "PPO_CB_invisible_peer": "orange",
        "PPO_CB_no_peer": "green",
    }

else:
    plot_path = "plots/testplot"

    require_patterns = [
        "_",
        # pointing
        # "04-01_Pointing_CB_heldout_doors",
    ]

if to_compare is None and len(require_patterns) == 2 and "_" not in require_patterns:
    # if only two curves compare those two automatically
    to_compare = [(require_patterns[0], require_patterns[1], "black")]


save=False
show_plot = True


# all of those
include_patterns = []
#include_patterns = ["rec_5"]

fontsize = 20
legend_fontsize = 20
linewidth = 5
# linewidth = 1

leg_args = {
    'fontsize': legend_fontsize
}

title_fontsize = int(fontsize*1.2)


storage_dir = "storage/"
if load_pattern.startswith(storage_dir):
    load_pattern = load_pattern[len(storage_dir):]

if load_pattern.startswith("./storage/"):
    load_pattern = load_pattern[len("./storage/"):]

get_datasets(storage_dir, str(load_pattern), load_subsample_step=load_subsample_step, ignore_patterns=ignore_patterns, require_patterns=require_patterns)

label_parser_dict = {
    # "PPO_CB": "PPO_CB",
    # "02-06_AppleStealing_experiments_cb_bonus_angle_occ_env_SocialAI-OthersPerceptionInferenceParamEnv-v1_exploration-bonus-type_cell": "NPC_visible",
}

env_type = str(load_pattern)

fig_type = "test"
try:
    top_n = int(sys.argv[2])
except:
    top_n = 8

to_remove = []

for tr_ in to_remove:
    if tr_ in models_saves:
        del models_saves[tr_]

print("Loaded:")
print("\n".join(list(models_saves.keys())))


if per_model_colors:  # order runs for legend order as in per_models_colors, with corresponding colors
    ordered_labels = OrderedDict()
    for teacher_type in per_model_colors.keys():
        for k,v in labels.items():
            if teacher_type in k:
                ordered_labels[k] = v
    labels = ordered_labels
else:
    print('not using per_model_color')
    for k in models_saves.keys():
        labels[k] = k

# Plot utils
def plot_with_shade(subplot_nb, ax, x, y, err, color, shade_color, label,
                    legend=False, leg_loc='best', title=None,
                    ylim=[0, 100], xlim=[0, 40], leg_args={}, leg_linewidth=13.0, linewidth=10.0, labelsize=20, fontsize=20, title_fontsize=30,
                    zorder=None, xlabel='Perf', ylabel='Env steps', linestyle="-", xnbins=3, ynbins=3):

    #plt.rcParams.update({'font.size': 15})
    ax.locator_params(axis='x', nbins=xnbins)
    ax.locator_params(axis='y', nbins=ynbins)

    ax.tick_params(axis='y', which='both', labelsize=labelsize)
    ax.tick_params(axis='x', which='both', labelsize=labelsize*0.8)
    # ax.tick_params(axis='both', which='both', labelsize="small")

    # ax.scatter(x, y, color=color,linewidth=linewidth,zorder=zorder, linestyle=linestyle)
    ax.plot(x, y, color=color, label=label, linewidth=linewidth, zorder=zorder, linestyle=linestyle)

    if not np.array_equal(err, np.zeros_like(err)):
        ax.fill_between(x, y-err, y+err, color=shade_color, alpha=0.2)

    if legend:
        leg = ax.legend(loc=leg_loc, **leg_args)  # 34
        for legobj in leg.legendHandles:
            legobj.set_linewidth(leg_linewidth)

    ax.set_xlabel(xlabel, fontsize=fontsize)
    if subplot_nb == 0:
        ax.set_ylabel(ylabel, fontsize=fontsize, labelpad=2)

    ax.set_xlim(xmin=xlim[0], xmax=xlim[1])
    ax.set_ylim(bottom=ylim[0], top=ylim[1])
    if title:
        ax.set_title(title, fontsize=title_fontsize)


# only one figure is drawn -> maybe we can add loops later
assert len(metrics) == 1

f, ax = plt.subplots(1, 1, figsize=(9.0, 9.0))

if len(metrics) == 1:
    ax = [ax]

# max_y = -np.inf
min_y = np.inf

max_steps = 0
exclude_patterns = []

metric = metrics[0]

ylabel = {
    "success_rate_mean": "Success rate (%)",
    "exploration_bonus_mean": "Exploration bonus",
    "NPC_intro": "Successful introduction (%)",
}.get(metric, metric)

# for metric_i, metric in enumerate(metrics):
default_colors = default_colors_.copy()

if study_train:
    for model_i, model_id in enumerate(models_saves.keys()):

        #excluding some experiments
        if any([ex_pat in model_id for ex_pat in exclude_patterns]):
            continue

        if len(include_patterns) > 0:
            if not any([in_pat in model_id for in_pat in include_patterns]):
                continue

        runs_data = models_saves[model_id]['data']
        ys = []

        if runs_data[0]['frames'][1] == 'frames':
            runs_data[0]['frames'] = list(filter(('frames').__ne__, runs_data[0]['frames']))

        if per_seed:
            min_len = None

        else:
            # determine minimal run length across seeds
            lens = [len(run['frames']) for run in runs_data if len(run['frames'])]
            minimum = sorted(lens)[-min(top_n, len(lens))]
            min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) >= minimum])

            # keep only top k
            runs_data = [run for run in runs_data if len(run['frames']) >= minimum]

            # min_len = np.min([len(run['frames']) for run in runs_data if len(run['frames']) > 10])

        # compute env steps (x axis)
        longest_id = np.argmax([len(rd['frames']) for rd in runs_data])
        steps = np.array(runs_data[longest_id]['frames'], dtype=np.int) / steps_denom
        steps = steps[:min_len]

        for run in runs_data:
            if metric not in run:
                raise ValueError(f"Metric {metric} not found. Possible metrics: {list(run.keys())}")

            data = run[metric]

            # checking for header
            if data[1] == metric:
                data = np.array(list(filter((metric).__ne__, data)), dtype=np.float16)

            if per_seed:
                ys.append(data)

            else:
                if len(data) >= min_len:
                    # discard extra
                    if len(data) > min_len:
                        print("run has too many {} datapoints ({}). Discarding {}".format(model_id, len(data),
                                                                                          len(data) - min_len))
                        data = data[0:min_len]
                    ys.append(data)
                else:
                    raise ValueError("How can data be < min_len if it was capped above")

        ys_same_len = ys

        # computes stats
        n_seeds = len(ys_same_len)

        if per_seed:
            sems = np.array(ys_same_len)
            means = np.array(ys_same_len)
            stds = np.zeros_like(means)
            color = default_colors[model_i]

        else:
            sems = np.std(ys_same_len, axis=0)/np.sqrt(len(ys_same_len))  # sem
            stds = np.std(ys_same_len, axis=0)  # std
            means = np.mean(ys_same_len, axis=0)
            color = default_colors[model_i]

        if metric == 'duration':
            means = means / 3600
            sems = sems / 3600
            stds = stds / 3600

        if per_seed:
            # plot x y bounds
            curr_max_steps = np.max(np.max(steps))

        else:
            # plot x y bounds
            curr_max_steps = np.max(steps)

        if curr_max_steps > max_steps:
            max_steps = curr_max_steps

        if subsample_step:
            steps = steps[0::subsample_step]
            means = means[0::subsample_step]
            stds = stds[0::subsample_step]
            sems = sems[0::subsample_step]
            ys_same_len = [y[0::subsample_step] for y in ys_same_len]

        # display seeds separately
        if per_seed:
            for s_i, seed_ys in enumerate(ys_same_len):

                label = label_parser(model_id)

                if study_eval:
                   label = label + "_train_"

                label = label + f"(s:{s_i})"

                if label in color_dict:
                    color = color_dict[label]
                else:
                    color = default_colors[model_i*20+s_i]

                curve_ID = f"{model_id}_{s_i}"
                assert np.array_equal(stds, np.zeros_like(stds))

                if smooth_factor:
                    means = smooth(means, smooth_factor)

                to_plot_dict[curve_ID] = {
                    "label": label,
                    "steps": steps,
                    "means": seed_ys,
                    "stds": stds,
                    "ys": ys_same_len,
                    "color": color
                }

        else:
            label = label_parser(model_id)

            if study_eval:
                label = label+"(train)"

            if color_dict:
                color = color_dict[label]
            else:
                color = default_colors[model_i]

            if smooth_factor:
                means = smooth(means, smooth_factor)
                stds = smooth(stds, smooth_factor)

            to_plot_dict[model_id] = {
                "label": label,
                "steps": steps,
                "means": means,
                "stds": stds,
                "sems": sems,
                "ys": ys_same_len,
                "color": color,
            }


if study_eval:
    print("Evaluation")
    # evaluation sets
    number_of_eval_envs = max(list([len(v.keys()) for v in model_eval_data.values()]))

    if plot_aggregated_test:
        number_of_eval_envs += 1

    if number_of_eval_envs == 0:
        print("No eval envs")
        exit()

    default_colors = default_colors_.copy()

    test_summary_dict = defaultdict(dict)
    test_summary_dict_colors = defaultdict(dict)

    for model_i, model_id in enumerate(model_eval_data.keys()):
        # excluding some experiments
        if any([ex_pat in model_id for ex_pat in exclude_patterns]):
            continue
        if len(include_patterns) > 0:
            if not any([in_pat in model_id for in_pat in include_patterns]):
                continue

        # test envs
        test_envs = model_eval_data[model_id].items()

        # filter unwanted eval envs
        if test_envs_to_plot is not None:
            test_envs = [(name, data) for name, data in test_envs if name in test_envs_to_plot]

        # computes stats
        if sort_test:
            test_envs_sorted = list(sorted(test_envs, key=lambda kv: sort_test_set(kv[0])))
        else:
            test_envs_sorted = list(test_envs)

        if plot_aggregated_test:
            agg_means = []

        for env_i, (test_env, env_data) in enumerate(test_envs_sorted):
            ys_same_len = env_data["values"]
            steps = env_data["steps"].mean(0) / steps_denom
            n_seeds = len(ys_same_len)

            if per_seed:
                sems = np.array(ys_same_len)
                stds = np.array(ys_same_len)
                means = np.array(ys_same_len)
                color = default_colors[model_i]

                # plot x y bounds
                curr_max_steps = np.max(np.max(steps))

            else:
                sems = np.std(ys_same_len, axis=0) / np.sqrt(len(ys_same_len))  # sem
                stds = np.std(ys_same_len, axis=0)  # std
                means = np.mean(ys_same_len, axis=0)
                color = default_colors[model_i]

                curr_max_steps = np.max(steps)

            if plot_aggregated_test:
                agg_means.append(means)


            x_lim = max(steps[-1], x_lim)
            x_lim = min(max_x_lim, x_lim)

            eval_metric_name = {
                "test_success_rates": "Success rate",
                'exploration_bonus_mean': "Exploration bonus",
            }.get(eval_metric, eval_metric)

            test_env_name = test_env.replace("Env", "").replace("Test", "")

            env_types = ["InformationSeeking", "Collaboration", "PerspectiveTaking"]
            for env_type in env_types:
                if env_type in test_env_name:
                    test_env_name = test_env_name.replace(env_type, "")
                    test_env_name += f"\n({env_type})"

            if per_seed:
                for s_i, seed_ys in enumerate(ys_same_len):
                    label = label_parser(model_id) + f"_{test_env}" + f"(s:{s_i})"

                    if eval_smooth_factor:
                        seed_ys = smooth(seed_ys, eval_smooth_factor)

                    curve_ID = f"{model_id}_{test_env}_{s_i}"

                    to_plot_dict[curve_ID] = {
                        "label": label,
                        "steps": steps,
                        "means": seed_ys,
                        "stds": np.zeros_like(seed_ys),
                        "ys": ys_same_len,
                        "color": color
                    }
            else:
                if len(test_envs_sorted) > 1:
                    label = label_parser(model_id) + f"_{test_env}"
                else:
                    label = label_parser(model_id)

                if study_train:
                    label=label+"(test)"

                if not plot_only_aggregated_test:

                    if label in color_dict:
                        color = color_dict[label]
                    else:
                        color = default_colors[model_i*len(test_envs_sorted)+env_i]

                    if legend_show_n_seeds:
                        label = label + "({})".format(n_seeds)

                    if eval_smooth_factor:
                        means = smooth(means, eval_smooth_factor)
                        stds = smooth(stds, eval_smooth_factor)
                        sems = smooth(sems, eval_smooth_factor)

                    to_plot_dict[model_id+f"_{test_env}"] = {
                        "label": label,
                        "steps": steps,
                        "means": means,
                        "stds": stds,
                        "sems": sems,
                        "ys": ys_same_len,
                        "color": color,
                    }

        if plot_aggregated_test:

            ys_same_len = agg_means
            agg_means = np.array(agg_means)
            agg_mean = agg_means.mean(axis=0)
            agg_std = agg_means.std(axis=0)  # std
            agg_sems = ...

            label = label_parser(model_id)

            if study_train:
                label = label + "(train)"

            if eval_smooth_factor:
                agg_mean = smooth(agg_mean, eval_smooth_factor)
                agg_std = smooth(agg_std, eval_smooth_factor)
                agg_sems = smooth(agg_sems, eval_smooth_factor)

            if per_seed:
                print("Not smooth aggregated because of per seed")
                for s_i, (seed_ys, seed_st) in enumerate(zip(agg_mean, agg_std)):
                    seed_c = default_colors[model_i + s_i]
                    label = str(s_i)

                    to_plot_dict[curve_ID] = {
                        "label": label,
                        "steps": steps,
                        "means": seed_ys,
                        "stds": seed_st,
                        "ys": ys_same_len,
                        "color": color
                    }
            else:

                if label in color_dict:
                    color = color_dict[label]

                else:
                    color = default_colors[model_i]

                to_plot_dict[model_id+"_agg_test"] = {
                    "label": label,
                    "steps": steps,
                    "means": agg_mean,
                    "stds": agg_std,
                    "sems": agg_sems,
                    "ys": ys_same_len,
                    "color": color,
                }


# should be labels
to_scatter_dict = {}

if to_compare is not None:
    for comp_i, (a_model_id, b_model_id, color) in enumerate(to_compare):

        a_data = to_plot_dict[a_model_id]["ys"]
        b_data = to_plot_dict[b_model_id]["ys"]

        steps = to_plot_dict[a_model_id]["steps"]

        if color == "auto_color":
            color = to_plot_dict[a_model_id]["color"]

        if len(a_data[0]) != len(b_data[0]):
            # extract steps present in both
            a_steps = to_plot_dict[a_model_id]["steps"]
            b_steps = to_plot_dict[b_model_id]["steps"]

            steps = list(set(a_steps) & set(b_steps))

            # keep only the values for those steps
            mask_a = [(a_s in steps) for a_s in a_steps]
            a_data = np.array(a_data)[:, mask_a]

            mask_b = [(b_s in steps) for b_s in b_steps]
            b_data = np.array(b_data)[:, mask_b]

        p = stats.ttest_ind(
            a_data,
            b_data,
            equal_var=False
        ).pvalue

        steps = [s for s, p in zip(steps, p) if p < test_p]

        ys = [1.02+0.02*comp_i]*len(steps)

        to_scatter_dict[f"compare_{a_model_id}_{b_model_id}"] = {
            "label": "",
            "xs": steps,
            "ys": ys,
            "color": color,
        }

for scatter_i, (scatter_ID, scatter_id_data) in enumerate(to_scatter_dict.items()):

    # unpack data
    label, xs, ys, color = (
        scatter_id_data["label"],
        scatter_id_data["xs"],
        scatter_id_data["ys"],
        scatter_id_data["color"],
    )

    xlabel = f"Env steps (1e6)"

    plt.scatter(
        xs,
        ys,
        color=color,
        marker="x"
    )

    summary_dict[label] = xs[-1]
    summary_dict_colors[label] = color

for curve_i, (curve_ID, model_id_data) in enumerate(to_plot_dict.items()):

    # unpack data
    label, steps, means, stds, sems, ys, color = (
        model_id_data["label"],
        model_id_data["steps"],
        model_id_data["means"],
        model_id_data["stds"],
        model_id_data["sems"],
        model_id_data["ys"],
        model_id_data["color"]
    )

    # if smooth_factor:
    #     means = smooth(means, smooth_factor)
    #     stds = smooth(stds, smooth_factor)

    if legend_show_n_seeds:
        n_seeds = len(ys)
        label = label+"({})".format(n_seeds)


    x_lim = max(steps[-1], x_lim)
    x_lim = min(max_x_lim, x_lim)

    xlabel = f"Env steps (1e6)"


    plot_with_shade(
        0, ax[0], steps, means, stds, color, color, label,
        # 0, ax[0], steps, means, sems, color, color, label,
        legend=draw_legend,
        xlim=[0, x_lim],
        ylim=[0, max_y],
        xlabel=xlabel,
        ylabel=ylabel,
        title=None,
        labelsize=fontsize,
        fontsize=fontsize,
        title_fontsize=title_fontsize,
        linewidth=linewidth,
        leg_linewidth=5,
        leg_args=leg_args,
        xnbins=xnbins,
        ynbins=ynbins,
    )

    summary_dict[label] = means[-1]
    summary_dict_colors[label] = color

# plot static lines
if static_lines:
    for label, (mean, std, color) in static_lines.items():

        if label == "":
            label = None

        plot_with_shade(
            0, ax[0], steps, np.array([mean]*len(steps)), np.array([std]*len(steps)), color, color, label,
            legend=True,
            xlim=[0, x_lim],
            ylim=[0, 1.0],
            xlabel=f"Env steps (1e6)",
            ylabel=ylabel,
            linestyle=":",
            leg_args=leg_args,
            fontsize=fontsize,
            title_fontsize=title_fontsize,
            xnbins=xnbins,
            ynbins=ynbins,
        )


if plot_path:
    f.savefig(plot_path+".png")
    f.savefig(plot_path+".svg")
    print(f"Plot saved to {plot_path}.[png/svg].")


# Summary dict
if len(summary_dict) == 0:
    raise ValueError(f"No experiments found for {load_pattern}.")
else:
    # print summary
    best = max(summary_dict.values())

    pc = 0.3
    n = int(len(summary_dict)*pc)
    print("top n: ", n)

    top_pc = sorted(summary_dict.values())[-n:]
    bottom_pc = sorted(summary_dict.values())[:n]

    print("legend:")
    cprint("\tbest", "green")
    cprint("\ttop {} %".format(pc), "blue")
    cprint("\tbottom {} %".format(pc), "red")
    print("\tothers")
    print()

    for l, p in sorted(summary_dict.items(), key=lambda kv: kv[1]):

        c = summary_dict_colors[l]
        if p == best:
            cprint("label: {} ({})".format(l, c), "green")
            cprint("\t {}:{}".format(metric, p), "green")

        elif p in top_pc:
            cprint("label: {} ({})".format(l, c), "blue")
            cprint("\t {}:{}".format(metric, p), "blue")

        elif p in bottom_pc:
            cprint("label: {} ({})".format(l, c), "red")
            cprint("\t {}:{}".format(metric, p), "red")

        else:
            print("label: {} ({})".format(l, c))
            print("\t {}:{}".format(metric, p))


if show_plot:
    plt.tight_layout()
    plt.subplots_adjust(hspace=1.5, wspace=0.5, left=0.1, right=0.9, bottom=0.1, top=0.85)
    plt.suptitle(super_title)
    plt.show()
plt.close()