Spaces:
Runtime error
Runtime error
| import functools | |
| import operator | |
| import pandas as pd | |
def correlations_for_group(group):
    """Correlate every ``*_related`` column with the other metric columns.

    Metric names are taken as the text before the first underscore of each
    column whose name ends in ``_related``, ``_independent`` or ``_aggr``.
    For each related metric, Pearson and Spearman correlations are computed
    against every independent metric and then against every aggregated one.

    Args:
        group: DataFrame holding the ``<metric>_related``,
            ``<metric>_independent`` and ``<metric>_aggr`` columns.

    Returns:
        A ``pd.Series`` keyed by ``rel_<rel>_ind_<ind>_<method>`` and
        ``rel_<rel>_aggr_<aggr>_<method>``, where ``<method>`` is
        ``pearson`` or ``spearman``.
    """

    def names_with_suffix(suffix):
        # Only the first underscore-separated token is kept as the name.
        return [col.split("_")[0] for col in group.columns if col.endswith(suffix)]

    related_names = names_with_suffix("_related")
    independent_names = names_with_suffix("_independent")
    aggregated_names = names_with_suffix("_aggr")

    # (key tag, column suffix, metric names), in the order they are emitted.
    right_sides = (
        ("ind", "_independent", independent_names),
        ("aggr", "_aggr", aggregated_names),
    )

    values = {}
    for rel_name in related_names:
        for tag, suffix, other_names in right_sides:
            for other_name in other_names:
                left = group[f"{rel_name}_related"]
                right = group[f"{other_name}{suffix}"]
                for method in ("pearson", "spearman"):
                    key = f"rel_{rel_name}_{tag}_{other_name}_{method}"
                    values[key] = left.corr(right, method=method)
    return pd.Series(values)
def split_metrics_string(s):
    """Return the second and fourth underscore-separated tokens of ``s``.

    For a label such as ``rel_<a>_ind_<b>`` this yields ``(<a>, <b>)``.
    Raises ``IndexError`` when ``s`` has fewer than four tokens.
    """
    pick_metric_names = operator.itemgetter(1, 3)
    return pick_metric_names(s.split("_"))
def get_correlations_df(df, right_side):
    """Tabulate the correlations of ``df`` for one kind of right-hand metric.

    Runs ``correlations_for_group`` on ``df``, keeps the entries whose key
    contains ``right_side`` as a substring, and arranges them into a frame
    with one row per metric pair.

    Args:
        df: DataFrame passed on to ``correlations_for_group``.
        right_side: Substring that selects which correlation keys to keep.

    Returns:
        A DataFrame with columns ``spearman`` and ``pearson`` and a
        two-level index named ``relative`` / ``independent``, built from
        the pair labels via ``split_metrics_string``.
    """
    raw = correlations_for_group(df)

    # Drop the trailing "_<method>" token so both methods share one label.
    pair_labels = {
        key.rpartition("_")[0] for key in raw.index if right_side in key
    }

    rows = [
        {
            "metrics": label,
            "spearman": raw[f"{label}_spearman"],
            "pearson": raw[f"{label}_pearson"],
        }
        for label in pair_labels
    ]

    # Sorting on the string label makes the row order independent of set order.
    table = pd.DataFrame.from_records(data=rows, index="metrics").sort_index()
    table.index = pd.MultiIndex.from_tuples(
        [split_metrics_string(label) for label in table.index],
        names=["relative", "independent"],
    )
    return table
def get_correlations_for_groups(df, right_side):
    """Build correlation tables for the full data and each flag subset.

    The ``end_to_start`` and ``start_to_end`` columns of ``df`` are compared
    against every combination of ``False`` / ``True``. Each subset holds the
    rows matching that combination plus the rows where both flags are
    ``False``; it is labelled ``golden`` with ``+e2s`` and/or ``+s2e``
    appended for the flags that are ``True``.

    Args:
        df: DataFrame with ``end_to_start`` and ``start_to_end`` columns in
            addition to whatever ``get_correlations_df`` reads.
        right_side: Forwarded unchanged to ``get_correlations_df``.

    Returns:
        The per-group tables concatenated along the columns, keyed by
        ``all`` and then by each subset label.
    """
    tables = {"all": get_correlations_df(df, right_side=right_side)}

    flag_combos = ((False, False), (False, True), (True, False), (True, True))
    for e2s, s2e in flag_combos:
        active_tags = [tag for tag, on in (("e2s", e2s), ("s2e", s2e)) if on]
        label = "+".join(["golden"] + active_tags)

        # Element-wise comparisons on the columns, hence "== False".
        matches_combo = (df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
        is_golden = (df["end_to_start"] == False) & (df["start_to_end"] == False)  # noqa: E712

        tables[label] = get_correlations_df(
            df[matches_combo | is_golden], right_side=right_side
        )

    return pd.concat(tables, axis=1)