Petr Tsvetkov
Pretty-print all the correlations in the visualization app
a01d3ba
raw
history blame
1.54 kB
import pandas as pd
from generation_steps.metrics_analysis import correlations_for_group
def split_metrics_string(s):
    """Extract the two metric names embedded in an underscore-delimited label.

    The label is split on ``"_"`` and the tokens at positions 1 and 3 are
    returned as a 2-tuple. Callers in this module use the pair as the
    ("relative", "independent") levels of a MultiIndex.

    Raises:
        IndexError: if the label has fewer than four underscore-separated
            tokens.
    """
    parts = s.split("_")
    first_metric = parts[1]
    second_metric = parts[3]
    return (first_metric, second_metric)
def get_ref_only_correlations_df(df):
    """Arrange the correlations computed for ``df`` into a two-column table.

    ``correlations_for_group(df)`` is queried by label, so it is assumed to
    return a Series-like object whose index labels look like
    ``"<metrics>_spearman"`` / ``"<metrics>_pearson"`` (TODO confirm against
    ``generation_steps.metrics_analysis``).

    Returns:
        A DataFrame with columns ``"spearman"`` and ``"pearson"``, sorted by
        the original ``<metrics>`` label and indexed by a MultiIndex with
        levels ``("relative", "independent")`` obtained by passing each label
        through ``split_metrics_string``.
    """
    raw = correlations_for_group(df)

    # Drop the final "_<token>" from every label; the set collapses the
    # spearman/pearson variants of the same metric pair into one entry.
    metric_labels = {"_".join(label.split("_")[:-1]) for label in raw.index}

    records = [
        {
            "metrics": label,
            "spearman": raw[f"{label}_spearman"],
            "pearson": raw[f"{label}_pearson"],
        }
        for label in metric_labels
    ]

    # Sorting by label makes the row order independent of set iteration order.
    table = pd.DataFrame.from_records(data=records, index="metrics").sort_index()

    level_pairs = table.index.map(split_metrics_string).tolist()
    table.index = pd.MultiIndex.from_tuples(level_pairs).set_names(
        ["relative", "independent"]
    )
    return table
def get_ref_only_correlations_for_groups(df):
    """Build correlation tables for the whole frame and for each flag subset.

    ``df`` must have boolean-comparable ``"end_to_start"`` and
    ``"start_to_end"`` columns. One table is computed for all rows (``"all"``)
    and one for each of the four combinations of those two flags:

    * ``"golden"``   - both flags False
    * ``"+s2e"``     - only ``start_to_end`` True
    * ``"+e2s"``     - only ``end_to_start`` True
    * ``"+e2s+s2e"`` - both flags True

    Returns:
        The per-group tables concatenated side by side (``axis=1``), with the
        group label as the outer column level.
    """
    # (end_to_start, start_to_end) per label; dict order fixes the order of
    # the column groups in the result.
    flag_combinations = {
        "golden": (False, False),
        "+s2e": (False, True),
        "+e2s": (True, False),
        "+e2s+s2e": (True, True),
    }

    tables = {"all": get_ref_only_correlations_df(df)}
    for label, (e2s, s2e) in flag_combinations.items():
        row_mask = (df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
        tables[label] = get_ref_only_correlations_df(df[row_mask])

    return pd.concat(tables, axis=1)