Spaces:

JetBrains-Research
/

commit-message-editing-visualization

Runtime error

Petr Tsvetkov

Pretty-print all the correlations in the visualization app

a01d3ba over 1 year ago

1.54 kB

	import pandas as pd

	from generation_steps.metrics_analysis import correlations_for_group


	def split_metrics_string(s):
	    """Return the second and fourth underscore-separated tokens of *s*.

	    The string is split on ``"_"``; the tokens at positions 1 and 3 are
	    returned as a 2-tuple. An ``IndexError`` is raised when *s* has fewer
	    than four tokens.
	    """
	    pieces = s.split("_")
	    second = pieces[1]
	    fourth = pieces[3]
	    return second, fourth


	def get_ref_only_correlations_df(df):
	    """Arrange the correlations computed for *df* into a two-column table.

	    ``correlations_for_group(df)`` is expected to return a label-indexed
	    object (presumably a ``pandas.Series`` -- confirm against its
	    definition) whose labels come in ``<prefix>_spearman`` /
	    ``<prefix>_pearson`` pairs.

	    Returns:
	        A ``DataFrame`` with columns ``spearman`` and ``pearson``, one row
	        per distinct prefix, sorted by prefix, and indexed by a two-level
	        ``MultiIndex`` named ``("relative", "independent")`` derived from
	        the prefix via ``split_metrics_string``.
	    """
	    raw = correlations_for_group(df)
	    # Drop the trailing "_<kind>" token from every label; the set removes
	    # the duplicate produced by each spearman/pearson pair.
	    prefixes = {label.rpartition("_")[0] for label in raw.index}
	    records = [
	        {
	            "metrics": metrics,
	            "spearman": raw[f"{metrics}_spearman"],
	            "pearson": raw[f"{metrics}_pearson"],
	        }
	        for metrics in prefixes
	    ]
	    table = pd.DataFrame.from_records(data=records, index="metrics")
	    table = table.sort_index()
	    # Replace the flat string index with (relative, independent) pairs.
	    pairs = table.index.map(split_metrics_string).tolist()
	    pair_index = pd.MultiIndex.from_tuples(pairs)
	    table.index = pair_index.set_names(["relative", "independent"])
	    return table


	def get_ref_only_correlations_for_groups(df):
	    """Compute correlation tables for all of *df* and for four flag subsets.

	    The subsets are the four combinations of the boolean columns
	    ``end_to_start`` and ``start_to_end``. Each table comes from
	    ``get_ref_only_correlations_df`` and is keyed by a label:

	    * ``"all"``      -- the whole frame
	    * ``"golden"``   -- both flags ``False``
	    * ``"+s2e"``     -- only ``start_to_end`` is ``True``
	    * ``"+e2s"``     -- only ``end_to_start`` is ``True``
	    * ``"+e2s+s2e"`` -- both flags ``True``

	    Returns:
	        The tables concatenated side by side (``axis=1``), with the labels
	        above as the outer column level.
	    """
	    tables = {"all": get_ref_only_correlations_df(df)}
	    for e2s in (False, True):
	        for s2e in (False, True):
	            # Join the tags of the enabled flags; no tag at all means the
	            # "golden" subset.
	            tags = [tag for tag, enabled in (("+e2s", e2s), ("+s2e", s2e)) if enabled]
	            label = "".join(tags) or "golden"
	            mask = (df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
	            tables[label] = get_ref_only_correlations_df(df[mask])
	    return pd.concat(tables, axis=1)