"""Compute reference-based text metrics over a synthetic commit-message dataset and
the correlations between the "related" scores (commit_msg_start vs. commit_msg_end)
and the "independent" scores (commit_msg_start vs. reference)."""
import functools
import operator

import evaluate
import pandas as pd
from tqdm import tqdm

import config
from api_wrappers import hf_data_loader
from custom_metrics import gpt_eval

# Metric backends are loaded once at import time; each is wrapped in a small
# scoring function with a uniform (pred, ref) -> float signature.
BLEU = evaluate.load('bleu', cache_dir=config.CACHE_DIR)


def bleu_fn(pred, ref):
    return BLEU.compute(predictions=[pred], references=[ref])["bleu"]


METEOR = evaluate.load('meteor', cache_dir=config.CACHE_DIR)


def meteor_fn(pred, ref):
    return METEOR.compute(predictions=[pred], references=[ref])["meteor"]


ROUGE = evaluate.load('rouge', cache_dir=config.CACHE_DIR)


def rouge1_fn(pred, ref):
    return ROUGE.compute(predictions=[pred], references=[ref])["rouge1"]


def rouge2_fn(pred, ref):
    return ROUGE.compute(predictions=[pred], references=[ref])["rouge2"]


def rougeL_fn(pred, ref):
    return ROUGE.compute(predictions=[pred], references=[ref])["rougeL"]


BERTSCORE = evaluate.load('bertscore', cache_dir=config.CACHE_DIR)


def bertscore_fn(pred, ref):
    return BERTSCORE.compute(predictions=[pred], references=[ref],
                             model_type="distilbert-base-uncased")["f1"][0]


def gptscore_fn(pred, ref):
    return gpt_eval.compute(prediction=pred, reference=ref)


# chrF and TER expect a list of references per prediction, hence [[ref]].
CHRF = evaluate.load("chrf")


def chrf_fn(pred, ref):
    return CHRF.compute(predictions=[pred], references=[[ref]])["score"]


TER = evaluate.load("ter")


def ter_fn(pred, ref):
    return TER.compute(predictions=[pred], references=[[ref]])["score"]


METRICS = {
    # "gptscore": gptscore_fn,
    "bleu": bleu_fn,
    "meteor": meteor_fn,
    "rouge1": rouge1_fn,
    "rouge2": rouge2_fn,
    "rougeL": rougeL_fn,
    "bertscore": bertscore_fn,
    "chrF": chrf_fn,
    "ter": ter_fn
}


def attach_references(df):
    """Join the ground-truth reference messages onto df by (hash, repo)."""
    reference_df = hf_data_loader.load_full_commit_as_pandas().set_index(["hash", "repo"])[["reference"]]
    df = df.set_index(["hash", "repo"])
    return df.join(other=reference_df, how="left").reset_index()


def compute_metrics(df):
    """Score every row with each metric in both settings, adding the columns
    <metric>_related and <metric>_independent plus dataset-level correlations."""
    tqdm.pandas()

    def apply_metric_fn_to_row(row, fn, col_pred, col_ref):
        return fn(row[col_pred], row[col_ref])

    for metric in METRICS:
        print(f"Computing {metric}")
        metric_fn = METRICS[metric]
        df[f"{metric}_related"] = df.progress_apply(
            lambda row: apply_metric_fn_to_row(row=row, fn=metric_fn,
                                               col_pred="commit_msg_start", col_ref="commit_msg_end"),
            axis=1)
        df[f"{metric}_independent"] = df.progress_apply(
            lambda row: apply_metric_fn_to_row(row=row, fn=metric_fn,
                                               col_pred="commit_msg_start", col_ref="reference"),
            axis=1)
        df[f"{metric}_pearson"] = df[f"{metric}_related"].corr(df[f"{metric}_independent"], method="pearson")
        df[f"{metric}_spearman"] = df[f"{metric}_related"].corr(df[f"{metric}_independent"], method="spearman")

    return df


def correlations_for_group(group):
    """Return a Series of Pearson/Spearman correlations between the related and
    independent scores of every metric pair within one group."""
    correlations = []
    for metric in METRICS:
        correlations.append({
            f"{metric}_pearson": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="pearson"),
            f"{metric}_spearman": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="spearman")
        })
        for other_metric in METRICS:
            correlations.append({
                f"ind_{metric}_rel_{other_metric}_pearson": group[f"{other_metric}_related"].corr(
                    group[f"{metric}_independent"], method="pearson"),
                f"ind_{metric}_rel_{other_metric}_spearman": group[f"{other_metric}_related"].corr(
                    group[f"{metric}_independent"], method="spearman")
            })
    # Merge the per-metric dicts into a single flat mapping.
    return pd.Series(functools.reduce(operator.ior, correlations, {}))


def compute_correlations(df: pd.DataFrame):
    grouped_df = df.groupby(by=["end_to_start", "start_to_end"])
    correlations = grouped_df.apply(correlations_for_group, include_groups=False)
    return correlations


def transform(df):
    print("Computing metrics")
    df = attach_references(df)
    df = compute_metrics(df)
    correlations_for_groups = compute_correlations(df)
    correlations_for_groups.to_csv(config.METRICS_CORRELATIONS_ARTIFACT)
    df.to_csv(config.SYNTHETIC_DATASET_ARTIFACT)
    print("Done")
    return df


def main():
    df = pd.read_csv(config.START_TO_END_ARTIFACT, index_col=[0])
    transform(df)


if __name__ == '__main__':
    main()
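# Note: `functools.reduce(operator.ior, correlations, {})` in correlations_for_group
# folds the list of per-metric dicts into one flat dict via the in-place dict-union
# operator (requires Python 3.9+). A minimal standalone sketch of just that merging
# step, with purely illustrative keys and values:
#
#     import functools
#     import operator
#
#     parts = [{"bleu_pearson": 0.42}, {"bleu_spearman": 0.37}]
#     merged = functools.reduce(operator.ior, parts, {})
#     assert merged == {"bleu_pearson": 0.42, "bleu_spearman": 0.37}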