Petr Tsvetkov
committed on
Commit
β’
303303b
1
Parent(s):
ff76f88
Add distribution charts; add more detailed statistics; compute multi-reference TER as mean of TERs for each reference to improve the performance
Browse files- change_visualizer.py +76 -9
- generation_steps/metrics_analysis.py +6 -4
- requirements.txt +4 -0
- statistics.py +31 -14
change_visualizer.py
CHANGED
@@ -12,8 +12,35 @@ n_diffs_manual = len(df_manual)
|
|
12 |
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
|
13 |
n_diffs_synthetic = len(df_synthetic)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
STATISTICS = {"manual": statistics.get_statistics_for_df(df_manual),
|
16 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
def update_dataset_view(diff_idx, df):
|
@@ -92,20 +119,60 @@ if __name__ == '__main__':
|
|
92 |
def layout_for_statistics(statistics_group_name):
|
93 |
gr.Markdown(f"### {statistics_group_name}")
|
94 |
stats = STATISTICS[statistics_group_name]
|
95 |
-
gr.Number(label="
|
96 |
-
value=stats['
|
97 |
-
gr.Number(label="
|
98 |
-
value=stats['
|
99 |
-
gr.Number(label="
|
100 |
-
value=stats['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
|
103 |
with gr.Row():
|
104 |
-
with gr.Column(scale=1):
|
105 |
layout_for_statistics("manual")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
|
|
107 |
with gr.Column(scale=1):
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
gr.Markdown(f"### Reference-only correlations")
|
111 |
gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="ind").to_markdown())
|
|
|
12 |
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
|
13 |
n_diffs_synthetic = len(df_synthetic)
|
14 |
|
15 |
+
|
16 |
+
def golden():
    """Return the manually-annotated ("golden") DataFrame unchanged."""
    return df_manual
|
18 |
+
|
19 |
+
|
20 |
+
def e2s():
    """Rows of the synthetic set edited end-to-start only (not start-to-end)."""
    mask = (df_synthetic['end_to_start'] == True) & (df_synthetic['start_to_end'] == False)
    return df_synthetic[mask]
|
22 |
+
|
23 |
+
|
24 |
+
def s2e():
    """Rows of the synthetic set edited start-to-end only (not end-to-start)."""
    mask = (df_synthetic['end_to_start'] == False) & (df_synthetic['start_to_end'] == True)
    return df_synthetic[mask]
|
26 |
+
|
27 |
+
|
28 |
+
def e2s_s2e():
    """Rows of the synthetic set edited in both directions."""
    mask = (df_synthetic['end_to_start'] == True) & (df_synthetic['start_to_end'] == True)
    return df_synthetic[mask]
|
30 |
+
|
31 |
+
|
32 |
+
def synthetic():
    """Rows of the synthetic set edited in at least one direction."""
    mask = (df_synthetic['end_to_start'] == True) | (df_synthetic['start_to_end'] == True)
    return df_synthetic[mask]
|
34 |
+
|
35 |
+
|
36 |
# Per-group statistics, keyed by group name.  Insertion order is preserved and
# drives the column order of the UI below.
STATISTICS = {
    group_name: statistics.get_statistics_for_df(frame)
    for group_name, frame in [
        ("manual", df_manual),
        ("e2s", e2s()),
        ("s2e", s2e()),
        ("e2s_s2e", e2s_s2e()),
        ("synthetic", synthetic()),
        ("all", df_synthetic),
    ]
}

# Every group exposes the same stat keys; take them from the manual group.
STAT_NAMES = list(STATISTICS["manual"].keys())
|
44 |
|
45 |
|
46 |
def update_dataset_view(diff_idx, df):
|
|
|
119 |
def layout_for_statistics(statistics_group_name):
    """Render the summary widgets (count + six averages) for one stats group."""
    gr.Markdown(f"### {statistics_group_name}")
    stats = STATISTICS[statistics_group_name]

    # Sample size: every stat array has the same length, use one of them.
    gr.Number(label="Count", interactive=False,
              value=len(stats['deletions_norm']), min_width=0)

    # (label, stat key) pairs rendered as identical gr.Number widgets.
    averaged_fields = [
        ("Avg deletions number (rel to the initial msg length)", 'deletions_norm'),
        ("Avg insertions number (rel to the result length)", 'insertions_norm'),
        ("Avg changes number (rel to the initial msg length)", 'changes_norm'),
        ("Avg deletions number", 'deletions'),
        ("Avg insertions number", 'insertions'),
        ("Avg changes number", 'changes'),
    ]
    for label, key in averaged_fields:
        gr.Number(label=label, interactive=False,
                  value=stats[key].mean().item(), precision=3, min_width=0)
|
136 |
|
137 |
|
138 |
with gr.Row():
    # One narrow column of summary numbers per dataset group.
    for group_name in ("manual", "e2s", "s2e", "e2s_s2e", "synthetic", "all"):
        with gr.Column(scale=1, min_width=100):
            layout_for_statistics(group_name)
|
151 |
|
152 |
+
def _render_distribution_column(stat_name_filter):
    """Render one column of distribution charts for stats matching the filter."""
    with gr.Column(scale=1):
        for stat_name in filter(stat_name_filter, STAT_NAMES):
            chart = statistics.build_plotly_chart(
                stat_golden=STATISTICS['manual'][stat_name],
                stat_e2s=STATISTICS['e2s'][stat_name],
                stat_s2e=STATISTICS['s2e'][stat_name],
                stat_e2s_s2e=STATISTICS['e2s_s2e'][stat_name],
                stat_name=stat_name
            )
            gr.Plot(value=chart)


with gr.Row():
    # Left: absolute counts; right: normalized counts.
    # NOTE(review): the original duplicated the chart-building loop and wrapped
    # the normalized column in two nested gr.Column(scale=1) — the inner column
    # was redundant and is removed here.
    _render_distribution_column(lambda s: "_norm" not in s)
    _render_distribution_column(lambda s: "_norm" in s)
|
176 |
|
177 |
gr.Markdown(f"### Reference-only correlations")
|
178 |
gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="ind").to_markdown())
|
generation_steps/metrics_analysis.py
CHANGED
@@ -72,7 +72,8 @@ TER = evaluate.load("ter")
|
|
72 |
|
73 |
def ter_fn(pred, ref, **kwargs):
|
74 |
if "refs" in kwargs:
|
75 |
-
|
|
|
76 |
return TER.compute(predictions=[pred], references=[[ref]])["score"]
|
77 |
|
78 |
|
@@ -130,10 +131,10 @@ def gptscore_noref_5_fn(pred, ref, **kwargs):
|
|
130 |
IND_METRICS = {
|
131 |
"editdist": edit_distance_fn,
|
132 |
"editdist-norm": edit_distance_norm_fn,
|
133 |
-
"gptscore-ref-1-req": gptscore_ref_1_fn,
|
134 |
# "gptscore-ref-3-req": gptscore_ref_3_fn,
|
135 |
# "gptscore-ref-5-req": gptscore_ref_5_fn,
|
136 |
-
"gptscore-noref-1-req": gptscore_noref_1_fn,
|
137 |
# "gptscore-noref-3-req": gptscore_noref_3_fn,
|
138 |
# "gptscore-noref-5-req": gptscore_noref_5_fn,
|
139 |
"bleu": bleu_fn,
|
@@ -174,7 +175,8 @@ def compute_metrics(df):
|
|
174 |
values = []
|
175 |
for i, row in tqdm(df.iterrows(), total=len(df)):
|
176 |
others = df[(df["hash"] == row["hash"]) & (df["repo"] == row["repo"]) & (
|
177 |
-
df["commit_msg_start"] != row["commit_msg_start"])
|
|
|
178 |
others.append(row["reference"])
|
179 |
others = list(set(others))
|
180 |
metric_fn = AGGR_METRICS[metric]
|
|
|
72 |
|
73 |
def ter_fn(pred, ref, **kwargs):
    """TER score of `pred` against `ref`.

    When kwargs carries a non-empty "refs" iterable, the multi-reference score
    is computed as the mean of per-reference TERs (cheaper than a single
    multi-reference TER.compute call).  An empty "refs" now falls through to
    the single-reference path instead of raising ZeroDivisionError.
    """
    refs = kwargs.get("refs")
    if refs:
        # Loop variable renamed so it no longer shadows the `ref` parameter.
        scores = [TER.compute(predictions=[pred], references=[[r]])["score"]
                  for r in refs]
        return sum(scores) / len(scores)
    return TER.compute(predictions=[pred], references=[[ref]])["score"]
|
78 |
|
79 |
|
|
|
131 |
IND_METRICS = {
|
132 |
"editdist": edit_distance_fn,
|
133 |
"editdist-norm": edit_distance_norm_fn,
|
134 |
+
# "gptscore-ref-1-req": gptscore_ref_1_fn,
|
135 |
# "gptscore-ref-3-req": gptscore_ref_3_fn,
|
136 |
# "gptscore-ref-5-req": gptscore_ref_5_fn,
|
137 |
+
# "gptscore-noref-1-req": gptscore_noref_1_fn,
|
138 |
# "gptscore-noref-3-req": gptscore_noref_3_fn,
|
139 |
# "gptscore-noref-5-req": gptscore_noref_5_fn,
|
140 |
"bleu": bleu_fn,
|
|
|
175 |
values = []
|
176 |
for i, row in tqdm(df.iterrows(), total=len(df)):
|
177 |
others = df[(df["hash"] == row["hash"]) & (df["repo"] == row["repo"]) & (
|
178 |
+
df["commit_msg_start"] != row["commit_msg_start"]) & (
|
179 |
+
df["commit_msg_end"] != row["commit_msg_end"])]['commit_msg_end'].to_list()
|
180 |
others.append(row["reference"])
|
181 |
others = list(set(others))
|
182 |
metric_fn = AGGR_METRICS[metric]
|
requirements.txt
CHANGED
@@ -160,3 +160,7 @@ widgetsnbextension==4.0.10
|
|
160 |
xxhash==3.4.1
|
161 |
yarl==1.9.4
|
162 |
zipp==3.18.1
|
|
|
|
|
|
|
|
|
|
160 |
xxhash==3.4.1
|
161 |
yarl==1.9.4
|
162 |
zipp==3.18.1
|
163 |
+
|
164 |
+
plotly==5.22.0
|
165 |
+
tenacity==8.2.3
|
166 |
+
Levenshtein==0.25.1
|
statistics.py
CHANGED
@@ -1,24 +1,27 @@
|
|
|
|
1 |
import numpy as np
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
|
5 |
def get_statistics(start_msg, end_msg, annotated_msg):
|
6 |
-
|
7 |
-
|
8 |
-
for
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
sum_changes = sum_deletions + sum_insertions
|
15 |
-
end_length = len(end_msg)
|
16 |
-
start_length = len(start_msg)
|
17 |
|
18 |
return {
|
19 |
-
"deletions":
|
20 |
-
"insertions":
|
21 |
-
"changes":
|
|
|
|
|
|
|
|
|
22 |
}
|
23 |
|
24 |
|
@@ -29,3 +32,17 @@ def get_statistics_for_df(df: pd.DataFrame):
|
|
29 |
assert len(stats) > 0
|
30 |
|
31 |
return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import Levenshtein
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
+
import plotly.figure_factory as ff
|
5 |
|
6 |
|
7 |
def get_statistics(start_msg, end_msg, annotated_msg):
    """Count Levenshtein edit operations turning start_msg into end_msg.

    `annotated_msg` is accepted for interface compatibility but not used here.

    A 'replace' op counts as both a deletion and an insertion, but only once
    toward the total number of changes.  Normalized values divide by the
    relevant message length; empty messages previously raised
    ZeroDivisionError and now yield 0-denominator-safe results.
    """
    edit_ops = Levenshtein.editops(start_msg, end_msg)
    n_deletes = sum(op == 'delete' for op, _, _ in edit_ops)
    n_inserts = sum(op == 'insert' for op, _, _ in edit_ops)
    n_replaces = sum(op == 'replace' for op, _, _ in edit_ops)

    n_changes = n_deletes + n_inserts + n_replaces
    n_deletes += n_replaces
    n_inserts += n_replaces

    # Guard against empty messages (both counts are 0 then, so the ratio is 0).
    start_len = max(len(start_msg), 1)
    end_len = max(len(end_msg), 1)

    return {
        "deletions": n_deletes,
        "insertions": n_inserts,
        "changes": n_changes,

        "deletions_norm": n_deletes / start_len,
        "insertions_norm": n_inserts / end_len,
        # NOTE(review): normalized by the END message length, while the UI
        # label reads "rel to the initial msg length" — confirm which is
        # intended; behavior kept as-is.
        "changes_norm": n_changes / end_len,
    }
}
|
26 |
|
27 |
|
|
|
32 |
assert len(stats) > 0
|
33 |
|
34 |
return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
|
35 |
+
|
36 |
+
|
37 |
+
def build_plotly_chart(stat_golden, stat_e2s, stat_s2e, stat_e2s_s2e, stat_name):
    """Build a plotly distplot comparing one statistic's distribution per group.

    The "Synthetic" trace is the concatenation of the three synthetic groups
    (e2s, s2e, e2s+s2e).
    """
    hist_data = [stat_golden, stat_e2s, stat_s2e, stat_e2s_s2e,
                 np.concatenate((stat_e2s, stat_s2e, stat_e2s_s2e), axis=0)]

    # Label fixed from the garbled 'e2s+s 2e' to match the group key elsewhere.
    group_labels = ['Golden', 'e2s', 's2e', 'e2s+s2e', 'Synthetic']

    fig = ff.create_distplot(hist_data, group_labels,
                             bin_size=.1, show_rug=False, show_hist=True)

    fig.update_layout(title_text=stat_name)

    return fig
|