Petr Tsvetkov committed on
Commit
4017643
•
1 Parent(s): f26a894

# of deletions rel to initial message length

Browse files
Files changed (2) hide show
  1. change_visualizer.py +1 -1
  2. statistics.py +6 -4
change_visualizer.py CHANGED
@@ -82,7 +82,7 @@ if __name__ == '__main__':
82
  def layout_for_statistics(statistics_group_name):
83
  gr.Markdown(f"### {statistics_group_name}")
84
  stats = STATISTICS[statistics_group_name]
85
- gr.Number(label="Average deletions number (rel to the result length)", interactive=False,
86
  value=stats['deletions'].mean().item(), precision=3)
87
  gr.Number(label="Average insertions number (rel to the result length)", interactive=False,
88
  value=stats['insertions'].mean().item(), precision=3)
 
82
  def layout_for_statistics(statistics_group_name):
83
  gr.Markdown(f"### {statistics_group_name}")
84
  stats = STATISTICS[statistics_group_name]
85
+ gr.Number(label="Average deletions number (rel to the initial message length)", interactive=False,
86
  value=stats['deletions'].mean().item(), precision=3)
87
  gr.Number(label="Average insertions number (rel to the result length)", interactive=False,
88
  value=stats['insertions'].mean().item(), precision=3)
statistics.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
2
  import pandas as pd
3
 
4
 
5
- def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
6
  relative_deletions = []
7
  relative_insertions = []
8
  relative_changes = []
@@ -18,8 +18,9 @@ def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
18
 
19
  sum_changes = sum_deletions + sum_insertions
20
  end_length = len(row[end_col])
 
21
 
22
- relative_deletions.append(sum_deletions / end_length)
23
  relative_insertions.append(sum_insertions / end_length)
24
  relative_changes.append(sum_changes / end_length)
25
 
@@ -31,8 +32,9 @@ def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
31
 
32
 
33
  def get_statistics_for_manual_df(df):
34
- return get_statistics_for_df(df, end_col='commit_msg_end', annotated_col='annotated_diff')
 
35
 
36
 
37
  def get_statistics_for_synthetic_df(df):
38
- return get_statistics_for_df(df, end_col='reference', annotated_col='annotated_diff')
 
2
  import pandas as pd
3
 
4
 
5
+ def get_statistics_for_df(df: pd.DataFrame, start_col, end_col, annotated_col):
6
  relative_deletions = []
7
  relative_insertions = []
8
  relative_changes = []
 
18
 
19
  sum_changes = sum_deletions + sum_insertions
20
  end_length = len(row[end_col])
21
+ start_length = len(row[start_col])
22
 
23
+ relative_deletions.append(sum_deletions / start_length)
24
  relative_insertions.append(sum_insertions / end_length)
25
  relative_changes.append(sum_changes / end_length)
26
 
 
32
 
33
 
34
  def get_statistics_for_manual_df(df):
35
+ return get_statistics_for_df(df, start_col="commit_msg_start", end_col='commit_msg_end',
36
+ annotated_col='annotated_diff')
37
 
38
 
39
  def get_statistics_for_synthetic_df(df):
40
+ return get_statistics_for_df(df, start_col="commit_msg_start", end_col='reference', annotated_col='annotated_diff')