Petr Tsvetkov committed on
Commit
c151bb0
•
1 Parent(s): 0b259d2

Fix the statistics in visualization

Browse files
Files changed (2) hide show
  1. change_visualizer.py +2 -2
  2. statistics.py +3 -11
change_visualizer.py CHANGED
@@ -9,8 +9,8 @@ n_diffs_manual = len(df_manual)
9
  df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
10
  n_diffs_synthetic = len(df_synthetic)
11
 
12
- STATISTICS = {"manual": statistics.get_statistics_for_manual_df(df_manual),
13
- "synthetic": statistics.get_statistics_for_synthetic_df(df_synthetic)}
14
 
15
 
16
  def update_dataset_view(diff_idx):
 
9
  df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
10
  n_diffs_synthetic = len(df_synthetic)
11
 
12
+ STATISTICS = {"manual": statistics.get_statistics_for_df(df_manual),
13
+ "synthetic": statistics.get_statistics_for_df(df_synthetic)}
14
 
15
 
16
  def update_dataset_view(diff_idx):
statistics.py CHANGED
@@ -22,18 +22,10 @@ def get_statistics(start_msg, end_msg, annotated_msg):
22
  }
23
 
24
 
25
- def get_statistics_for_df(df: pd.DataFrame, start_col, end_col, annotated_col):
26
- stats = [get_statistics(row[start_col], row[end_col], row[annotated_col]) for _, row in df.iterrows()]
 
27
 
28
  assert len(stats) > 0
29
 
30
  return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
31
-
32
-
33
- def get_statistics_for_manual_df(df):
34
- return get_statistics_for_df(df, start_col="commit_msg_start", end_col='commit_msg_end',
35
- annotated_col='annotated_diff')
36
-
37
-
38
- def get_statistics_for_synthetic_df(df):
39
- return get_statistics_for_df(df, start_col="initial_msg_pred", end_col='reference', annotated_col='annotated_diff')
 
22
  }
23
 
24
 
25
+ def get_statistics_for_df(df: pd.DataFrame):
26
+ stats = [get_statistics(row["commit_msg_start"], row["commit_msg_end"], row["annotated_diff"]) for _, row in
27
+ df.iterrows()]
28
 
29
  assert len(stats) > 0
30
 
31
  return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}