Petr Tsvetkov committed on
Commit
a7bba68
1 Parent(s): 7ab7be2

Latest version of the code; config updated to JetBrains-Research

Browse files
analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
analysis_util.py CHANGED
@@ -59,15 +59,14 @@ def get_correlations_for_groups(df, right_side):
59
 
60
  for e2s in (False, True):
61
  for s2e in (False, True):
62
- group = ""
63
  if e2s:
64
  group += "+e2s"
65
  if s2e:
66
  group += "+s2e"
67
- if group == "":
68
- group = "golden"
69
 
70
- subdf = df[(df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)]
 
71
  subdf_corr = get_correlations_df(subdf, right_side=right_side)
72
  correlations[group] = subdf_corr
73
 
 
59
 
60
  for e2s in (False, True):
61
  for s2e in (False, True):
62
+ group = "golden"
63
  if e2s:
64
  group += "+e2s"
65
  if s2e:
66
  group += "+s2e"
 
 
67
 
68
+ subdf = df[((df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)) | (
69
+ (df["end_to_start"] == False) & (df["start_to_end"] == False))]
70
  subdf_corr = get_correlations_df(subdf, right_side=right_side)
71
  correlations[group] = subdf_corr
72
 
chart_processing.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
config.py CHANGED
@@ -8,7 +8,7 @@ GRAZIE_TIMEOUT_SEC = 1.0
8
 
9
  HF_TOKEN = os.environ.get('HF_TOKEN')
10
 
11
- HF_RAW_DATASET_NAME = "petrtsv-jb/commit-msg-rewriting"
12
  HF_RAW_DATASET_SPLIT = 'train'
13
 
14
  HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
@@ -19,7 +19,7 @@ HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
19
  HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
20
  HF_PREDICTIONS_DATASET_SPLIT = "test"
21
 
22
- HF_SYNTHETIC_DATASET_NAME = "petrtsv-jb/synthetic-commit-msg-rewriting"
23
  HF_SYNTHETIC_DATASET_SPLIT = 'train'
24
 
25
  LLM_MODEL = "gpt-4-1106-preview"
 
8
 
9
  HF_TOKEN = os.environ.get('HF_TOKEN')
10
 
11
+ HF_RAW_DATASET_NAME = "JetBrains-Research/commit-msg-rewriting"
12
  HF_RAW_DATASET_SPLIT = 'train'
13
 
14
  HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
 
19
  HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
20
  HF_PREDICTIONS_DATASET_SPLIT = "test"
21
 
22
+ HF_SYNTHETIC_DATASET_NAME = "JetBrains-Research/synthetic-commit-msg-rewriting"
23
  HF_SYNTHETIC_DATASET_SPLIT = 'train'
24
 
25
  LLM_MODEL = "gpt-4-1106-preview"
generation_steps/metrics_analysis.py CHANGED
@@ -147,7 +147,8 @@ IND_METRICS = {
147
  "ter": ter_fn,
148
  }
149
 
150
- AGGR_METRICS = IND_METRICS.copy()
 
151
  # del AGGR_METRICS["gptscore-ref-1-req"]
152
  # del AGGR_METRICS["gptscore-noref-1-req"]
153
 
 
147
  "ter": ter_fn,
148
  }
149
 
150
+ AGGR_METRICS = {}
151
+ # AGGR_METRICS = IND_METRICS.copy()
152
  # del AGGR_METRICS["gptscore-ref-1-req"]
153
  # del AGGR_METRICS["gptscore-noref-1-req"]
154