File size: 1,273 Bytes
305e536
aab3281
305e536
a8a595d
 
0c136d8
a8a595d
0c136d8
305e536
5f3a4af
a7bba68
305e536
aab3281
5f3a4af
 
 
 
6676c5a
 
 
 
a7bba68
5f3a4af
 
f5faae7
 
aab3281
 
 
 
 
 
e027012
 
0c136d8
e027012
6676c5a
7ab7be2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
from pathlib import Path

# Fixed integer constant; presumably the shared random seed -- confirm with callers.
RANDOM_STATE = 42

# Grazie API settings. The token is read from the environment once, at import
# time, and is None when the variable is not set.
GRAZIE_API_JWT_TOKEN = os.getenv("GRAZIE_API_JWT_TOKEN")
GRAZIE_TIMEOUT_SEC = 1.0  # presumably a timeout in seconds, per the name

# Token read from the HF_TOKEN environment variable; None when it is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Dataset identifiers below are plain strings; presumably Hugging Face Hub
# repository names, configuration ("subname") names and split names -- confirm
# against the code that loads them.

# Raw commit-message rewriting dataset.
HF_RAW_DATASET_NAME = "JetBrains-Research/commit-msg-rewriting"
HF_RAW_DATASET_SPLIT = "train"

# Full commits dataset.
HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
HF_FULL_COMMITS_DATASET_SPLIT = "test"

# Predictions dataset.
HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
HF_PREDICTIONS_DATASET_SPLIT = "test"

# Synthetic commit-message rewriting dataset.
HF_SYNTHETIC_DATASET_NAME = "JetBrains-Research/synthetic-commit-msg-rewriting"
HF_SYNTHETIC_DATASET_SPLIT = "train"

# Identifier of the LLM to use.
LLM_MODEL = "gpt-4-1106-preview"

# Working directories. All paths are relative, so they resolve against the
# current working directory of the importing process.
CACHE_DIR = Path("cache")
OUTPUT_DIR = Path("output")
OUTPUT_CHARTS_DIR = OUTPUT_DIR.joinpath("charts")

# The directories are created as a side effect of importing this module;
# exist_ok=True makes repeated imports/runs harmless. OUTPUT_DIR has to be
# created before OUTPUT_CHARTS_DIR because mkdir() is not asked to create
# missing parents.
CACHE_DIR.mkdir(exist_ok=True)
OUTPUT_DIR.mkdir(exist_ok=True)
OUTPUT_CHARTS_DIR.mkdir(exist_ok=True)

# CSV artifact paths inside OUTPUT_DIR (only the paths are defined here; the
# files themselves are not created by this module).
END_TO_START_ARTIFACT = OUTPUT_DIR.joinpath("end_to_start.csv")
START_TO_END_ARTIFACT = OUTPUT_DIR.joinpath("start_to_end.csv")
SYNTHETIC_DATASET_ARTIFACT = OUTPUT_DIR.joinpath("synthetic.csv")
METRICS_CORRELATIONS_ARTIFACT = OUTPUT_DIR.joinpath("metrics_correlations.csv")
DATA_FOR_LABELING_ARTIFACT = OUTPUT_DIR.joinpath("data_for_labeling.csv")