"""Project-wide configuration constants and output locations.

Importing this module has side effects: it reads two tokens from the
process environment and creates the cache/output directories (relative to
the current working directory) if they do not exist yet.
"""

import os
from pathlib import Path

# NOTE(review): presumably the seed shared by randomized steps -- confirm
# against the call sites.
RANDOM_STATE = 42

# --- Grazie API ------------------------------------------------------------
# Read from the environment; the value is None when the variable is unset.
GRAZIE_API_JWT_TOKEN = os.environ.get("GRAZIE_API_JWT_TOKEN")
# NOTE(review): the name suggests a timeout in seconds -- confirm at usage.
GRAZIE_TIMEOUT_SEC = 1.0

# --- Dataset identifiers ---------------------------------------------------
# NOTE(review): the HF_ prefix presumably stands for Hugging Face -- confirm.
# Read from the environment; the value is None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

HF_RAW_DATASET_NAME = "JetBrains-Research/commit-msg-rewriting"
HF_RAW_DATASET_SPLIT = "train"

HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
HF_FULL_COMMITS_DATASET_SPLIT = "test"

HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
HF_PREDICTIONS_DATASET_SPLIT = "test"

HF_SYNTHETIC_DATASET_NAME = "JetBrains-Research/synthetic-commit-msg-rewriting"
HF_SYNTHETIC_DATASET_SPLIT = "train"

# --- LLM -------------------------------------------------------------------
LLM_MODEL = "gpt-4-1106-preview"

# --- Directories (created on import) ---------------------------------------
# parents=True keeps the setup working if one of these paths is ever changed
# to a nested location; exist_ok=True makes repeated imports harmless.
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Output artifacts (all located inside OUTPUT_DIR) ----------------------
END_TO_START_ARTIFACT = OUTPUT_DIR / "end_to_start.csv"
START_TO_END_ARTIFACT = OUTPUT_DIR / "start_to_end.csv"
SYNTHETIC_DATASET_ARTIFACT = OUTPUT_DIR / "synthetic.csv"
METRICS_CORRELATIONS_ARTIFACT = OUTPUT_DIR / "metrics_correlations.csv"
DATA_FOR_LABELING_ARTIFACT = OUTPUT_DIR / "data_for_labeling.csv"

OUTPUT_CHARTS_DIR = OUTPUT_DIR / "charts"
OUTPUT_CHARTS_DIR.mkdir(parents=True, exist_ok=True)