Petr Tsvetkov
Synthetic dataset visualization
5f3a4af
raw
history blame
695 Bytes
import os
from pathlib import Path
# --- Credentials -----------------------------------------------------------
# Both values are read from the process environment at import time and are
# None when the corresponding variable is not set.
GRAZIE_API_JWT_TOKEN = os.getenv("GRAZIE_API_JWT_TOKEN")
HF_TOKEN = os.getenv("HF_TOKEN")

# --- Dataset identifiers ---------------------------------------------------
# NOTE(review): the HF_ prefix presumably refers to the Hugging Face Hub;
# confirm against the code that consumes these names.

# Raw commit-message-rewriting dataset.
HF_RAW_DATASET_NAME = "petrtsv-jb/commit-msg-rewriting"
HF_RAW_DATASET_SPLIT = "train"

# Full-commits dataset, addressed by name, sub-name and split.
HF_FULL_COMMITS_DATASET_NAME = (
    "JetBrains-Research/lca-commit-message-generation"
)
HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
HF_FULL_COMMITS_DATASET_SPLIT = "test"

# Synthetic commit-message-rewriting dataset.
HF_SYNTHETIC_DATASET_NAME = "petrtsv-jb/synthetic-commit-msg-rewriting"
HF_SYNTHETIC_DATASET_SPLIT = "train"

# --- Working directories ---------------------------------------------------
# Paths are relative, so they resolve against the current working directory.
# Each directory is created on import; an already existing one is accepted.
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)

OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# CSV artifact for the synthetic dataset, located inside OUTPUT_DIR.
SYNTHETIC_DATASET_ARTIFACT = OUTPUT_DIR.joinpath("synthetic.csv")