from datasets import load_dataset

import config


def load_raw_rewriting_dataset_as_pandas():
    """Load the raw rewriting dataset and return it converted with ``to_pandas()``.

    The dataset name, split, access token and cache directory are all read
    from the ``config`` module (``HF_RAW_DATASET_NAME``,
    ``HF_RAW_DATASET_SPLIT``, ``HF_TOKEN``, ``CACHE_DIR``).
    """
    raw_dataset = load_dataset(
        config.HF_RAW_DATASET_NAME,
        split=config.HF_RAW_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    )
    return raw_dataset.to_pandas()


def load_full_commit_dataset_as_pandas():
    """Load the full-commits dataset as pandas, renaming ``message`` to ``reference``.

    The dataset path, configuration name, split and cache directory come from
    the ``config`` module (``HF_FULL_COMMITS_DATASET_NAME``,
    ``HF_FULL_COMMITS_DATASET_SUBNAME``, ``HF_FULL_COMMITS_DATASET_SPLIT``,
    ``CACHE_DIR``). No access token is passed for this dataset.
    """
    commits_dataset = load_dataset(
        path=config.HF_FULL_COMMITS_DATASET_NAME,
        name=config.HF_FULL_COMMITS_DATASET_SUBNAME,
        split=config.HF_FULL_COMMITS_DATASET_SPLIT,
        cache_dir=config.CACHE_DIR,
    )
    commits_frame = commits_dataset.to_pandas()
    # Expose the 'message' column under the name 'reference'.
    column_renames = {'message': 'reference'}
    return commits_frame.rename(columns=column_renames)


def load_synthetic_dataset_as_pandas():
    """Load the synthetic dataset and return it converted with ``to_pandas()``.

    The dataset name, split, access token and cache directory are all read
    from the ``config`` module (``HF_SYNTHETIC_DATASET_NAME``,
    ``HF_SYNTHETIC_DATASET_SPLIT``, ``HF_TOKEN``, ``CACHE_DIR``).
    """
    synthetic_dataset = load_dataset(
        config.HF_SYNTHETIC_DATASET_NAME,
        split=config.HF_SYNTHETIC_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    )
    return synthetic_dataset.to_pandas()