Spaces:
Runtime error
Runtime error
from datasets import load_dataset | |
import config | |
def load_raw_rewriting_dataset_as_pandas():
    """Load the raw rewriting dataset and return it converted to pandas.

    The dataset name, split, access token and cache directory are all read
    from ``config``.
    """
    raw_dataset = load_dataset(
        config.HF_RAW_DATASET_NAME,
        split=config.HF_RAW_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    )
    return raw_dataset.to_pandas()
def load_full_commit_dataset_as_pandas():
    """Load the full-commits dataset as pandas, renaming ``message``.

    The dataset path, configuration name, split and cache directory are read
    from ``config``; no access token is passed for this dataset. The
    ``message`` column is renamed to ``reference`` in the returned frame.
    """
    commits = load_dataset(
        path=config.HF_FULL_COMMITS_DATASET_NAME,
        name=config.HF_FULL_COMMITS_DATASET_SUBNAME,
        split=config.HF_FULL_COMMITS_DATASET_SPLIT,
        cache_dir=config.CACHE_DIR,
    )
    commits_frame = commits.to_pandas()
    return commits_frame.rename(columns={'message': 'reference'})
def load_processed_rewriting_dataset_as_pandas():
    """Return the raw rewriting rows with the matching ``mods`` column added.

    Both source frames are indexed by ``(hash, repo)`` and combined with a
    left join, so every rewriting row is kept whether or not the full-commits
    dataset has a matching entry. The index is reset before returning, so
    ``hash`` and ``repo`` come back as ordinary columns.
    """
    key_columns = ["hash", "repo"]

    # Keep only the columns needed from the manually rewritten messages.
    rewriting_columns = key_columns + ["commit_msg_start", "commit_msg_end", "session"]
    rewriting = load_raw_rewriting_dataset_as_pandas()[rewriting_columns]
    rewriting = rewriting.set_index(key_columns)

    # From the full-commits data only the modifications are needed.
    mods = load_full_commit_dataset_as_pandas()[key_columns + ["mods"]]
    mods = mods.set_index(key_columns)

    joined = rewriting.join(other=mods, how='left')
    return joined.reset_index()
def load_synthetic_dataset_as_pandas():
    """Load the synthetic dataset and return it converted to pandas.

    The dataset name, split, access token and cache directory are all read
    from ``config``.
    """
    synthetic_dataset = load_dataset(
        config.HF_SYNTHETIC_DATASET_NAME,
        split=config.HF_SYNTHETIC_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    )
    return synthetic_dataset.to_pandas()