Petr Tsvetkov committed on
Commit
305e536
•
1 Parent(s): 6e5778f

Download the commit-rewriting dataset

Browse files
Files changed (2) hide show
  1. config.py +6 -0
  2. hf_data_loader.py +10 -0
config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
import os

# Hugging Face access token, read from the HF_TOKEN environment variable.
# os.environ.get returns None when the variable is not set.
HF_TOKEN = os.environ.get('HF_TOKEN')

# Identifier of the raw commit-message-rewriting dataset to download.
HF_RAW_DATASET_NAME = "petrtsv-jb/commit-msg-rewriting"

# Name of the dataset split to load.
HF_RAW_DATASET_SPLIT = 'train'

# Relative directory name used as the download cache location.
CACHE_DIR = "cache"
hf_data_loader.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+
3
+ import config
4
+
5
+
6
def load_raw_dataset_as_pandas():
    """Load the raw dataset configured in ``config`` and convert it to pandas.

    Calls ``load_dataset`` with the dataset name, split, access token and
    cache directory taken from the ``config`` module, then calls
    ``to_pandas()`` on the loaded split.

    Returns:
        The result of ``to_pandas()`` on the loaded split (a pandas
        DataFrame, per the ``datasets`` documentation).
    """
    return load_dataset(
        config.HF_RAW_DATASET_NAME,
        split=config.HF_RAW_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    ).to_pandas()