Martijn van Beers committed on
Commit
38c1d39
1 Parent(s): f4a1b77

Use huggingface datasets library

Browse files
Files changed (1) hide show
  1. crowspairs.py +3 -6
crowspairs.py CHANGED
@@ -4,18 +4,15 @@ from torch.utils.data import Dataset
4
  from tqdm import tqdm
5
 
6
  from evaluation.tasks.auto_task import AutoTask
 
7
 
8
 
9
  class CrowSPairsDataset(Dataset):
10
  def __init__(self):
11
  super().__init__()
12
 
13
- # TODO: maybe implement using HuggingFace Datasets
14
- # https://huggingface.co/datasets/crows_pairs
15
-
16
- # Load CrowS-Pairs dataset from URL
17
- url = "https://raw.githubusercontent.com/nyu-mll/crows-pairs/master/data/crows_pairs_anonymized.csv"
18
- df = pd.read_csv(url)
19
 
20
  # if direction is stereo, sent1, sent2 are sent_more, sent_less respectively,
21
  # otherwise the other way around
 
4
  from tqdm import tqdm
5
 
6
  from evaluation.tasks.auto_task import AutoTask
7
+ import datasets
8
 
9
 
10
  class CrowSPairsDataset(Dataset):
11
  def __init__(self):
12
  super().__init__()
13
 
14
+ dataset = datasets.load_dataset("BigScienceBiasEval/crows_pairs_multilingual")
15
+ df = dataset['test'].to_pandas()
 
 
 
 
16
 
17
  # if direction is stereo, sent1, sent2 are sent_more, sent_less respectively,
18
  # otherwise the other way around