roberta-swedish / combinedataset.py
birgermoell's picture
Saving weights and logs of step 29001
774265c
raw
history blame contribute delete
330 Bytes
from itertools import islice

from datasets import interleave_datasets
from datasets import load_dataset
# Open the deduplicated English and French OSCAR training splits in streaming
# mode, so examples are fetched lazily during iteration rather than the whole
# corpus being downloaded first.
en_dataset = load_dataset(
    "oscar",
    "unshuffled_deduplicated_en",
    split="train",
    streaming=True,
)
fr_dataset = load_dataset(
    "oscar",
    "unshuffled_deduplicated_fr",
    split="train",
    streaming=True,
)

# Merge both streams into one dataset. No sampling probabilities are passed,
# so interleave_datasets falls back to its default behaviour of cycling
# through the sources in the order given here.
multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])