File size: 330 Bytes
774265c |
1 2 3 4 5 |
from itertools import islice

from datasets import interleave_datasets, load_dataset
# Open the English and French OSCAR configurations in streaming mode
# (streaming=True), so the 'train' split is requested as a stream instead of
# a fully downloaded local copy.
en_dataset = load_dataset('oscar', "unshuffled_deduplicated_en", split='train', streaming=True)
fr_dataset = load_dataset('oscar', "unshuffled_deduplicated_fr", split='train', streaming=True)

# Merge both language streams into one dataset. No sampling probabilities are
# passed, so the library's default interleaving strategy applies.
multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])