# NOTE(review): islice is not referenced in the lines below; it is kept because
# later code (outside this snippet) may rely on it — confirm before removing.
from itertools import islice

from datasets import interleave_datasets, load_dataset

# Open the English and French "unshuffled_deduplicated" OSCAR configurations
# with streaming=True, so examples are read lazily while iterating rather than
# the whole train split being downloaded up front.
en_dataset = load_dataset(
    "oscar", "unshuffled_deduplicated_en", split="train", streaming=True
)
fr_dataset = load_dataset(
    "oscar", "unshuffled_deduplicated_fr", split="train", streaming=True
)

# Merge the two language streams into a single dataset. No sampling
# probabilities are passed, so the library's default interleaving applies.
multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])