from itertools import islice

from datasets import interleave_datasets, load_dataset

# Open the English and French OSCAR training splits in streaming mode
# (streaming=True), so examples are fetched lazily while iterating rather
# than the whole corpus being downloaded first.
en_dataset = load_dataset(
    'oscar', "unshuffled_deduplicated_en", split='train', streaming=True
)
fr_dataset = load_dataset(
    'oscar', "unshuffled_deduplicated_fr", split='train', streaming=True
)

# Combine both language streams into a single dataset. No sampling
# probabilities are passed, so the library's default interleaving applies.
multilingual_dataset = interleave_datasets([en_dataset, fr_dataset])