Spaces:
Runtime error
Runtime error
File size: 593 Bytes
12f2e48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import argparse
from datasets import load_dataset, concatenate_datasets
def main(args):
    """Merge several Arrow-backed datasets, shuffle them, and save the result.

    Args:
        args: Parsed command-line namespace. Reads ``args.dataset`` (an
            iterable of dataset paths, each handed to ``load_dataset``) and
            ``args.output_folder`` (destination handed to ``save_to_disk``).

    Raises:
        ValueError: If no dataset path or no output folder was supplied.
    """
    # ``args.dataset`` may be None or empty when no path was supplied; fail
    # fast with a clear message instead of a TypeError from iterating None.
    if not args.dataset:
        raise ValueError("at least one dataset path is required (-d/--dataset)")
    # Check the destination up front so a missing value does not surface only
    # after every dataset has already been loaded and merged.
    if not args.output_folder:
        raise ValueError("an output folder is required (-o/--output_folder)")

    # Each path contributes its "train" split, built from its *.arrow files.
    parts = [
        load_dataset(dataset_path, split="train", data_files="*.arrow")
        for dataset_path in args.dataset
    ]
    merged = concatenate_datasets(parts)
    # No seed is passed, so the resulting order is not pinned by this script.
    merged = merged.shuffle()
    merged.save_to_disk(args.output_folder)
# Command-line entry point: collect one or more dataset paths and an output
# folder, then hand the parsed namespace to main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # ``action="append"`` lets -d be repeated; without ``required=True`` the
    # attribute would be None when the flag is omitted and main() would crash.
    parser.add_argument(
        "-d",
        "--dataset",
        type=str,
        action="append",
        required=True,
        help="dataset path to include; repeat the flag to merge several",
    )
    # Required so a missing destination is reported as a usage error before
    # any dataset is loaded.
    parser.add_argument(
        "-o",
        "--output_folder",
        type=str,
        required=True,
        help="folder where the merged, shuffled dataset is saved",
    )
    args = parser.parse_args()
    main(args)
|