File size: 496 Bytes
cc8e143 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import argparse
from datasets import load_dataset
# Convert a pair of plain-text files into a dataset saved on disk.
#
# Usage: <script> PATH_PREFIX OUTPUT_PATH [--cache-dir DIR]
#
# The inputs are expected at "<PATH_PREFIX>_train_text.txt" and
# "<PATH_PREFIX>_val_text.txt"; they become the "train" and "validation"
# splits respectively.
parser = argparse.ArgumentParser(
    description=(
        "Load '<path_prefix>_train_text.txt' and '<path_prefix>_val_text.txt' "
        "as the train/validation splits of a text dataset and save the "
        "result to disk."
    ),
)
parser.add_argument(
    "path_prefix",
    help="common prefix of the input files (suffixes '_train_text.txt' "
    "and '_val_text.txt' are appended)",
)
parser.add_argument(
    "output_path",
    help="destination passed to the dataset's save_to_disk()",
)
# The cache location used to be hard-coded; it is now overridable so the
# script also works where /dev/shm is missing or too small. The default is
# unchanged, so existing invocations behave exactly as before.
parser.add_argument(
    "--cache-dir",
    default="/dev/shm/.cache",
    help="cache directory handed to load_dataset (default: %(default)s)",
)
args = parser.parse_args()
path_prefix: str = args.path_prefix
output_path: str = args.output_path
cache_dir: str = args.cache_dir

# Each split is given as a one-element list of file paths built from the prefix.
dataset = load_dataset(
    "text",
    data_files={
        "train": [path_prefix + "_train_text.txt"],
        "validation": [path_prefix + "_val_text.txt"],
    },
    cache_dir=cache_dir,
)
dataset.save_to_disk(output_path)
|