tokenizer-midi / jsonl_to_parquet.py
Phương
Upload folder using huggingface_hub
0dc226d
raw
history blame contribute delete
411 Bytes
from datasets import load_dataset
# Export the "text" column of the "nRuaif/MusicLM" train split to a plain-text
# file, writing one example per line.

# Request only the "text" column, and stream the split so examples are
# iterated lazily instead of being loaded up front.
dataset = load_dataset(
    "nRuaif/MusicLM",
    split="train",
    columns=["text"],
    streaming=True,
)

# Pin the encoding explicitly: without it, open() falls back to the platform's
# locale encoding, so the output bytes would vary between machines and
# non-ASCII text could raise UnicodeEncodeError on a non-UTF-8 locale.
with open("output.txt", "w", encoding="utf-8") as f:
    # Write each example's text followed by a newline separator.
    for example in dataset:
        f.write(example["text"] + "\n")