mcding
New initial commit
8e1a6c6
import os
from huggingface_hub import login
from datasets import load_dataset
from renumics import spotlight
# Every entry is a (repo_id, config_name, split) tuple. The repo id is the
# HF_USERNAME environment variable followed by the per-dataset suffix, so
# importing this module raises KeyError when HF_USERNAME is not set.
DATASET_LIST = [
    (os.environ["HF_USERNAME"] + repo_suffix, config_name, split_name)
    for repo_suffix, config_name, split_name in (
        ("/Easy2Hard-AMC", "v1", "default"),
        ("/Easy2Hard-Lichess", "v1", "default"),
        ("/Easy2Hard-ARC", "v1", "test"),
        ("/Easy2Hard-GSM8K", "v1", "test"),
        ("/Easy2Hard-HellaSwag", "v1", "validation"),
        ("/Easy2Hard-Winogrande", "v1", "validation"),
        ("/Easy2Hard-Leaderboard", "v1", "default"),
    )
]
if __name__ == "__main__":
    # Authenticate with the Hugging Face Hub using the token read from the
    # HF_TOKEN environment variable (KeyError if it is missing).
    login(token=os.environ["HF_TOKEN"])

    # Load each configured (repo, config, split) and write it to a parquet
    # file in the current working directory. The file name is the repo id
    # with "/" replaced by "_", followed by the config and split names.
    for repo_id, config_name, split_name in DATASET_LIST:
        ds = load_dataset(repo_id, config_name, split=split_name)
        parquet_path = (
            f"{repo_id.replace('/', '_')}_{config_name}_{split_name}.parquet"
        )
        ds.to_parquet(parquet_path)

    # NOTE(review): the viewer is started once, after the loop, so it opens
    # on the last dataset in DATASET_LIST. Presumably the other parquet
    # files written above are meant to be opened through the file browser
    # (folder="." with allow_filebrowsing=True) - confirm this is intended.
    frame = ds.to_pandas()
    view = spotlight.show(
        dataset=frame,
        folder=".",
        port=7860,
        host="0.0.0.0",
        allow_filebrowsing=True,
        wait="forever",
    )