preview_dataset / main.py
jleibs's picture
Rename vars for clarity
a162ed1
raw
history blame
1.25 kB
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import logging
import rerun as rr
from datasets import load_dataset
from dataset_conversion import log_dataset_to_rerun
logger = logging.getLogger(__name__)
def main() -> None:
    """Load a HuggingFace dataset, keep a single episode, and log it to Rerun.

    Command-line arguments:
        --dataset: Name of the dataset to load (default: ``lerobot/pusht``).
        --episode-index: Episode to select, parsed as an integer (default: 1).

    The dataset is opened in streaming mode, so frames are filtered lazily as
    they are consumed by ``log_dataset_to_rerun``.
    """
    # Ensure the logging gets written to stderr:
    root_logger = logging.getLogger()
    root_logger.addHandler(logging.StreamHandler())
    root_logger.setLevel(logging.INFO)

    parser = argparse.ArgumentParser(description="Log a HuggingFace dataset to Rerun.")
    parser.add_argument("--dataset", default="lerobot/pusht", help="The name of the dataset to load")
    # `type=int` matters: without it a value given on the command line stays a
    # string (e.g. "3"), which never compares equal to an integer episode index,
    # so the filter below would drop every frame. Only the integer default
    # would have worked.
    parser.add_argument("--episode-index", type=int, default=1, help="Which episode to select")
    args = parser.parse_args()

    print("Loading dataset…")
    dataset = load_dataset(args.dataset, split="train", streaming=True)

    # This is for LeRobot datasets (https://huggingface.co/lerobot):
    # frames without an "episode_index" field are kept unchanged; frames that
    # have one are kept only when it matches the requested episode.
    ds_subset = dataset.filter(
        lambda frame: "episode_index" not in frame or frame["episode_index"] == args.episode_index
    )

    print("Starting Rerun…")
    rr.init(f"rerun_example_huggingface {args.dataset}", spawn=True)

    print("Logging to Rerun…")
    log_dataset_to_rerun(ds_subset)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()