#!/usr/bin/env python3
"""Write a tab-separated list of shard download links for LayoutSAM images.

Reads the image list from ``./images_to_download.json``, works out which
``.tar`` shards those images live in, and writes one ``<shard>\\t<url>`` line
per shard to ``layoutsam_shard_links.txt``.
"""
import json
from typing import Iterable, List

# Input: JSON document that is iterated to obtain image path strings.
IMAGE_LIST_PATH = './images_to_download.json'
# Output: one "<shard>\t<url>" line per required shard.
LINKS_PATH = 'layoutsam_shard_links.txt'
# Clean HF URL prefix -- no headers, no query strings.
BASE_URL = "https://huggingface.co/datasets/Aber-r/SA-1B_backup/resolve/main/"


def shard_names(images: Iterable[str]) -> List[str]:
    """Return the sorted, de-duplicated shard archive names for *images*.

    The text before the first ``/`` of each entry is taken as the shard name
    and ``.tar`` is appended. An entry without a ``/`` is used whole.
    """
    # A set collapses the many images that share a shard; sorting makes the
    # output file deterministic.
    return sorted({img.split('/')[0] + '.tar' for img in images})


def link_line(shard: str, base: str = BASE_URL) -> str:
    """Return the output line for *shard*: name, a tab, its URL, a newline."""
    return f"{shard}\t{base}{shard}\n"


def main() -> None:
    """Load the image list, write the shard link file, and report the count."""
    # 1) Load the LayoutSAM image list. JSON is UTF-8, so do not depend on the
    #    platform's default encoding.
    with open(IMAGE_LIST_PATH, encoding="utf-8") as f:
        images = json.load(f)

    # 2) Figure out which shards we need.
    shards = shard_names(images)

    # 3) Write out one link line per shard.
    with open(LINKS_PATH, 'w', encoding="utf-8") as out:
        out.writelines(link_line(shard) for shard in shards)

    print(f"Wrote {len(shards)} entries to {LINKS_PATH}")


if __name__ == "__main__":
    main()