#!/usr/bin/env python3
"""Build the list of SA-1B shard download links needed for LayoutSAM.

Reads the image list from ``./images_to_download.json``, works out which
``.tar`` shards those images live in, and writes one tab-separated
``<shard name>`` / ``<download URL>`` line per shard to
``layoutsam_shard_links.txt``.
"""
import json
from typing import Iterable, List

# Input: JSON file holding the LayoutSAM image list.
IMAGE_LIST_PATH = './images_to_download.json'
# Output: one "<shard>\t<url>" line per required shard.
OUTPUT_PATH = 'layoutsam_shard_links.txt'
# Clean HF URLs -- no headers, no query strings.
BASE_URL = "https://huggingface.co/datasets/Aber-r/SA-1B_backup/resolve/main/"


def shard_names(images: Iterable[str]) -> List[str]:
    """Return the sorted, de-duplicated shard archive names for *images*.

    The shard of an entry is the text before its first ``/`` with ``.tar``
    appended; an entry without any ``/`` is used whole.

    NOTE(review): assumes every entry is a string shaped like
    ``<shard>/<file>`` -- confirm against the producer of the JSON file.
    """
    return sorted({img.split('/')[0] + '.tar' for img in images})


def shard_link_lines(shards: Iterable[str], base: str = BASE_URL) -> List[str]:
    """Return one newline-terminated ``<shard>\\t<base><shard>`` line per shard."""
    return [f"{shard}\t{base}{shard}\n" for shard in shards]


def main() -> None:
    """Read the image list, then write the shard links file and report."""
    # 1) Load the LayoutSAM image list
    with open(IMAGE_LIST_PATH, encoding='utf-8') as f:
        images = json.load(f)

    # 2) Figure out which shards we need
    shards = shard_names(images)

    # 3) Write out the shard name / URL pairs
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        out.writelines(shard_link_lines(shards))

    print(f"Wrote {len(shards)} entries to layoutsam_shard_links.txt")


if __name__ == "__main__":
    main()