# PUMP/datasets/web_images.py
# Philippe Weinzaepfel
# huggingface demo
# 3ef85e9
# Copyright 2022-present NAVER Corp.
# CC BY-NC-SA 4.0
# Available only for non-commercial use
from pdb import set_trace as bb
import os, os.path as osp
from tqdm import trange
from .image_set import ImageSet, verify_img
class RandomWebImages (ImageSet):
    """ 1 million distractors from Oxford and Paris Revisited
        see http://ptak.felk.cvut.cz/revisitop/revisitop1m/

    The image list is assembled chunk by chunk, for each chunk index `i` in
    `range(start, end)`:
      - if the cache file `<root>/image_list_<i>.txt` can be read, its
        stripped lines are used as the image file names of that chunk;
      - otherwise the 4 folders named after the 3-digit, zero-padded
        lowercase hex values of `4*i .. 4*i+3` are scanned under `root`,
        the entries for which `verify_img(path, exts='.jpg')` is truthy are
        kept, and the resulting list is written to the cache file.

    Args:
        start: index of the first chunk to load (inclusive).
        end:   index of the last chunk to load (exclusive).
        root:  directory holding the image folders and the cache files.

    Raises:
        AssertionError: if a cache file is empty, or if rebuilding a chunk
            yields no image at all.
    """
    def __init__(self, start=0, end=52, root="datasets/revisitop1m"):
        bar = None   # progress bar, only created on the first cache miss
        imgs = []    # image file names accumulated over all chunks
        for i in range(start, end):
            img_list_path = osp.join(root, "image_list_%d.txt"%i)
            try:
                # read cached list
                with open(img_list_path) as f:
                    chunk_imgs = [e.strip() for e in f]
            except IOError:
                if bar is None:
                    # The bar counts folders (4 per chunk): its range and the
                    # initial offset are both expressed in folder units so
                    # that they stay consistent when start != 0.
                    bar = trange(4*start, 4*end, desc='Caching')
                    bar.update(4*(i - start))

                # create it; a separate list is used so that the images of
                # the previous chunks are neither lost nor duplicated
                chunk_imgs = self._scan_chunk(root, i, bar)
                with open(img_list_path,'w') as f:
                    f.write('\n'.join(chunk_imgs))
            else:
                assert chunk_imgs, f"Cache '{img_list_path}' is empty!"

            imgs += chunk_imgs

        # Cached chunks and missing folders do not advance the bar: fill it up.
        if bar is not None: bar.update(bar.total - bar.n)
        super().__init__(root, imgs)

    @staticmethod
    def _scan_chunk(root, i, bar):
        """ List the image file names of chunk `i` by scanning its 4 folders.

        Folders that do not exist under `root` are skipped; `bar` is advanced
        by one for each folder actually scanned.
        """
        found = []
        for d in range(i*4,(i+1)*4): # 4096 folders in total, on average 256 each
            key = hex(d)[2:].zfill(3)
            folder = osp.join(root, key)
            if not osp.isdir(folder): continue
            found += [f for f in os.listdir(folder) if verify_img(osp.join(folder, f), exts='.jpg')]
            bar.update(1)
        # NOTE: the message names the last folder of the chunk only.
        assert found, f"No images found in {folder}/"
        return found

    def get_image_path(self, idx):
        """ Return the path of image `idx`, i.e. `<root>/<key[:3]>/<key>`
        where `key` is the image file name: the folder name is made of the
        first 3 characters of the file name.
        """
        # assumes ImageSet.__init__ stores its arguments as self.root / self.imgs
        # -- TODO confirm against image_set.py
        key = self.imgs[idx]
        return osp.join(self.root, key[:3], key)