File size: 2,123 Bytes
437b5f6
 
 
 
 
 
 
 
 
 
4c12b36
 
 
437b5f6
4c12b36
437b5f6
 
 
4c12b36
437b5f6
4c12b36
437b5f6
4c12b36
437b5f6
4c12b36
437b5f6
 
 
 
 
4c12b36
 
 
 
437b5f6
 
4c12b36
 
 
437b5f6
 
4c12b36
 
 
437b5f6
 
4c12b36
437b5f6
 
4c12b36
 
437b5f6
 
 
 
 
 
 
 
 
4c12b36
 
 
437b5f6
4c12b36
 
437b5f6
4c12b36
437b5f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use

import os, pdb
from tqdm import trange

from .dataset import Dataset


class RandomWebImages(Dataset):
    """1 million distractors from Oxford and Paris Revisited.

    See http://ptak.felk.cvut.cz/revisitop/revisitop1m/

    Images are looked up in sub-folders of ``root`` named by a zero-padded
    3-digit hexadecimal key (``000``, ``001``, ...). Folders are processed in
    chunks of four consecutive keys. The verified file names of chunk ``i``
    are cached in ``<root>/image_list_<i>.txt`` (one name per line) so that
    later runs read the list instead of re-scanning the folders.

    Args:
        start: index of the first chunk to load (inclusive).
        end: index of the last chunk to load (exclusive).
        root: directory containing the image folders and the cached lists.

    Attributes:
        root: the ``root`` argument.
        imgs: file names of all loaded images, in chunk order.
        nimg: number of entries in ``imgs``.

    Raises:
        AssertionError: if a cached list is empty, or if a chunk that has to
            be scanned contains no valid image.
    """

    # Number of consecutive hex-named folders grouped under one cache file.
    _FOLDERS_PER_CHUNK = 4

    def __init__(self, start=0, end=1024, root="data/revisitop1m"):
        Dataset.__init__(self)
        self.root = root

        per_chunk = self._FOLDERS_PER_CHUNK
        bar = None  # progress bar, created lazily on the first cache miss
        self.imgs = []
        for i in range(start, end):
            img_list_path = os.path.join(self.root, "image_list_%d.txt" % i)
            try:
                # read cached list
                with open(img_list_path) as cache_file:
                    cached_imgs = [line.strip() for line in cache_file]
            except IOError:
                if bar is None:
                    # The bar counts folders, so both bounds are expressed in
                    # folders and the chunks already read from cache (those
                    # between `start` and `i`) are marked as done.
                    bar = trange(per_chunk * start, per_chunk * end, desc="Caching")
                    bar.update(per_chunk * (i - start))

                # create it
                imgs = self._scan_chunk(i, bar)
                with open(img_list_path, "w") as cache_file:
                    cache_file.write("\n".join(imgs))
                self.imgs += imgs
            else:
                assert cached_imgs, f"Cache '{img_list_path}' is empty!"
                self.imgs += cached_imgs
                if bar is not None:
                    # keep the bar in sync when a later chunk was already cached
                    bar.update(per_chunk)

        if bar is not None:
            bar.close()
        self.nimg = len(self.imgs)

    def _scan_chunk(self, i, bar):
        """Return the verified image names found in the folders of chunk ``i``.

        ``bar`` is advanced by one for every folder of the chunk, whether or
        not the folder exists on disk.
        """
        per_chunk = self._FOLDERS_PER_CHUNK
        imgs = []
        # 4096 folders in total, on average 256 each
        for d in range(i * per_chunk, (i + 1) * per_chunk):
            key = hex(d)[2:].zfill(3)
            folder = os.path.join(self.root, key)
            if os.path.isdir(folder):
                # os.listdir() order is arbitrary; sorting makes the cached
                # list (and hence image indices) reproducible.
                imgs += [f for f in sorted(os.listdir(folder)) if verify_img(folder, f)]
            bar.update(1)
        assert imgs, f"No images found in {folder}/"
        return imgs

    def get_key(self, i):
        """Return the key of image ``i``: ``<first 3 chars of name>/<name>``.

        NOTE(review): presumably the first three characters of a file name
        equal the name of the folder that contains it -- confirm against the
        dataset layout.
        """
        key = self.imgs[i]
        return os.path.join(key[:3], key)


def verify_img(folder, f):
    """Tell whether file ``f`` in ``folder`` is a readable ``.jpg`` image.

    Args:
        folder: directory containing the file.
        f: file name (not a full path) to check.

    Returns:
        True if the name ends with ".jpg" and PIL can open the file and
        convert it to RGB; False otherwise.

    Raises:
        ImportError: if PIL is not installed. This is raised rather than
            reported as "invalid image", which would silently reject every
            file.
    """
    # Cheap name check first: no need to touch the disk for other files.
    if not f.endswith(".jpg"):
        return False

    # Imported here rather than at module level, so merely importing this
    # module does not require PIL.
    from PIL import Image

    path = os.path.join(folder, f)
    try:
        # The context manager closes the underlying file handle.
        with Image.open(path) as img:
            img.convert("RGB")  # try to open it
        return True
    except Exception:
        # Any decoding/IO failure means "not a usable image"; unlike a bare
        # `except`, this lets KeyboardInterrupt / SystemExit propagate.
        return False