File size: 1,590 Bytes
e78c13e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import io
import os
import struct
from PIL import Image

class BigFileMemoryLoader(object):
    def __load_bigfile(self):
        print('start load bigfile (%0.02f GB) into memory' % (os.path.getsize(self.file_path)/1024/1024/1024))
        with open(self.file_path, 'rb') as fid:
            self.img_num = struct.unpack('i', fid.read(4))[0]
            self.img_names = []
            self.img_bytes = []
            print('find total %d images' % self.img_num)
            for i in range(self.img_num):
                img_name_len = struct.unpack('i', fid.read(4))[0]
                img_name = fid.read(img_name_len).decode('utf-8')
                self.img_names.append(img_name)
                img_bytes_len = struct.unpack('i', fid.read(4))[0]
                self.img_bytes.append(fid.read(img_bytes_len))
                if i % 5000 == 0:
                    print('load %d images done' % i)
            print('load all %d images done' % self.img_num)

    def __init__(self, file_path):
        super(BigFileMemoryLoader, self).__init__()
        self.file_path = file_path
        self.__load_bigfile()

    def __getitem__(self, index):
        try:
            img = Image.open(io.BytesIO(self.img_bytes[index])).convert('RGB')
            return self.img_names[index], img
        except Exception:
            print('Image read error for index %d: %s' % (index, self.img_names[index]))
            return self.__getitem__((index+1)%self.img_num)


    def __len__(self):
        return self.img_num