manhkhanhUIT's picture
Init
e78c13e
raw
history blame
No virus
1.59 kB
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import io
import os
import struct
from PIL import Image
class BigFileMemoryLoader(object):
    """Load every image stored in a packed "bigfile" into memory.

    File layout, as read by this class (integers use ``struct`` format
    ``'i'``, i.e. native byte order and size):

        int32  image count
        repeated ``image count`` times:
            int32  length of the image name in bytes
            bytes  image name, UTF-8 encoded
            int32  length of the image payload in bytes
            bytes  encoded image payload (decoded lazily with PIL)

    Attributes:
        file_path: path of the bigfile given to the constructor.
        img_num: number of images declared in the file header.
        img_names: list of decoded image names, in file order.
        img_bytes: list of raw (still encoded) image payloads, in file order.
    """

    # Pre-compiled format shared by every integer field in the file.
    _INT = struct.Struct('i')

    def __init__(self, file_path):
        """Read the whole bigfile at ``file_path`` into memory.

        Raises:
            EOFError: the file ends before a declared field is complete.
            ValueError: a count or length field is negative.
        """
        super(BigFileMemoryLoader, self).__init__()
        self.file_path = file_path
        self.__load_bigfile()

    @staticmethod
    def _read_exact(fid, size, what):
        """Read exactly ``size`` bytes from ``fid`` or raise.

        A negative ``size`` is rejected because ``read(-1)`` would silently
        consume the rest of the file; a short read means the file is truncated.
        """
        if size < 0:
            raise ValueError('corrupt bigfile: negative length %d for %s' % (size, what))
        data = fid.read(size)
        if len(data) != size:
            raise EOFError('truncated bigfile: expected %d bytes for %s, got %d'
                           % (size, what, len(data)))
        return data

    @classmethod
    def _read_int(cls, fid, what):
        """Read one integer field from ``fid``."""
        return cls._INT.unpack(cls._read_exact(fid, cls._INT.size, what))[0]

    def __load_bigfile(self):
        """Parse the bigfile and populate ``img_num``, ``img_names``, ``img_bytes``."""
        print('start load bigfile (%0.02f GB) into memory' % (os.path.getsize(self.file_path)/1024/1024/1024))
        with open(self.file_path, 'rb') as fid:
            self.img_num = self._read_int(fid, 'image count')
            if self.img_num < 0:
                raise ValueError('corrupt bigfile: negative image count %d' % self.img_num)
            self.img_names = []
            self.img_bytes = []
            print('find total %d images' % self.img_num)
            for i in range(self.img_num):
                name_len = self._read_int(fid, 'name length of image %d' % i)
                raw_name = self._read_exact(fid, name_len, 'name of image %d' % i)
                self.img_names.append(raw_name.decode('utf-8'))
                data_len = self._read_int(fid, 'data length of image %d' % i)
                self.img_bytes.append(self._read_exact(fid, data_len, 'data of image %d' % i))
                if i % 5000 == 0:
                    print('load %d images done' % i)
            print('load all %d images done' % self.img_num)

    def __getitem__(self, index):
        """Return ``(name, RGB PIL image)`` for ``index``.

        If the image at ``index`` cannot be decoded, the following images are
        tried in order (wrapping around) and the first readable one is
        returned, so the returned name may belong to a different index.

        Raises:
            IndexError: ``index`` is outside ``[-len(self), len(self))``.
            RuntimeError: no image in the file could be decoded.
        """
        total = self.img_num
        if not -total <= index < total:
            raise IndexError('index %d out of range for %d images' % (index, total))
        current = index % total
        # Bounded loop instead of recursion: each image is tried at most once.
        for _ in range(total):
            try:
                img = Image.open(io.BytesIO(self.img_bytes[current])).convert('RGB')
            except Exception:
                # Deliberately broad: PIL raises many unrelated exception types
                # for damaged data, and the loader is best-effort here.
                print('Image read error for index %d: %s' % (current, self.img_names[current]))
                current = (current + 1) % total
            else:
                return self.img_names[current], img
        raise RuntimeError('none of the %d images in %s could be read' % (total, self.file_path))

    def __len__(self):
        """Return the number of images declared in the file header."""
        return self.img_num