# manhkhanhUIT's picture
# Init
# e78c13e
# raw
# history blame
# No virus
# 1.95 kB
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import struct
from PIL import Image
# File-name suffixes treated as images. Matching is case-sensitive, so only
# the all-lowercase and all-uppercase spellings listed here are accepted.
IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]


def is_image_file(filename):
    """Return True if ``filename`` ends with one of ``IMG_EXTENSIONS``.

    Args:
        filename: File name or path as a ``str``.

    Returns:
        bool: True when the name ends with a listed suffix, False otherwise
        (including for the empty string).
    """
    # str.endswith accepts a tuple of suffixes, so one call replaces the
    # per-extension generator. The tuple is built per call so that later
    # edits to the IMG_EXTENSIONS list are still honoured.
    return filename.endswith(tuple(IMG_EXTENSIONS))
def make_dataset(dir):
    """Recursively collect the paths of all image files below ``dir``.

    Args:
        dir: Root directory to scan. The name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        list: ``os.path.join(root, fname)`` for every file accepted by
        ``is_image_file``, visited directory by directory in sorted order,
        with file names sorted inside each directory.

    Raises:
        AssertionError: If ``dir`` is not an existing directory.
    """
    # Kept as an assert so callers observe the same AssertionError as before;
    # note that this check is skipped when Python runs with -O.
    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    images = []
    for root, _, fnames in sorted(os.walk(dir)):
        # os.walk yields file names in arbitrary (OS-dependent) order; sort
        # them so the returned list is deterministic across platforms.
        for fname in sorted(fnames):
            if is_image_file(fname):
                images.append(os.path.join(root, fname))
    return images
### Modify these 3 lines in your own environment
indir = "/home/ziyuwan/workspace/data/temp_old"
target_folders = ['VOC', 'Real_L_old', 'Real_RGB_old']
out_dir = "/home/ziyuwan/workspace/data/temp_old"
###

# Create the output directory if it is missing; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(out_dir, exist_ok=True)

# Pack each target folder into "<out_dir>/<target_folder>.bigfile".
# Layout written below:
#   int  number of images
#   then, for every image in sorted path order:
#     int  length of the UTF-8 encoded base file name
#     ...  the file name bytes
#     int  length of the image file in bytes
#     ...  the raw image file bytes
# The struct format 'i' uses the machine's native byte order and size, so a
# reader must unpack with the same format on a compatible platform.
for target_folder in target_folders:
    curr_indir = os.path.join(indir, target_folder)
    curr_out_file = os.path.join(out_dir, '%s.bigfile' % (target_folder))
    image_lists = make_dataset(curr_indir)
    image_lists.sort()
    with open(curr_out_file, 'wb') as wfid:
        # write total image number
        wfid.write(struct.pack('i', len(image_lists)))
        for i, img_path in enumerate(image_lists):
            # write file name first (base name only, directory part dropped)
            img_name = os.path.basename(img_path)
            img_name_bytes = img_name.encode('utf-8')
            wfid.write(struct.pack('i', len(img_name_bytes)))
            wfid.write(img_name_bytes)

            # write image data: the file is copied verbatim, not decoded
            with open(img_path, 'rb') as img_fid:
                img_bytes = img_fid.read()
            wfid.write(struct.pack('i', len(img_bytes)))
            wfid.write(img_bytes)

            # progress report every 1000 images (also fires for i == 0)
            if i % 1000 == 0:
                print('write %d images done' % i)