# -*- coding:utf-8 -*-
"""Divide facing-page images at the gutter using an SSD300 detector.

The module can be run as a script (see ``parse_args``) or used as a library
through ``divide_facing_page`` / ``divide_facing_page_with_cli``.
"""
from PIL import Image
from ssd_tools.ssd_utils import BBoxUtility
from ssd_tools.ssd import SSD300
import cv2
import argparse
import os
from keras.applications.imagenet_utils import preprocess_input
from keras.preprocessing import image
import numpy as np
import gc
import glob
import json
from keras import backend as K

K.clear_session()
os.environ["OPENCV_IO_ENABLE_JASPER"] = "true"
np.set_printoptions(suppress=True)

# Parameters
batch_size = 10
NUM_CLASSES = 2
input_shape = (300, 300, 3)
model = SSD300(input_shape, num_classes=NUM_CLASSES)
bbox_util = BBoxUtility(NUM_CLASSES)
# Filled from ssd_tools/dpiconfig.json when the module is run as a script.
dpiinfo = {}


def cv2pil(image):
    """Convert an OpenCV image (numpy array) into a PIL image.

    2-D arrays are passed through unchanged (grayscale), 3-channel arrays are
    converted BGR -> RGB and 4-channel arrays BGRA -> RGBA.
    """
    new_image = image.copy()
    if new_image.ndim == 2:  # grayscale
        pass
    elif new_image.shape[2] == 3:  # color
        new_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    elif new_image.shape[2] == 4:  # color with alpha channel
        new_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
    new_image = Image.fromarray(new_image)
    return new_image


def resize_pil(pil_img, short):
    """Resize ``pil_img`` so that its shorter side becomes ``short`` pixels.

    The longer side is scaled by the same factor and rounded to the nearest
    integer. Returns the resized PIL image.
    """
    w, h = pil_img.size
    if w < h:
        h = int(h * short / w + 0.5)
        w = short
    else:
        w = int(w * short / h + 0.5)
        h = short
    return pil_img.resize((w, h))


def _collect_cli_inputs(images_in, input_path):
    """Validate in-memory inputs and return ``(imglist, filenames)``."""
    if isinstance(images_in, np.ndarray):
        imglist = [images_in]
    elif isinstance(images_in, list):
        # The original code accepted a list here but never used it.
        imglist = images_in
    else:
        raise ValueError(
            'input for divide_facing_page_with_cli must be np.array or list.')

    if isinstance(input_path, str):
        filenames = [input_path]
    elif isinstance(input_path, list):
        filenames = input_path
    else:
        raise ValueError(
            'input_path for divide_facing_page_with_cli must be str or list.')

    if len(filenames) < len(imglist):
        raise ValueError(
            'input_path must provide one file name per input image.')
    return imglist, filenames


def _load_images(path):
    """Read the image at ``path`` (or every file in that directory).

    Returns ``(imglist, filenames)``; files OpenCV cannot decode are skipped
    so that both lists stay aligned.
    """
    if os.path.isdir(path):
        imgpathlist = list(glob.glob(os.path.join(path, "*")))
    else:
        imgpathlist = [path]

    imglist = []
    filenames = []
    for imgpath in imgpathlist:
        cv_img = cv2.imread(imgpath, cv2.IMREAD_COLOR)
        if cv_img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Skip unreadable image file: {}'.format(imgpath))
            continue
        imglist.append(cv_img)
        filenames.append(os.path.basename(imgpath))
    return imglist, filenames


def _append_log(log, rows):
    """Append ``(file name, x position)`` rows to the TSV log, if enabled."""
    if not log:
        return
    with open(log, mode='a') as f:
        for name, pos in rows:
            f.write('{}\t{}\n'.format(name, pos))


def _save_image(pil_img, path, quality):
    """Save ``pil_img`` with the requested quality and the configured DPI."""
    save_kwargs = {'quality': quality}
    if dpiinfo:
        # A loaded but malformed config still raises KeyError on purpose.
        save_kwargs['dpi'] = (dpiinfo["width_dpi"], dpiinfo["height_dpi"])
    pil_img.save(path, **save_kwargs)


def _draw_debug(cvimg, detections, top_conf, conf_th):
    """Draw every bounding box and the top detection's center line in place."""
    height, width = cvimg.shape[:2]
    for k, det in enumerate(detections):
        xmin = int(round(det[2] * width))
        ymin = int(round(det[3] * height))
        xmax = int(round(det[4] * width))
        ymax = int(round(det[5] * height))
        print(det)
        bgr = (0, 0, 255)
        t = 2
        if k == 0:
            # Thicker strokes when the top detection is used as the gutter.
            if top_conf > conf_th:
                t = 5
            # NOTE(review): the source layout was lost; the line is assumed to
            # be drawn for the top detection regardless of confidence.
            cv2.line(cvimg, ((xmin + xmax) // 2, 0),
                     ((xmin + xmax) // 2, height),
                     color=(255, 0, 0), thickness=t)
        cv2.rectangle(cvimg, (xmin, ymin), (xmax, ymax), bgr, thickness=t)


def divide_facing_page(input, input_path=None, output="NO_DUMP",
                       left='_01', right='_02', single='_00',
                       ext='.jpg',
                       quality=100,  # output jpeg quality
                       short=None, debug=False,
                       log='trim_pos.tsv',
                       conf_th=0.2,
                       with_cli=False):
    """Detect the gutter of each image and split it into left/right pages.

    Args:
        input: without ``with_cli``, a path to an image file or a directory of
            images; with ``with_cli``, a numpy image or a list of them.
        input_path: with ``with_cli``, the file name (str) or list of file
            names matching ``input``; only used to build log entries.
        output: output directory, or ``"NO_DUMP"`` to write no page images.
        left, right, single: suffixes appended to the base name for the left
            page, the right page and an unsplit image.
        ext: output extension; ``"SAME"`` reuses each input's own extension.
        quality: quality passed to ``PIL.Image.save``.
        short: if truthy, output images are resized to this short-side length.
        debug: also save an annotated copy into ``output + '_rect'``.
        log: TSV path recording split positions; falsy disables logging.
        conf_th: a top detection with confidence <= this leaves the image
            unsplit.
        with_cli: operate on in-memory images and return the pages instead of
            loading weights and writing files.

    Returns:
        ``None`` without ``with_cli``. With ``with_cli`` and a single array,
        ``[image]`` or ``[left, right]``; with a list input, a list holding
        one such page list per image.

    Raises:
        ValueError: ``input`` / ``input_path`` have unsupported types, or
            fewer file names than images were given.
    """
    if not with_cli:
        model.load_weights(os.path.join('ssd_tools', 'weights.hdf5'),
                           by_name=True)

    if log and not os.path.exists(log):
        with open(log, mode='a') as f:
            f.write('image_name\ttrimming_x\n')

    if with_cli:
        imglist, filenames = _collect_cli_inputs(input, input_path)
    else:  # without_cli
        imglist, filenames = _load_images(input)

    cli_results = []
    for batch_start in range(0, len(imglist), batch_size):
        images = imglist[batch_start:batch_start + batch_size]  # original size
        inputs = [image.img_to_array(cv2pil(cv_img).resize((300, 300)))
                  for cv_img in images]  # resized to (300, 300)
        inputs = preprocess_input(np.array(inputs))

        preds = model.predict(inputs, batch_size=1, verbose=1)
        # results[i][b, p] ... i: image index; b: bbox index;
        # p: [label, confidence, xmin, ymin, xmax, ymax]
        results = bbox_util.detection_out(preds)

        for i, cvimg in enumerate(images):
            detections = results[i]
            no_detection = len(detections) == 0
            top_conf = 0.0 if no_detection else detections[0, 1]
            print('img {} top conf: {}'.format(i, top_conf))

            # Index file names by position in the whole input, not the batch.
            basename, ext_ori = os.path.splitext(
                os.path.basename(filenames[batch_start + i]))
            # Resolve "SAME" per image without overwriting the parameter.
            out_ext = ext_ori if ext == "SAME" else ext

            if no_detection or top_conf <= conf_th:
                _append_log(log, [(basename + single + out_ext, 0)])
                if with_cli:
                    cli_results.append([cvimg])
                    continue
                if output != "NO_DUMP":
                    im = cv2pil(cvimg)
                    if short:
                        im = resize_pil(im, short)
                    _save_image(
                        im,
                        os.path.join(output, basename + single + out_ext),
                        quality)
            else:
                width = cvimg.shape[1]
                xmin = int(round(detections[0, 2] * width))
                xmax = int(round(detections[0, 4] * width))
                div_x = (xmin + xmax) // 2
                _append_log(log, [(basename + left + out_ext, div_x - 1),
                                  (basename + right + out_ext, div_x)])

                # Two-index slicing works for 2-D and 3-D arrays alike.
                left_img = cvimg[:, :div_x]
                right_img = cvimg[:, div_x:]
                if with_cli:
                    cli_results.append([left_img, right_img])
                    continue
                if output != "NO_DUMP":
                    im1 = cv2pil(left_img)
                    im2 = cv2pil(right_img)
                    if short:
                        im1 = resize_pil(im1, short)
                        im2 = resize_pil(im2, short)
                    _save_image(
                        im1,
                        os.path.join(output, basename + left + out_ext),
                        quality)
                    _save_image(
                        im2,
                        os.path.join(output, basename + right + out_ext),
                        quality)

            # (debug) add bounding box and gutter line to the image
            if debug:
                _draw_debug(cvimg, detections, top_conf, conf_th)
                im = cv2pil(cvimg)
                os.makedirs(output + '_rect', exist_ok=True)
                _save_image(
                    im,
                    os.path.join(output + '_rect', basename + out_ext),
                    quality)

        del inputs, images
        gc.collect()

    if with_cli:
        if isinstance(input, np.ndarray):
            return cli_results[0]
        return cli_results
    return None


def divide_facing_page_with_cli(input, input_path,
                                left='_01', right='_02', single='_00',
                                ext='.jpg',
                                quality=100,  # output jpeg quality
                                short=None,
                                conf_th=0.2,
                                log='trim_pos.tsv'):
    """Split in-memory image(s) and return the pages without writing images.

    Thin wrapper around ``divide_facing_page`` with ``with_cli=True``,
    ``output="NO_DUMP"`` and ``debug=False``. Weights are not loaded here;
    ``load_weightfile`` is available for that.
    """
    return divide_facing_page(input=input,
                              input_path=input_path,
                              output="NO_DUMP",
                              left=left,
                              right=right,
                              single=single,
                              ext=ext,
                              quality=quality,  # output jpeg quality
                              short=short,
                              debug=False,
                              log=log,
                              conf_th=conf_th,
                              with_cli=True)


def load_weightfile(model_path):
    """Load weights from ``model_path`` into the module-level model."""
    model.load_weights(model_path, by_name=True)


def parse_args():
    """Build the command-line parser and return the parsed arguments."""
    usage = ('python3 {} [-i INPUT] [-o OUTPUT] [-l LEFT] [-r RIGHT] '
             '[-s SINGLE] [-e EXT] [-q QUALITY]').format(__file__)
    argparser = argparse.ArgumentParser(
        usage=usage,
        description='Divide facing images at the gutter',
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument(
        '-i', '--input',
        default='inference_input',
        help='input image file or directory path\n'
             '(default: inference_input)',
        type=str)
    argparser.add_argument(
        '-o', '--out',
        default='inference_output',
        help='directory path (default: inference_output)\n'
             'if OUT is "NO_DUMP", no images is output',
        type=str)
    argparser.add_argument(
        '-l', '--left',
        default='_01',
        help='file name footer of left side page image to be output\n'
             'e.g) input image: input.jpg, LEFT: _01(default)\n'
             ' output image: input_01.jpg',
        type=str)
    argparser.add_argument(
        '-r', '--right',
        default='_02',
        help='file name footer of right side page image to be output\n'
             'e.g) input image: input.jpg, RIGHT: _02(default)\n'
             ' output image: input_02.jpg',
        type=str)
    argparser.add_argument(
        '-s', '--single',
        default='_00',
        help='file name footer of the image with no detected gutters to be output\n'
             'e.g) input image: input.jpg, SINGLE: _00(default)\n'
             ' output image: input_00.jpg',
        type=str)
    argparser.add_argument(
        '-e', '--ext',
        default='.jpg',
        help='output image extension. default: .jpg \n'
             'if EXT is \"SAME\", the same extension as the input image will be used.',
        type=str)
    argparser.add_argument(
        '-q', '--quality',
        default=100,
        dest='quality',
        help='output jpeg image quality.\n'
             '1 is worst quality and smallest file size,\n'
             'and 100 is best quality and largest file size.\n'
             '[1, 100], default: 100',
        type=int)
    argparser.add_argument(
        '--short',
        default=None,
        dest='short',
        help='the length of the short side of the output image.',
        type=int)
    argparser.add_argument(
        '--debug',
        action='store_true')
    argparser.add_argument(
        '-lg', '--log',
        default=None,
        # Explicit newlines: adjacent literals are joined with no separator.
        help='path of the tsv file that records the split x position\n'
             'output format:\n'
             'file name trimming_x',
        type=str)
    return argparser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    with open(os.path.join('ssd_tools', 'dpiconfig.json')) as f:
        dpiinfo = json.load(f)

    if args.out != "NO_DUMP":
        os.makedirs(args.out, exist_ok=True)
    else:
        print('Not dump split images')

    if args.debug:
        print('Run in debug mode: dump images added bounding box and gutter lines')

    if args.log is not None:
        print('Export estimated gutter position to {}'.format(args.log))

    divide_facing_page(input=args.input,
                       output=args.out,
                       left=args.left,
                       right=args.right,
                       single=args.single,
                       ext=args.ext,
                       quality=args.quality,
                       short=args.short,
                       debug=args.debug,
                       log=args.log)