# Source: Hugging Face Space tomofi/NDLOCR, revision c9019cd (file size: 11,967 bytes; Space status: build error)
# -*- coding:utf-8 -*-
from PIL import Image
from ssd_tools.ssd_utils import BBoxUtility
from ssd_tools.ssd import SSD300
import cv2
import argparse
import os
from keras.applications.imagenet_utils import preprocess_input
from keras.preprocessing import image
import numpy as np
import gc
import glob
import json
from keras import backend as K
# Reset the Keras backend state before the model below is built.
K.clear_session()


# NOTE(review): presumably enables OpenCV's Jasper-based JPEG 2000 codec so
# that cv2.imread can decode such files — confirm against the OpenCV build.
os.environ["OPENCV_IO_ENABLE_JASPER"] = "true"
# Print floating point values in fixed-point notation (no scientific format).
np.set_printoptions(suppress=True)

# Parameters
batch_size = 10  # number of images taken per pass of the loop in divide_facing_page
NUM_CLASSES = 2  # passed to both SSD300 and BBoxUtility
input_shape = (300, 300, 3)  # model input shape; images are resized to 300x300 before prediction


# Detection model and box utility shared by every function in this module.
# The weights are loaded later (divide_facing_page / load_weightfile).
model = SSD300(input_shape, num_classes=NUM_CLASSES)
bbox_util = BBoxUtility(NUM_CLASSES)


# DPI values used when saving images (read via the keys "width_dpi" and
# "height_dpi"). Empty at import time; replaced by the contents of
# ssd_tools/dpiconfig.json when this file is run as a script.
dpiinfo = {}


def cv2pil(image):
    """Convert an OpenCV image array into a PIL image.

    Two-dimensional (monochrome) arrays are passed through unchanged,
    3-channel arrays are converted BGR -> RGB and 4-channel arrays
    BGRA -> RGBA. Arrays with any other channel count are handed to PIL
    as a plain copy.
    """
    converted = image.copy()
    if converted.ndim != 2:
        channels = converted.shape[2]
        if channels == 3:    # colour
            converted = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif channels == 4:  # colour with alpha
            converted = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
    return Image.fromarray(converted)


def resize_pil(pil_img, short):
    """Resize ``pil_img`` so that its shorter side becomes ``short`` pixels.

    The longer side is scaled by the same ratio and rounded to the nearest
    integer. Square images end up as ``short`` x ``short``. Returns the
    result of ``pil_img.resize``.
    """
    width, height = pil_img.size
    if width < height:
        # Portrait: the width is the short side.
        target = (short, int(height * short / width + 0.5))
    else:
        # Landscape or square: the height is the short side.
        target = (int(width * short / height + 0.5), short)
    return pil_img.resize(target)


def _dpi_save_kwargs():
    """Return the ``dpi`` keyword for ``Image.save`` when ``dpiinfo`` is populated."""
    if "width_dpi" in dpiinfo and "height_dpi" in dpiinfo:
        return {"dpi": (dpiinfo["width_dpi"], dpiinfo["height_dpi"])}
    # dpiinfo is only filled when the module runs as a script; library
    # callers still get their images saved, just without DPI metadata.
    return {}


def _append_trim_log(log, rows):
    """Append ``(file_name, trimming_x)`` rows to the TSV file at ``log``."""
    with open(log, mode='a') as f:
        for file_name, trimming_x in rows:
            f.write('{}\t{}\n'.format(file_name, trimming_x))


def _save_pil(pil_img, path, quality, short=None):
    """Save ``pil_img`` to ``path``, resizing its short side to ``short`` if given."""
    if short:
        pil_img = resize_pil(pil_img, short)
    pil_img.save(path, quality=quality, **_dpi_save_kwargs())


def divide_facing_page(input, input_path=None, output="NO_DUMP",
                       left='_01', right='_02', single='_00', ext='.jpg',
                       quality=100,  # output jpeg quality
                       short=None,
                       debug=False,
                       log='trim_pos.tsv',
                       conf_th=0.2,
                       with_cli=False):
    """Split facing-page images at the detected gutter.

    Every image is resized to 300x300, run through the module-level
    ``model`` and decoded with ``bbox_util``. When the top detection's
    confidence exceeds ``conf_th`` the image is cut at the horizontal centre
    of that box; otherwise it is treated as a single page.

    Args:
        input: With ``with_cli=False`` a path to an image file or to a
            directory of images. With ``with_cli=True`` a ``np.ndarray``
            image or a list of such images.
        input_path: Only used with ``with_cli=True``: file name (``str``) or
            list of file names for the given images; used to build the names
            written to the log.
        output: Directory for the split images, or ``"NO_DUMP"`` to write no
            images. Not used with ``with_cli=True``.
        left, right, single: File-name suffixes for the left page, the right
            page and an undivided page.
        ext: Output extension; ``"SAME"`` keeps each input's own extension.
        quality: Quality value passed to ``Image.save``.
        short: If set, saved images are resized so their short side has this
            length.
        debug: If true (and ``with_cli`` is false), images that were split
            are also saved with bounding boxes and the gutter line drawn on
            them, in ``output + '_rect'``.
        log: Path of a TSV file receiving ``file name <tab> trimming_x``
            rows; falsy to disable logging.
        conf_th: Confidence threshold above which a gutter is accepted.
        with_cli: If true, work on in-memory images and return the pages
            instead of writing them.

    Returns:
        With ``with_cli=True`` a flat list of page arrays in input order
        (one entry for an undivided image, left then right for a split one).
        Otherwise ``None``.

    Raises:
        ValueError: With ``with_cli=True`` when ``input`` / ``input_path``
            have an unsupported type or fewer names than images are given.
    """
    if not with_cli:
        model.load_weights(os.path.join('ssd_tools', 'weights.hdf5'), by_name=True)

    # Write the header only when the log file does not exist yet.
    if log and not os.path.exists(log):
        with open(log, mode='a') as f:
            f.write('image_name\ttrimming_x\n')

    imglist = []
    filenames = []
    if with_cli:
        if isinstance(input, np.ndarray):
            imglist = [input]
        elif isinstance(input, list):
            imglist = input
        else:
            raise ValueError(
                'input for divide_facing_page_with_cli must be np.array or list.')

        if isinstance(input_path, str):
            filenames = [input_path]
        elif isinstance(input_path, list):
            filenames = input_path
        else:
            raise ValueError(
                'input_path for divide_facing_page_with_cli must be str or list.')

        if len(filenames) < len(imglist):
            raise ValueError(
                'input_path must provide one file name per input image.')
    else:
        if os.path.isdir(input):
            imgpathlist = glob.glob(os.path.join(input, "*"))
        else:
            imgpathlist = [input]
        for imgpath in imgpathlist:
            cv_img = cv2.imread(imgpath, cv2.IMREAD_COLOR)
            if cv_img is None:
                # cv2.imread yields None for anything it cannot decode
                # (sub-directories, non-image files); skip instead of crashing.
                print('skip unreadable image: {}'.format(imgpath))
                continue
            imglist.append(cv_img)
            filenames.append(os.path.basename(imgpath))

    divided = []  # pages collected for the with_cli return value
    start = 0
    while start < len(imglist):
        images = imglist[start:start + batch_size]  # original size images
        inputs = [image.img_to_array(cv2pil(cv_img).resize((300, 300)))
                  for cv_img in images]             # resized to (300, 300)

        inputs = preprocess_input(np.array(inputs))
        preds = model.predict(inputs, batch_size=1, verbose=1)
        results = bbox_util.detection_out(preds)
        # results[i][b, p] ... i: image index; b: bbox index;
        # p: [label, confidence, xmin, ymin, xmax, ymax]
        for i, cvimg in enumerate(images):
            detections = results[i]
            has_detection = len(detections) > 0
            top_conf = detections[0, 1] if has_detection else 0.0
            print('img {} top conf: {}'.format(i, top_conf))

            # `start + i` is the position in the whole input, not in the batch.
            basename, ext_ori = os.path.splitext(
                os.path.basename(filenames[start + i]))
            # Resolve "SAME" per image without overwriting the `ext` argument.
            out_ext = ext_ori if ext == "SAME" else ext

            if not has_detection or top_conf <= conf_th:
                # No reliable gutter: keep the image as a single page.
                if log:
                    _append_trim_log(log, [(basename+single+out_ext, 0)])
                if with_cli:
                    divided.append(cvimg)
                elif output != "NO_DUMP":
                    _save_pil(cv2pil(cvimg),
                              os.path.join(output, basename+single+out_ext),
                              quality, short)
                continue

            # The box coordinates are relative; scale them to the image width.
            xmin = int(round(detections[0, 2] * cvimg.shape[1]))
            xmax = int(round(detections[0, 4] * cvimg.shape[1]))
            div_x = (xmin+xmax)//2
            left_img = cvimg[:, :div_x, :]
            right_img = cvimg[:, div_x:, :]

            if log:
                _append_trim_log(log, [(basename+left+out_ext, div_x-1),
                                       (basename+right+out_ext, div_x)])

            if with_cli:
                divided.extend([left_img, right_img])
                continue

            if output != "NO_DUMP":
                _save_pil(cv2pil(left_img),
                          os.path.join(output, basename+left+out_ext),
                          quality, short)
                _save_pil(cv2pil(right_img),
                          os.path.join(output, basename+right+out_ext),
                          quality, short)

            # (debug) add bounding boxes and the gutter line to the image
            if debug:
                for k, det in enumerate(detections):
                    xmin = int(round(det[2] * cvimg.shape[1]))
                    ymin = int(round(det[3] * cvimg.shape[0]))
                    xmax = int(round(det[4] * cvimg.shape[1]))
                    ymax = int(round(det[5] * cvimg.shape[0]))
                    print(det)
                    bgr = (0, 0, 255)
                    t = 2
                    if k == 0:
                        # The top box was accepted as the gutter (its
                        # confidence exceeds conf_th): draw it bold together
                        # with the cutting line.
                        t = 5
                        cv2.line(cvimg, ((xmin+xmax)//2, 0),
                                 ((xmin+xmax)//2, cvimg.shape[0]),
                                 color=(255, 0, 0), thickness=t)
                    cv2.rectangle(cvimg, (xmin, ymin),
                                  (xmax, ymax), bgr, thickness=t)
                os.makedirs(output+'_rect', exist_ok=True)
                _save_pil(cv2pil(cvimg),
                          os.path.join(output+'_rect', basename+out_ext),
                          quality)

        # Release the batch before the next one is allocated.
        del inputs, images
        gc.collect()
        start += batch_size

    if with_cli:
        return divided
    return None


def divide_facing_page_with_cli(input, input_path,
                                left='_01', right='_02', single='_00', ext='.jpg',
                                quality=100,  # output jpeg quality
                                short=None,
                                conf_th=0.2,
                                log='trim_pos.tsv'):
    """Run ``divide_facing_page`` on in-memory images.

    Forwards every argument unchanged and fixes ``output="NO_DUMP"``,
    ``debug=False`` and ``with_cli=True``. Returns whatever
    ``divide_facing_page`` returns in that mode.
    """
    forwarded = {
        'input': input,
        'input_path': input_path,
        'left': left,
        'right': right,
        'single': single,
        'ext': ext,
        'quality': quality,  # output jpeg quality
        'short': short,
        'log': log,
        'conf_th': conf_th,
    }
    # Settings that distinguish this entry point from the file-based one.
    fixed = {'output': "NO_DUMP", 'debug': False, 'with_cli': True}
    return divide_facing_page(**forwarded, **fixed)


def load_weightfile(model_path):
    """Load the weights stored at ``model_path`` into the module-level ``model``.

    The weights are loaded with ``by_name=True``.
    """
    model.load_weights(model_path, by_name=True)


def parse_args(argv=None):
    """Parse the command-line options of the gutter-splitting script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``input``, ``out``,
        ``left``, ``right``, ``single``, ``ext``, ``quality``, ``short``,
        ``debug`` and ``log``.
    """
    # Built from adjacent literals so that no source indentation leaks into
    # the usage line; lists every option the parser accepts.
    usage = ('python3 {} [-i INPUT] [-o OUT] [-l LEFT] [-r RIGHT] [-s SINGLE] '
             '[-e EXT] [-q QUALITY] [--short SHORT] [--debug] [-lg LOG]'
             ).format(__file__)
    argparser = argparse.ArgumentParser(
        usage=usage,
        description='Divide facing images at the gutter',
        formatter_class=argparse.RawTextHelpFormatter)
    argparser.add_argument(
        '-i',
        '--input',
        default='inference_input',
        help='input image file or directory path\n'
             '(default: inference_input)',
        type=str)
    argparser.add_argument(
        '-o',
        '--out',
        default='inference_output',
        help='directory path (default: inference_output)\n'
             'if OUT is "NO_DUMP", no images is output',
        type=str)
    argparser.add_argument(
        '-l',
        '--left',
        default='_01',
        help='file name footer of left side page image to be output\n'
             'e.g) input image:  input.jpg, LEFT: _01(default)\n'
             '     output image: input_01.jpg',
        type=str)
    argparser.add_argument(
        '-r',
        '--right',
        default='_02',
        help='file name footer of right side page image to be output\n'
             'e.g) input image:  input.jpg, RIGHT: _02(default)\n'
             '     output image: input_02.jpg',
        type=str)
    argparser.add_argument(
        '-s',
        '--single',
        default='_00',
        help='file name footer of the image with no detected gutters to be output\n'
             'e.g) input image:  input.jpg, SINGLE: _00(default)\n'
             '     output image: input_00.jpg',
        type=str)
    argparser.add_argument(
        '-e',
        '--ext',
        default='.jpg',
        help='output image extension. default: .jpg \n'
             'if EXT is \"SAME\", the same extension as the input image will be used.',
        type=str)
    argparser.add_argument(
        '-q', '--quality',
        default=100,
        dest='quality',
        help='output jpeg image quality.\n'
             '1 is worst quality and smallest file size,\n'
             'and 100 is best quality and largest file size.\n'
             '[1, 100], default: 100',
        type=int)
    argparser.add_argument(
        '--short',
        default=None,
        dest='short',
        help='the length of the short side of the output image.',
        type=int)
    argparser.add_argument(
        '--debug',
        action='store_true')
    argparser.add_argument(
        '-lg', '--log',
        default=None,
        # RawTextHelpFormatter prints the text verbatim, so the line breaks
        # have to be written explicitly.
        help='path of the tsv file that records the split x position\n'
             'output format:\n'
             'file name <tab> trimming_x',
        type=str)

    return argparser.parse_args(argv)


if __name__ == '__main__':
    # Script entry point: read the options, load the DPI settings, prepare
    # the output directory and run the splitter on the requested input.
    args = parse_args()

    # The DPI values attached to saved images come from the bundled config.
    dpi_config_path = os.path.join('ssd_tools', 'dpiconfig.json')
    with open(dpi_config_path) as config_file:
        dpiinfo = json.load(config_file)

    if args.out == "NO_DUMP":
        print('Not dump split images')
    else:
        os.makedirs(args.out, exist_ok=True)

    if args.debug:
        print('Run in debug mode: dump images added bounding box and gutter lines')
    if args.log is not None:
        print('Export estimated gutter position to {}'.format(args.log))

    divide_facing_page(
        input=args.input,
        output=args.out,
        left=args.left,
        right=args.right,
        single=args.single,
        ext=args.ext,
        quality=args.quality,
        short=args.short,
        debug=args.debug,
        log=args.log,
    )