import cv2
import os
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pickle
from random import shuffle
from scipy.misc import imread
from scipy.misc import imresize
import tensorflow as tf
import random

from ssd_tools.ssd import SSD300
from ssd_tools.ssd_training import MultiboxLoss
from ssd_tools.ssd_utils import BBoxUtility
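
# Paths to the training images and the pickled ground-truth annotation dictionary.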
IMAGE_DIR = os.path.join('training', 'img')
ANNOTATION_FILE = os.path.join('training', 'page_layout.pkl')

np.set_printoptions(suppress=True)
random.seed(77)

NUM_CLASSES = 2
input_shape = (300, 300, 3)
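
# Prior (anchor) boxes shipped with ssd_tools for the 300x300 SSD model.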
priors = pickle.load(open(os.path.join('ssd_tools', 'prior_boxes_ssd300.pkl'), 'rb'))
bbox_util = BBoxUtility(NUM_CLASSES, priors)
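
# Load the ground-truth boxes and split the image keys 90/10 into training and validation sets.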
gt = pickle.load(open(ANNOTATION_FILE, 'rb'))
keys = sorted(gt.keys())
random.shuffle(keys)
num_train = int(round(0.9 * len(keys)))
train_keys = keys[:num_train]
val_keys = keys[num_train:]
num_val = len(val_keys)
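

# Batch generator: yields (preprocessed image batch, encoded target batch) pairs with
# on-the-fly augmentation (color jitter, lighting noise, flips, random crops).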
class Generator(object):
    def __init__(self, gt, bbox_util,
                 batch_size, path_prefix,
                 train_keys, val_keys, image_size,
                 saturation_var=0.5,
                 brightness_var=0.5,
                 contrast_var=0.5,
                 lighting_std=0.5,
                 hflip_prob=0.5,
                 vflip_prob=0.5,
                 do_crop=True,
                 crop_area_range=[0.8, 1.0],
                 aspect_ratio_range=[1.0, 1.0]):
        self.gt = gt
        self.bbox_util = bbox_util
        self.batch_size = batch_size
        self.path_prefix = path_prefix
        self.train_keys = train_keys
        self.val_keys = val_keys
        self.train_batches = len(train_keys)
        self.val_batches = len(val_keys)
        self.image_size = image_size
        self.color_jitter = []
        if saturation_var:
            self.saturation_var = saturation_var
            self.color_jitter.append(self.saturation)
        if brightness_var:
            self.brightness_var = brightness_var
            self.color_jitter.append(self.brightness)
        if contrast_var:
            self.contrast_var = contrast_var
            self.color_jitter.append(self.contrast)
        self.lighting_std = lighting_std
        self.hflip_prob = hflip_prob
        self.vflip_prob = vflip_prob
        self.do_crop = do_crop
        self.crop_area_range = crop_area_range
        self.aspect_ratio_range = aspect_ratio_range
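
    # Color-jitter helpers: each scales the image around a reference (its grayscale
    # version for saturation, zero for brightness, the mean gray level for contrast).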
    def grayscale(self, rgb):
        return rgb.dot([0.299, 0.587, 0.114])

    def saturation(self, rgb):
        gs = self.grayscale(rgb)
        alpha = 2 * np.random.random() * self.saturation_var
        alpha += 1 - self.saturation_var
        rgb = rgb * alpha + (1 - alpha) * gs[:, :, None]
        return np.clip(rgb, 0, 255)

    def brightness(self, rgb):
        alpha = 2 * np.random.random() * self.brightness_var
        alpha += 1 - self.brightness_var
        rgb = rgb * alpha
        return np.clip(rgb, 0, 255)

    def contrast(self, rgb):
        gs = self.grayscale(rgb).mean() * np.ones_like(rgb)
        alpha = 2 * np.random.random() * self.contrast_var
        alpha += 1 - self.contrast_var
        rgb = rgb * alpha + (1 - alpha) * gs
        return np.clip(rgb, 0, 255)

    def lighting(self, img):
        cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)
        eigval, eigvec = np.linalg.eigh(cov)
        noise = np.random.randn(3) * self.lighting_std
        noise = eigvec.dot(eigval * noise) * 255
        img += noise
        return np.clip(img, 0, 255)
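
    # Geometric augmentations: each flip mirrors the image and the corresponding
    # normalized box coordinates with the configured probability.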
    def horizontal_flip(self, img, y):
        if np.random.random() < self.hflip_prob:
            img = img[:, ::-1]
            y[:, [0, 2]] = 1 - y[:, [2, 0]]
        return img, y

    def vertical_flip(self, img, y):
        if np.random.random() < self.vflip_prob:
            img = img[::-1]
            y[:, [1, 3]] = 1 - y[:, [3, 1]]
        return img, y
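
    # Random crop: keeps only boxes whose centre lies inside the crop window and
    # re-normalizes the surviving coordinates to the cropped image.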
    def random_sized_crop(self, img, targets):
        img_w = img.shape[1]
        img_h = img.shape[0]
        img_area = img_w * img_h
        random_scale = np.random.random()
        random_scale *= (self.crop_area_range[1] -
                         self.crop_area_range[0])
        random_scale += self.crop_area_range[0]
        target_area = random_scale * img_area
        random_ratio = np.random.random()
        random_ratio *= (self.aspect_ratio_range[1] -
                         self.aspect_ratio_range[0])
        random_ratio += self.aspect_ratio_range[0]
        w = np.round(np.sqrt(target_area * random_ratio))
        h = np.round(np.sqrt(target_area / random_ratio))
        if np.random.random() < 0.5:
            w, h = h, w
        w = min(w, img_w)
        w_rel = w / img_w
        w = int(w)
        h = min(h, img_h)
        h_rel = h / img_h
        h = int(h)
        x = np.random.random() * (img_w - w)
        x_rel = x / img_w
        x = int(x)
        y = np.random.random() * (img_h - h)
        y_rel = y / img_h
        y = int(y)
        img = img[y:y+h, x:x+w]
        new_targets = []
        for box in targets:
            cx = 0.5 * (box[0] + box[2])
            cy = 0.5 * (box[1] + box[3])
            if (x_rel < cx < x_rel + w_rel and
                    y_rel < cy < y_rel + h_rel):
                xmin = (box[0] - x_rel) / w_rel
                ymin = (box[1] - y_rel) / h_rel
                xmax = (box[2] - x_rel) / w_rel
                ymax = (box[3] - y_rel) / h_rel
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = min(1, xmax)
                ymax = min(1, ymax)
                box[:4] = [xmin, ymin, xmax, ymax]
                new_targets.append(box)
        new_targets = np.asarray(new_targets).reshape(-1, targets.shape[1])
        return img, new_targets
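
    # Endless batch generator: shuffles the keys on every pass, augments the training
    # images, encodes the ground truth against the priors with BBoxUtility, and yields
    # (preprocess_input(images), encoded_targets) batches.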
    def generate(self, train=True):
        while True:
            if train:
                shuffle(self.train_keys)
                keys = self.train_keys
            else:
                shuffle(self.val_keys)
                keys = self.val_keys
            inputs = []
            targets = []
            for key in keys:
                img_path = self.path_prefix + key
                img = imread(img_path, mode="RGB").astype('float32')
                y = self.gt[key].copy()
                if train and self.do_crop:
                    img, y = self.random_sized_crop(img, y)
                img = imresize(img, self.image_size).astype('float32')
                if train:
                    shuffle(self.color_jitter)
                    for jitter in self.color_jitter:
                        img = jitter(img)
                    if self.lighting_std:
                        img = self.lighting(img)
                    if self.hflip_prob > 0:
                        img, y = self.horizontal_flip(img, y)
                    if self.vflip_prob > 0:
                        img, y = self.vertical_flip(img, y)
                y = self.bbox_util.assign_boxes(y)
                inputs.append(img)
                targets.append(y)
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    yield preprocess_input(tmp_inp), tmp_targets
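

# Data generator over the page images; 300x300 (the SSD300 input size) is the resize
# target and batches of 5 images are produced.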
path_prefix = IMAGE_DIR + "/"
gen = Generator(gt, bbox_util, 5, path_prefix,
                train_keys, val_keys,
                (input_shape[0], input_shape[1]), do_crop=True)

model = SSD300(input_shape, num_classes=NUM_CLASSES)
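
# Learning-rate schedule: exponential decay, lr = base_lr * 0.9 ** epoch.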
base_lr = 3e-4

def schedule(epoch, decay=0.9):
    return base_lr * decay ** epoch
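
# Save the model weights after every epoch; the checkpoints/ directory must already exist.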
callbacks = [keras.callbacks.ModelCheckpoint('./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                                             verbose=1,
                                             save_weights_only=True),
             keras.callbacks.LearningRateScheduler(schedule)]

optim = keras.optimizers.Adam(lr=base_lr)
model.compile(optimizer=optim,
              loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=5.0).compute_loss)
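
# Train for 100 epochs. The fit_generator arguments follow the legacy Keras 1
# signature: gen.train_batches / gen.val_batches hold the number of keys and are
# used as samples per epoch (and nb_val_samples); nb_worker selects a single worker.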
nb_epoch = 100
history = model.fit_generator(gen.generate(True), gen.train_batches,
                              nb_epoch, verbose=1,
                              callbacks=callbacks,
                              validation_data=gen.generate(False),
                              nb_val_samples=gen.val_batches,
                              nb_worker=1)