Niv Sardi committed on
Commit
8f69832
1 Parent(s): ae7097b

augmentation, first pass

Browse files

Signed-off-by: Niv Sardi <xaiki@evilgiggle.com>

README.org CHANGED
@@ -34,7 +34,13 @@ https://github.com/ModelDepot/tfjs-yolo-tiny
34
  https://github.com/Hyuto/yolov5-tfjs
35
 
36
  ** augmentation
37
- https://github.com/srp-31/Data-Augmentation-for-Object-Detection-YOLO-
 
 
 
 
 
 
38
 
39
  ** proveedores
40
  http://www.bcra.gov.ar/SistemasFinancierosYdePagos/Proveedores-servicios-de-pago-ofrecen-cuentas-de-pago.asp
 
34
  https://github.com/Hyuto/yolov5-tfjs
35
 
36
  ** augmentation
37
+ there were a lot of augmentation solutions out there, because it had better
38
+ pipelines and multicore support we went with:
39
+ - https://github.com/aleju/imgaug
40
+
41
+ but leaving the other here for refs
42
+ - https://github.com/srp-31/Data-Augmentation-for-Object-Detection-YOLO-
43
+ - https://github.com/mdbloice/Augmentor
44
 
45
  ** proveedores
46
  http://www.bcra.gov.ar/SistemasFinancierosYdePagos/Proveedores-servicios-de-pago-ofrecen-cuentas-de-pago.asp
python/augment.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import math
4
+ import random
5
+
6
+ from io import BytesIO
7
+ import numpy as np
8
+ from cairosvg import svg2png
9
+ import cv2
10
+
11
+ import filetype
12
+ from filetype.match import image_matchers
13
+
14
+ import imgaug as ia
15
+ from imgaug import augmenters as iaa
16
+ from imgaug.augmentables.batches import UnnormalizedBatch
17
+
18
+ from common import defaults, mkdir
19
+ import imtool
20
+ import pipelines
21
+
22
+ BATCH_SIZE = 16
23
+
24
+ mkdir.make_dirs([defaults.AUGMENTED_IMAGES_PATH, defaults.AUGMENTED_LABELS_PATH])
25
+
26
+ logo_images = []
27
+ background_images = [d for d in os.scandir(defaults.IMAGES_PATH)]
28
+
29
+ stats = {
30
+ 'failed': 0,
31
+ 'ok': 0
32
+ }
33
+
34
+ for d in os.scandir(defaults.LOGOS_DATA_PATH):
35
+ img = None
36
+ if not d.is_file():
37
+ stats['failed'] += 1
38
+ continue
39
+
40
+ try:
41
+ if filetype.match(d.path, matchers=image_matchers):
42
+ img = cv2.imread(d.path, cv2.IMREAD_UNCHANGED)
43
+ else:
44
+ png = svg2png(url=d.path)
45
+ img = cv2.imdecode(np.asarray(bytearray(png), dtype=np.uint8), cv2.IMREAD_UNCHANGED)
46
+ stats['ok'] += 1
47
+
48
+ (h, w, c) = img.shape
49
+ if c == 3:
50
+ img = imtool.add_alpha(img)
51
+
52
+ if img.ndim < 3:
53
+ print(f'very bad dim: {img.ndim}')
54
+
55
+ img = imtool.remove_white(img)
56
+ (h, w, c) = img.shape
57
+
58
+ assert(w > 10)
59
+ assert(h > 10)
60
+
61
+ logo_images.append(img)
62
+ except Exception as e:
63
+ stats['failed'] += 1
64
+ print(f'error loading: {d.path}: {e}')
65
+
66
+ print(stats)
67
# Split the logos into consecutive, non-overlapping chunks of at most
# BATCH_SIZE images; the last chunk may be shorter. The start index advances
# by BATCH_SIZE: indexing the slice with the batch number instead would yield
# overlapping windows ([0:16], [1:17], ...) and never reach most of the logos.
batches = [UnnormalizedBatch(images=logo_images[start:start + BATCH_SIZE])
           for start in range(0, len(logo_images), BATCH_SIZE)]
69
+
70
+ # Feed the logo batches through the pipelines.HUGE augmenter; batches
71
+ # are only loaded once there is space again in the buffer.
72
+ pipeline = pipelines.HUGE
73
+
74
+ def create_generator(lst):
75
+ for b in lst:
76
+ print(f"Loading next unaugmented batch...")
77
+ yield b
78
+
79
+ batches_generator = create_generator(batches)
80
+
81
+ with pipeline.pool(processes=-1, seed=1) as pool:
82
+ batches_aug = pool.imap_batches(batches_generator, output_buffer_size=5)
83
+
84
+ print(f"Requesting next augmented batch...")
85
+ for i, batch_aug in enumerate(batches_aug):
86
+ idx = list(range(len(batch_aug.images_aug)))
87
+ random.shuffle(idx)
88
+ for j, d in enumerate(background_images):
89
+ img = imtool.remove_white(cv2.imread(d.path))
90
+ basename = d.name.replace('.png', '') + f'.{i}.{j}'
91
+
92
+ anotations = []
93
+ for k in range(math.floor(len(batch_aug.images_aug)/3)):
94
+ logo = batch_aug.images_aug[(j+k)%len(batch_aug.images_aug)]
95
+ try:
96
+ img, bb, (w, h) = imtool.mix(img, logo, random.random(), random.random())
97
+ anotations.append(f'0 {bb.x/w} {bb.y/h} {bb.w/w} {bb.h/h}')
98
+ except AssertionError:
99
+ print(f'couldnt process {i}, {j}')
100
+
101
+ try:
102
+ cv2.imwrite(f'{defaults.AUGMENTED_IMAGES_PATH}/{basename}.png', img)
103
+ label_path = f"{defaults.AUGMENTED_LABELS_PATH}/{basename}.txt"
104
+ with open(label_path, 'a') as f:
105
+ f.write('\n'.join(anotations))
106
+ except Exception:
107
+ print(f'couldnt write image {basename}')
108
+
109
+ if i < len(batches)-1:
110
+ print("Requesting next augmented batch...")
111
+
python/imtool.py CHANGED
@@ -3,6 +3,7 @@
3
  import os
4
  import math
5
  import cv2
 
6
  from typing import NamedTuple
7
 
8
  from entity import Entity
@@ -38,16 +39,26 @@ class Centroid(BoundingBox):
38
 
39
  def read_bounding_boxes(filename):
40
  boxes = []
 
41
  with open(filename, 'r') as f:
42
  lines = f.readlines()
43
  for l in lines:
44
- (bco, x,y,w,h) = [float(i) for i in l.split(' ')]
 
45
  if x < 0 or y < 0 or w < 10 or h < 10:
46
- print(f"dropping logo, it has inconsistent size: {w}x{h}+{x}x{y}")
47
  continue
48
  boxes.append(BoundingBox(x,y,w,h))
49
  return bco, boxes
50
 
 
 
 
 
 
 
 
 
51
  def floor_point(x, y):
52
  return (math.floor(x), math.floor(y))
53
 
@@ -64,6 +75,39 @@ def cut_logo(im, l):
64
  (x, y, w, h) = floor_logo(l)
65
  return im[x:w, y:h]
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def crop(id, fn, logos):
68
  basename = os.path.basename(fn).replace('.png', '')
69
  img_out = f"./data/squares/images"
@@ -85,7 +129,7 @@ def crop(id, fn, logos):
85
  for x in range(tx):
86
  for y in range(ty):
87
  color = (0,x*(255/tx),y*(255/ty))
88
-
89
 
90
  if tx < 2:
91
  xs = 0
@@ -104,6 +148,10 @@ def crop(id, fn, logos):
104
  rim = cv2.rectangle(rim, start, end, color, 10)
105
  li = []
106
  for l in logos:
 
 
 
 
107
  def intersect():
108
  six = l.x - f.x
109
  siy = l.y - f.y
@@ -135,8 +183,6 @@ def crop(id, fn, logos):
135
  if p:
136
  li.append(p)
137
 
138
- c = (255, 0, 0)
139
-
140
  nim = im[start[1]:end[1], start[0]:end[0]]
141
  rnim = rim[start[1]:end[1], start[0]:end[0]]
142
  img_name =f"{img_out}/{basename}-x{x}y{y}.jpg"
@@ -152,7 +198,7 @@ def crop(id, fn, logos):
152
  dim = cv2.rectangle(rnim,
153
  floor_point(cx - p.w/2, cy - p.h/2),
154
  floor_point(cx + p.w/2, cy + p.h/2),
155
- c,
156
  5)
157
 
158
  a = f"{int(id)} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}\n"
@@ -160,7 +206,7 @@ def crop(id, fn, logos):
160
  print(a)
161
  cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
162
 
163
- cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
164
 
165
  if __name__ == '__main__':
166
  i = 0
 
3
  import os
4
  import math
5
  import cv2
6
+ import numpy as np
7
  from typing import NamedTuple
8
 
9
  from entity import Entity
 
39
 
40
  def read_bounding_boxes(filename):
41
  boxes = []
42
+ bco = None
43
  with open(filename, 'r') as f:
44
  lines = f.readlines()
45
  for l in lines:
46
+ (b, x,y,w,h) = [float(i) for i in l.split(' ')]
47
+ bco = b
48
  if x < 0 or y < 0 or w < 10 or h < 10:
49
+ print(f"dropping logo, it has inconsistent size: {w}x{h}@{x}x{y}")
50
  continue
51
  boxes.append(BoundingBox(x,y,w,h))
52
  return bco, boxes
53
 
54
def coord_dict_to_point(c):
    """Format a rectangle mapping as a "<x> <y> <w> <h>" label string.

    c must provide the 'x', 'y', 'width' and 'height' entries; they are
    forwarded unchanged to coord_to_point().
    """
    return coord_to_point(c['x'], c['y'], c['width'], c['height'])


def coord_to_point(cx, cy, cw, ch):
    """Return "<x> <y> <w> <h>" for a box at origin (cx, cy) of size cw x ch.

    x and y are the origin shifted by half the size and rounded down (the
    box centre when the origin is its top-left corner); w and h are the
    size rounded up.
    """
    x = math.floor(cx + cw/2)
    y = math.floor(cy + ch/2)
    return f"{x} {y} {math.ceil(cw)} {math.ceil(ch)}"
61
+
62
  def floor_point(x, y):
63
  return (math.floor(x), math.floor(y))
64
 
 
75
  (x, y, w, h) = floor_logo(l)
76
  return im[x:w, y:h]
77
 
78
+ def add_alpha(img):
79
+ b, g, r = cv2.split(img)
80
+ a = np.ones(b.shape, dtype=b.dtype) * 50
81
+ return cv2.merge((b,g,r,a))
82
+
83
def remove_white(img):
    """Crop img to the bounding box of its dark (gray value < 128) pixels.

    Returns a slice of the original image covering that box. When no pixel
    is darker than 128 there is nothing to crop to, so the image is returned
    unchanged instead of failing inside cv2.boundingRect.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    # Binary mask of the dark pixels, kept as uint8 so OpenCV takes it as-is.
    dark = (gray < 128).astype(np.uint8) * 255
    coords = cv2.findNonZero(dark)
    if coords is None or len(coords) == 0:
        # Only light pixels: no content box exists.
        return img
    x, y, w, h = cv2.boundingRect(coords)  # minimum spanning bounding box
    # Crop the original image, not the thresholded mask.
    return img[y:y+h, x:x+w]
91
+
92
+ def mix(a, b, fx, fy):
93
+ (ah, aw, ac) = a.shape
94
+ (bh, bw, bc) = b.shape
95
+
96
+ assert(aw > bw)
97
+ assert(ah > bh)
98
+
99
+ x = math.floor(fx*(aw - bw))
100
+ y = math.floor(fy*(ah - bh))
101
+
102
+ mat = a[y:y+bh,x:x+bw]
103
+ cols = b[:, :, :3]
104
+ alpha = b[:, :, 3]/255
105
+ mask = np.dstack((alpha, alpha, alpha))
106
+
107
+ a[y:y+bh,x:x+bw] = mat * (1 - mask) + cols * mask
108
+
109
+ return a, BoundingBox(x, y, bw, bh), (aw, ah)
110
+
111
  def crop(id, fn, logos):
112
  basename = os.path.basename(fn).replace('.png', '')
113
  img_out = f"./data/squares/images"
 
129
  for x in range(tx):
130
  for y in range(ty):
131
  color = (0,x*(255/tx),y*(255/ty))
132
+ logo_color = (255, 0, 0)
133
 
134
  if tx < 2:
135
  xs = 0
 
148
  rim = cv2.rectangle(rim, start, end, color, 10)
149
  li = []
150
  for l in logos:
151
+ rim = cv2.rectangle(rim,
152
+ floor_point(l.x, l.y),
153
+ floor_point(l.x + l.w, l.y + l.h),
154
+ logo_color, 5)
155
  def intersect():
156
  six = l.x - f.x
157
  siy = l.y - f.y
 
183
  if p:
184
  li.append(p)
185
 
 
 
186
  nim = im[start[1]:end[1], start[0]:end[0]]
187
  rnim = rim[start[1]:end[1], start[0]:end[0]]
188
  img_name =f"{img_out}/{basename}-x{x}y{y}.jpg"
 
198
  dim = cv2.rectangle(rnim,
199
  floor_point(cx - p.w/2, cy - p.h/2),
200
  floor_point(cx + p.w/2, cy + p.h/2),
201
+ logo_color,
202
  5)
203
 
204
  a = f"{int(id)} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}\n"
 
206
  print(a)
207
  cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
208
 
209
+ cv2.imwrite(f'{debug_out}/{basename}.debug.png', rim)
210
 
211
  if __name__ == '__main__':
212
  i = 0
python/pipelines.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import imgaug as ia
2
+ from imgaug import augmenters as iaa
3
+
4
+ # Sometimes(p, ...) applies the given augmenter in a fraction p of all cases,
5
+ # e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every second image; this helper uses p=0.1.
6
+ sometimes = lambda aug: iaa.Sometimes(0.1, aug)
7
+
8
+ # Define our sequence of augmentation steps that will be applied to every image
9
+ # All augmenters with per_channel=0.5 will sample one value _per image_
10
+ # in 50% of all cases. In all other cases they will sample new values
11
+ # _per channel_.
12
+
13
+ HUGE = sometimes(iaa.Sequential(
14
+ [
15
+ # apply the following augmenters to most images
16
+ iaa.Fliplr(0.5), # horizontally flip 50% of all images
17
+ iaa.Flipud(0.2), # vertically flip 20% of all images
18
+ # crop images by -5% to 10% of their height/width
19
+ sometimes(iaa.CropAndPad(
20
+ percent=(-0.05, 0.1),
21
+ pad_mode=ia.ALL,
22
+ pad_cval=(0, 255)
23
+ )),
24
+ sometimes(iaa.Affine(
25
+ scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
26
+ translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis)
27
+ rotate=(-45, 45), # rotate by -45 to +45 degrees
28
+ shear=(-16, 16), # shear by -16 to +16 degrees
29
+ order=[0, 1], # use nearest neighbour or bilinear interpolation (fast)
30
+ cval=(0, 255), # if mode is constant, use a cval between 0 and 255
31
+ mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
32
+ )),
33
+ # execute 0 to 5 of the following (less important) augmenters per image
34
+ # don't execute all of them, as that would often be way too strong
35
+ iaa.SomeOf((0, 5),
36
+ [
37
+ sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
38
+ iaa.OneOf([
39
+ iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
40
+ iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
41
+ iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
42
+ ]),
43
+ iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
44
+ iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
45
+ # search either for all edges or for directed edges,
46
+ # blend the result with the original image using a blobby mask
47
+ iaa.SimplexNoiseAlpha(iaa.OneOf([
48
+ iaa.EdgeDetect(alpha=(0.5, 1.0)),
49
+ iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
50
+ ])),
51
+ iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images
52
+ iaa.OneOf([
53
+ iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels
54
+ iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
55
+ ]),
56
+ iaa.Invert(0.05, per_channel=True), # invert color channels
57
+ iaa.Add((-10, 10), per_channel=0.5), # change brightness of images (by -10 to 10 of original value)
58
+ iaa.AddToHueAndSaturation((-20, 20)), # change hue and saturation
59
+ # either change the brightness of the whole image (sometimes
60
+ # per channel) or change the brightness of subareas
61
+ iaa.OneOf([
62
+ iaa.Multiply((0.5, 1.5), per_channel=0.5),
63
+ iaa.FrequencyNoiseAlpha(
64
+ exponent=(-4, 0),
65
+ first=iaa.Multiply((0.5, 1.5), per_channel=True),
66
+ second=iaa.LinearContrast((0.5, 2.0))
67
+ )
68
+ ]),
69
+ iaa.LinearContrast((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
70
+ iaa.Grayscale(alpha=(0.0, 1.0)),
71
+ sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
72
+ sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))), # sometimes move parts of the image around
73
+ sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
74
+ ],
75
+ random_order=True
76
+ )
77
+ ],
78
+ random_order=True
79
+ ))
python/requirements.txt CHANGED
@@ -1,4 +1,11 @@
1
- bs4==0.0.1
 
 
 
 
 
 
2
  progress==1.6
3
- inotify
4
- requests
 
 
1
+ beautifulsoup4==4.11.1
2
+ CairoSVG==2.5.2
3
+ filetype==1.1.0
4
+ imgaug==0.4.0
5
+ inotify==0.2.10
6
+ numpy==1.23.2
7
+ opencv_python==4.6.0.66
8
  progress==1.6
9
+ PyYAML==6.0
10
+ requests==2.27.1
11
+ selenium==4.4.3
python/screenshot.py CHANGED
@@ -10,16 +10,12 @@ from selenium.webdriver.common.by import By
10
  from common import selectors
11
  from entity import Entity
12
  from common import defaults,mkdir
 
13
 
14
  options = webdriver.FirefoxOptions()
15
  options.add_argument("--headless")
16
  options.add_argument("--window-size=1920x8000")
17
 
18
- def coord_to_point(c):
19
- x = math.floor(c['x'] + c['width']/2)
20
- y = math.floor(c['y'] + c['height']/2)
21
- return f"{x} {y} {math.ceil(c['width'])} {math.ceil(c['height'])}"
22
-
23
  driver = webdriver.Firefox(options=options)
24
  def sc_entity(e: Entity):
25
  print(f'screenshoting: {e}')
@@ -38,7 +34,7 @@ def sc_entity(e: Entity):
38
  logos.extend(driver.find_elements(By.CSS_SELECTOR, selectors.cls_logo) or [])
39
  with open(f"{defaults.LABELS_PATH}/{e.bco}.full.txt", 'w') as f:
40
  for i in logos:
41
- f.write(f"{e.id} {coord_to_point(i.rect)}\n")
42
 
43
  if __name__ == '__main__':
44
  sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
 
10
  from common import selectors
11
  from entity import Entity
12
  from common import defaults,mkdir
13
+ from imtool import coord_dict_to_point
14
 
15
  options = webdriver.FirefoxOptions()
16
  options.add_argument("--headless")
17
  options.add_argument("--window-size=1920x8000")
18
 
 
 
 
 
 
19
  driver = webdriver.Firefox(options=options)
20
  def sc_entity(e: Entity):
21
  print(f'screenshoting: {e}')
 
34
  logos.extend(driver.find_elements(By.CSS_SELECTOR, selectors.cls_logo) or [])
35
  with open(f"{defaults.LABELS_PATH}/{e.bco}.full.txt", 'w') as f:
36
  for i in logos:
37
+ f.write(f"{e.id} {coord_dict_to_point(i.rect)}\n")
38
 
39
  if __name__ == '__main__':
40
  sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
python/web.py CHANGED
@@ -17,14 +17,14 @@ def get_page(e: Entity):
17
 
18
  def get_cert(e: Entity):
19
  ssl_url = e.url.split("/")[2]
20
- mkdir.make_dirs(defaults.CERTS_PATH)
21
  try:
22
  cert = ssl.get_server_certificate((ssl_url, 443), ca_certs=None)
23
  fn = f"{defaults.CERTS_PATH}/{e.bco}.cert"
24
  with open(fn, 'w') as f:
25
  f.write(cert)
26
  except Exception as err:
27
- with open(f"{fn}.error.log", 'w+') as f:
28
  f.write(str(err))
29
  return fn
30
 
@@ -40,7 +40,7 @@ def get_logos(e: Entity, page):
40
  logos.extend(soup.select(selectors.id_logo))
41
  logos.extend(soup.select(selectors.cls_logo))
42
 
43
- mkdir.make_dirs(defaults.LOGOS_DATA_PATH)
44
 
45
  i = 0
46
  lfn = []
 
17
 
18
  def get_cert(e: Entity):
19
  ssl_url = e.url.split("/")[2]
20
+ mkdir.make_dirs([defaults.CERTS_PATH])
21
  try:
22
  cert = ssl.get_server_certificate((ssl_url, 443), ca_certs=None)
23
  fn = f"{defaults.CERTS_PATH}/{e.bco}.cert"
24
  with open(fn, 'w') as f:
25
  f.write(cert)
26
  except Exception as err:
27
+ with open(f"{defaults.DATA_PATH}/{e.bco}.error.log", 'w+') as f:
28
  f.write(str(err))
29
  return fn
30
 
 
40
  logos.extend(soup.select(selectors.id_logo))
41
  logos.extend(soup.select(selectors.cls_logo))
42
 
43
+ mkdir.make_dirs([defaults.LOGOS_DATA_PATH])
44
 
45
  i = 0
46
  lfn = []