Niv Sardi committed on
Commit
f7e5bce
1 Parent(s): 304ab5e

bugfix: correctly position logos and fix selenium code

Browse files
Files changed (2) hide show
  1. crawler/imtool.py +19 -13
  2. crawler/screenshot.py +3 -2
crawler/imtool.py CHANGED
@@ -76,9 +76,10 @@ def crop(fn, logos):
76
  im = cv2.imread(fn)
77
 
78
  (h, w, c) = im.shape
 
79
  (tx, ty)= (
80
- math.ceil(w/(TILE_SIZE*TILE_OVERLAP)),
81
- math.ceil(h/(TILE_SIZE*TILE_OVERLAP))
82
  )
83
 
84
  print('shape', basename, tx, ty, w, h, logos)
@@ -86,18 +87,21 @@ def crop(fn, logos):
86
  for y in range(ty):
87
  color = (0,x*(255/tx),y*(255/ty))
88
 
89
- (tw, th) = (min(w, TILE_SIZE), min(h, TILE_SIZE))
90
- f = BoundingBox(
91
- (w - tw)*x/(tx),
92
- (h - th)*y/(ty),
93
- tw,
94
- th
95
- )
 
 
 
 
96
 
97
  start = floor_point(f.x, f.y)
98
  end = floor_point(f.x + f.w, f.y + f.h)
99
 
100
- print(x, y, start, end, logos)
101
  im = cv2.rectangle(im, start, end, color, 10)
102
  li = []
103
  for l in logos:
@@ -144,9 +148,9 @@ def crop(fn, logos):
144
  with open(txt_name, 'w') as f:
145
  for p in li:
146
  print(p)
147
- im = cv2.rectangle(im,
148
- floor_point(p.x, p.y),
149
- floor_point(p.x + p.w, p.y + p.h),
150
  c,
151
  5)
152
  cx = p.w/2 + p.x
@@ -155,6 +159,8 @@ def crop(fn, logos):
155
  a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
156
  f.write(a)
157
  print(a)
 
 
158
  cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
159
 
160
  if __name__ == '__main__':
 
76
  im = cv2.imread(fn)
77
 
78
  (h, w, c) = im.shape
79
+ (tw, th) = (min(w, TILE_SIZE), min(h, TILE_SIZE))
80
  (tx, ty)= (
81
+ math.ceil(w/(tw*TILE_OVERLAP)),
82
+ math.ceil(h/(th*TILE_OVERLAP))
83
  )
84
 
85
  print('shape', basename, tx, ty, w, h, logos)
 
87
  for y in range(ty):
88
  color = (0,x*(255/tx),y*(255/ty))
89
 
90
+
91
+ if tx < 2:
92
+ xs = 0
93
+ else:
94
+ xs = (w - tw)*x/(tx - 1)
95
+ if ty < 2:
96
+ ys = 0
97
+ else:
98
+ ys = (h - th)*y/(ty - 1)
99
+
100
+ f = BoundingBox(xs, ys, tw, th)
101
 
102
  start = floor_point(f.x, f.y)
103
  end = floor_point(f.x + f.w, f.y + f.h)
104
 
 
105
  im = cv2.rectangle(im, start, end, color, 10)
106
  li = []
107
  for l in logos:
 
148
  with open(txt_name, 'w') as f:
149
  for p in li:
150
  print(p)
151
+ dim = cv2.rectangle(nim,
152
+ floor_point(p.x - p.w/2, p.y - p.h/2),
153
+ floor_point(p.x + p.w/2, p.y + p.h/2),
154
  c,
155
  5)
156
  cx = p.w/2 + p.x
 
159
  a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
160
  f.write(a)
161
  print(a)
162
+ cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
163
+
164
  cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
165
 
166
  if __name__ == '__main__':
crawler/screenshot.py CHANGED
@@ -17,11 +17,12 @@ options.add_argument("--window-size=1920x8000")
17
  def coord_to_point(c):
18
  x = math.floor(c['x'] + c['width']/2)
19
  y = math.floor(c['y'] + c['height']/2)
20
- return f"{x} {y} {math.roof(c['width'])} {math.roof(c['height'])}"
21
 
22
  driver = webdriver.Firefox(options=options)
23
  def sc_entity(e: Entity):
24
  print(e)
 
25
  driver.get(e.url)
26
  driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
27
  driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
@@ -29,7 +30,7 @@ def sc_entity(e: Entity):
29
  logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
30
  with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
31
  for i in logos:
32
- f.write(f"{e.bco} {coord_to_point(i.rect)}")
33
 
34
  if __name__ == '__main__':
35
  sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
 
17
  def coord_to_point(c):
18
  x = math.floor(c['x'] + c['width']/2)
19
  y = math.floor(c['y'] + c['height']/2)
20
+ return f"{x} {y} {math.ceil(c['width'])} {math.ceil(c['height'])}"
21
 
22
  driver = webdriver.Firefox(options=options)
23
  def sc_entity(e: Entity):
24
  print(e)
25
+ driver.implicitly_wait(10)
26
  driver.get(e.url)
27
  driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
28
  driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
 
30
  logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
31
  with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
32
  for i in logos:
33
+ f.write(f"{e.bco} {coord_to_point(i.rect)}\n")
34
 
35
  if __name__ == '__main__':
36
  sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))