test_ocr_1 / crop_images.py
B2W1234's picture
Upload 53 files
0a96b76 verified
import os
import numpy as np
import cv2
import pandas as pd
def crop(pts, image):
"""
Takes inputs as 8 points
and Returns cropped, masked image with a white background
"""
# Giới hạn giá trị của pts
pts[:, 0] = np.clip(pts[:, 0], 0, image.shape[1] - 1)
pts[:, 1] = np.clip(pts[:, 1], 0, image.shape[0] - 1)
rect = cv2.boundingRect(pts)
x, y, w, h = rect
x = int(x)
y = int(y)
w = int(w)
if h == 0 or w == 0:
return np.ones((10, 10, 3),
np.uint8) * 255 # Trả về một ảnh trắng 10x10, bạn có thể thay đổi kích thước này nếu muốn
cropped = image[y:y + h, x:x + w].copy()
pts = pts - pts.min(axis=0)
mask = np.zeros(cropped.shape[:2], np.uint8)
# print("Kích thước của mask:", mask.shape)
# print("Kích thước của cropped:", cropped.shape)
# print("Giá trị của pts:", pts)
cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
dst = cv2.bitwise_and(cropped, cropped, mask=mask)
bg = np.ones_like(cropped, np.uint8) * 255
cv2.bitwise_not(bg, bg, mask=mask)
dst2 = bg + dst
return dst2
def generate_words(image_name, score_bbox, image):
num_bboxes = len(score_bbox)
for num in range(num_bboxes):
bbox_coords = score_bbox[num].split(':')[-1].split(',\n')
if bbox_coords != ['{}']:
l_t = float(bbox_coords[0].strip(' array([').strip(']').split(',')[0])
t_l = float(bbox_coords[0].strip(' array([').strip(']').split(',')[1])
r_t = float(bbox_coords[1].strip(' [').strip(']').split(',')[0])
t_r = float(bbox_coords[1].strip(' [').strip(']').split(',')[1])
r_b = float(bbox_coords[2].strip(' [').strip(']').split(',')[0])
b_r = float(bbox_coords[2].strip(' [').strip(']').split(',')[1])
l_b = float(bbox_coords[3].strip(' [').strip(']').split(',')[0])
b_l = float(bbox_coords[3].strip(' [').strip(']').split(',')[1].strip(']'))
pts = np.array([[int(l_t), int(t_l)], [int(r_t), int(t_r)], [int(r_b), int(b_r)], [int(l_b), int(b_l)]])
if np.all(pts) > 0:
word = crop(pts, image)
folder = '/'.join(image_name.split('/')[:-1])
# CHANGE DIR
dir = '/content/Pipeline/Crop Words/'
if os.path.isdir(os.path.join(dir + folder)) == False:
os.makedirs(os.path.join(dir + folder))
try:
file_name = os.path.join(dir + image_name)
cv2.imwrite(
file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b, b_r, l_b, b_l), word)
print('Image saved to ' + file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b,
b_r, l_b, b_l))
except:
continue