MAERec-Gradio / mmocr /utils /img_utils.py
Mountchicken's picture
Upload 704 files
9bf4bd7
raw
history blame
3.75 kB
# Copyright (c) OpenMMLab. All rights reserved.
import cv2
import numpy as np
from mmengine.utils import is_seq_of
from shapely.geometry import LineString, Point
from .bbox_utils import bbox_jitter
from .polygon_utils import sort_vertex
def warp_img(src_img,
box,
jitter=False,
jitter_ratio_x=0.5,
jitter_ratio_y=0.1):
"""Crop box area from image using opencv warpPerspective.
Args:
src_img (np.array): Image before cropping.
box (list[float | int]): Coordinates of quadrangle.
jitter (bool): Whether to jitter the box.
jitter_ratio_x (float): Horizontal jitter ratio relative to the height.
jitter_ratio_y (float): Vertical jitter ratio relative to the height.
Returns:
np.array: The warped image.
"""
assert is_seq_of(box, (float, int))
assert len(box) == 8
h, w = src_img.shape[:2]
points_x = [min(max(x, 0), w) for x in box[0:8:2]]
points_y = [min(max(y, 0), h) for y in box[1:9:2]]
points_x, points_y = sort_vertex(points_x, points_y)
if jitter:
bbox_jitter(
points_x,
points_y,
jitter_ratio_x=jitter_ratio_x,
jitter_ratio_y=jitter_ratio_y)
points = [Point(points_x[i], points_y[i]) for i in range(4)]
edges = [
LineString([points[i], points[i + 1 if i < 3 else 0]])
for i in range(4)
]
pts1 = np.float32([[points[i].x, points[i].y] for i in range(4)])
box_width = max(edges[0].length, edges[2].length)
box_height = max(edges[1].length, edges[3].length)
pts2 = np.float32([[0, 0], [box_width, 0], [box_width, box_height],
[0, box_height]])
M = cv2.getPerspectiveTransform(pts1, pts2)
dst_img = cv2.warpPerspective(src_img, M,
(int(box_width), int(box_height)))
return dst_img
def crop_img(src_img, box, long_edge_pad_ratio=0.4, short_edge_pad_ratio=0.2):
"""Crop text region given the bounding box which might be slightly padded.
The bounding box is assumed to be a quadrangle and tightly bound the text
region.
Args:
src_img (np.array): The original image.
box (list[float | int]): Points of quadrangle.
long_edge_pad_ratio (float): The ratio of padding to the long edge. The
padding will be the length of the short edge * long_edge_pad_ratio.
Defaults to 0.4.
short_edge_pad_ratio (float): The ratio of padding to the short edge.
The padding will be the length of the long edge *
short_edge_pad_ratio. Defaults to 0.2.
Returns:
np.array: The cropped image.
"""
assert is_seq_of(box, (float, int))
assert len(box) == 8
assert 0. <= long_edge_pad_ratio < 1.0
assert 0. <= short_edge_pad_ratio < 1.0
h, w = src_img.shape[:2]
points_x = np.clip(np.array(box[0::2]), 0, w)
points_y = np.clip(np.array(box[1::2]), 0, h)
box_width = np.max(points_x) - np.min(points_x)
box_height = np.max(points_y) - np.min(points_y)
shorter_size = min(box_height, box_width)
if box_height < box_width:
horizontal_pad = long_edge_pad_ratio * shorter_size
vertical_pad = short_edge_pad_ratio * shorter_size
else:
horizontal_pad = short_edge_pad_ratio * shorter_size
vertical_pad = long_edge_pad_ratio * shorter_size
left = np.clip(int(np.min(points_x) - horizontal_pad), 0, w)
top = np.clip(int(np.min(points_y) - vertical_pad), 0, h)
right = np.clip(int(np.max(points_x) + horizontal_pad), 0, w)
bottom = np.clip(int(np.max(points_y) + vertical_pad), 0, h)
dst_img = src_img[top:bottom, left:right]
return dst_img