import math
import random

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms as T
from torchvision.transforms import functional as F


class CDistNetResize(object):
    """Resize to a fixed (C, H, W) shape and normalize pixel values to [-1, 1]."""

    def __init__(self, image_shape, **kwargs):
        self.image_shape = image_shape

    def __call__(self, data):
        img = data['image']
        _, h, w = self.image_shape
        # keep_aspect_ratio = False
        image_pil = Image.fromarray(np.uint8(img))
        image = image_pil.resize((w, h), Image.LANCZOS)
        image = np.array(image)
        # rgb2gray = False
        image = image.transpose((2, 0, 1))
        image = image.astype(np.float32) / 128.0 - 1.0
        data['image'] = image
        data['valid_ratio'] = 1
        return data
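

# Illustrative usage sketch (not part of the original module; the dummy input
# and the (3, 32, 128) shape below are assumptions): CDistNetResize expects an
# HWC uint8 image under data['image'] and returns a CHW float32 image in [-1, 1].
def _example_cdistnet_resize():
    resize = CDistNetResize(image_shape=(3, 32, 128))
    sample = {'image': np.zeros((48, 160, 3), dtype=np.uint8)}
    out = resize(sample)
    # (3, 32, 128), values in [-1, 1]
    return out['image'].shape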


class ABINetResize(object):
    def __init__(self, image_shape, **kwargs):
        self.image_shape = image_shape

    def __call__(self, data):
        img = data['image']
        h, w = img.shape[:2]
        norm_img, valid_ratio = resize_norm_img_abinet(img, self.image_shape)
        data['image'] = norm_img
        data['valid_ratio'] = valid_ratio
        r = float(w) / float(h)
        data['real_ratio'] = max(1, round(r))
        return data


def resize_norm_img_abinet(img, image_shape):
    imgC, imgH, imgW = image_shape
    resized_image = cv2.resize(img, (imgW, imgH),
                               interpolation=cv2.INTER_LINEAR)
    resized_w = imgW
    resized_image = resized_image.astype('float32')
    resized_image = resized_image / 255.0
    # ImageNet mean/std, applied channel-wise on the HWC image
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    resized_image = (resized_image - mean[None, None, ...]) / std[None, None, ...]
    resized_image = resized_image.transpose((2, 0, 1))
    resized_image = resized_image.astype('float32')
    valid_ratio = min(1.0, float(resized_w / imgW))
    return resized_image, valid_ratio


class SVTRResize(object):
    def __init__(self, image_shape, padding=True, **kwargs):
        self.image_shape = image_shape
        self.padding = padding

    def __call__(self, data):
        img = data['image']
        h, w = img.shape[:2]
        norm_img, valid_ratio = resize_norm_img(img, self.image_shape,
                                                self.padding)
        data['image'] = norm_img
        data['valid_ratio'] = valid_ratio
        r = float(w) / float(h)
        data['real_ratio'] = max(1, round(r))
        return data


class RecTVResize(object):
    def __init__(self, image_shape=[32, 128], padding=True, **kwargs):
        self.padding = padding
        self.image_shape = image_shape
        self.interpolation = T.InterpolationMode.BICUBIC
        transforms = []
        transforms.extend([
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ])
        self.transforms = T.Compose(transforms)

    def __call__(self, data):
        img = data['image']
        imgH, imgW = self.image_shape
        w, h = img.size
        if not self.padding:
            resized_w = imgW
        else:
            ratio = w / float(h)
            if math.ceil(imgH * ratio) > imgW:
                resized_w = imgW
            else:
                resized_w = int(math.ceil(imgH * ratio))
        resized_image = F.resize(img, (imgH, resized_w),
                                 interpolation=self.interpolation)
        img = self.transforms(resized_image)
        if resized_w < imgW:
            # pad on the right: [left, top, right, bottom]
            img = F.pad(img, [0, 0, imgW - resized_w, 0], fill=0.)
        valid_ratio = min(1.0, float(resized_w / imgW))
        data['image'] = img
        data['valid_ratio'] = valid_ratio
        r = float(w) / float(h)
        data['real_ratio'] = max(1, round(r))
        return data
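

# Illustrative sketch (assumption, not part of the original module): RecTVResize
# operates on a PIL image; a 48x16 input at the default [32, 128] shape is
# resized to width 96 and right-padded to 128, so valid_ratio == 0.75.
def _example_rec_tv_resize():
    resize = RecTVResize(image_shape=[32, 128], padding=True)
    sample = {'image': Image.new('RGB', (48, 16))}
    out = resize(sample)
    # out['image']: torch.Tensor of shape (3, 32, 128)
    return out['image'].shape, out['valid_ratio']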


class LongResize(object):
    def __init__(self,
                 base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
                 max_ratio=12,
                 base_h=32,
                 padding_rand=False,
                 padding_bi=False,
                 padding=True,
                 **kwargs):
        self.base_shape = base_shape
        self.max_ratio = max_ratio
        self.base_h = base_h
        self.padding = padding
        self.padding_rand = padding_rand
        self.padding_bi = padding_bi

    def __call__(self, data):
        data = resize_norm_img_long(
            data,
            self.base_shape,
            self.max_ratio,
            self.base_h,
            self.padding,
            self.padding_rand,
            self.padding_bi,
        )
        return data


class SliceResize(object):
    def __init__(self, image_shape, padding=True, max_ratio=12, **kwargs):
        self.image_shape = image_shape
        self.padding = padding
        self.max_ratio = max_ratio

    def __call__(self, data):
        img = data['image']
        h, w = img.shape[:2]
        w_bi = w // 2
        # left half, right half, and a centered half-width crop
        img_list = [
            img[:, :w_bi, :], img[:, w_bi:2 * w_bi, :],
            img[:, w_bi // 2:(w_bi // 2) + w_bi, :]
        ]
        img_reshape = []
        for img_s in img_list:
            norm_img, valid_ratio = resize_norm_img_slice(
                img_s, self.image_shape, max_ratio=self.max_ratio)
            img_reshape.append(norm_img[None, :, :, :])
        data['image'] = np.concatenate(img_reshape, 0)
        data['valid_ratio'] = valid_ratio
        return data


class SliceTVResize(object):
    def __init__(self,
                 image_shape,
                 padding=True,
                 base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
                 max_ratio=12,
                 base_h=32,
                 **kwargs):
        self.image_shape = image_shape
        self.padding = padding
        self.max_ratio = max_ratio
        self.base_h = base_h
        self.interpolation = T.InterpolationMode.BICUBIC
        transforms = []
        transforms.extend([
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ])
        self.transforms = T.Compose(transforms)

    def __call__(self, data):
        img = data['image']
        w, h = img.size
        w_ratio = ((w // h) // 2) * 2
        w_ratio = max(6, w_ratio)
        img = F.resize(img, (self.base_h, self.base_h * w_ratio),
                       interpolation=self.interpolation)
        img = self.transforms(img)
        img_list = []
        # overlapping windows of width 4 * base_h with stride 2 * base_h
        for i in range(0, w_ratio // 2 - 1):
            img_list.append(img[None, :, :,
                                i * 2 * self.base_h:(i * 2 + 4) * self.base_h])
        data['image'] = torch.concat(img_list, 0)
        data['valid_ratio'] = float(w_ratio) / w
        return data
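

# Illustrative sketch (assumption, not part of the original module): SliceTVResize
# turns one wide PIL line image into a stack of overlapping crops. A 320x32
# input gives w_ratio = 10, hence 4 windows of width 4 * base_h taken with
# stride 2 * base_h, i.e. an output tensor of shape (4, 3, 32, 128).
def _example_slice_tv_resize():
    resize = SliceTVResize(image_shape=[32, 128])
    sample = {'image': Image.new('RGB', (320, 32))}
    out = resize(sample)
    return out['image'].shape  # torch.Size([4, 3, 32, 128])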


class RecTVResizeRatio(object):
    def __init__(self,
                 image_shape=[32, 128],
                 padding=True,
                 base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
                 max_ratio=12,
                 base_h=32,
                 **kwargs):
        self.padding = padding
        self.image_shape = image_shape
        self.max_ratio = max_ratio
        self.base_shape = base_shape
        self.base_h = base_h
        self.interpolation = T.InterpolationMode.BICUBIC
        transforms = []
        transforms.extend([
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ])
        self.transforms = T.Compose(transforms)

    def __call__(self, data):
        img = data['image']
        imgH, imgW = self.image_shape
        w, h = img.size
        gen_ratio = round(float(w) / float(h))
        ratio_resize = 1 if gen_ratio == 0 else gen_ratio
        ratio_resize = min(ratio_resize, self.max_ratio)
        if ratio_resize <= 4:
            imgW, imgH = self.base_shape[ratio_resize - 1]
        else:
            imgW, imgH = [self.base_h * ratio_resize, self.base_h]
        if not self.padding:
            resized_w = imgW
        else:
            ratio = w / float(h)
            if math.ceil(imgH * ratio) > imgW:
                resized_w = imgW
            else:
                resized_w = int(math.ceil(imgH * ratio))
        resized_image = F.resize(img, (imgH, resized_w),
                                 interpolation=self.interpolation)
        img = self.transforms(resized_image)
        if resized_w < imgW:
            img = F.pad(img, [0, 0, imgW - resized_w, 0], fill=0.)
        valid_ratio = min(1.0, float(resized_w / imgW))
        data['image'] = img
        data['valid_ratio'] = valid_ratio
        return data


class RecDynamicResize(object):
    def __init__(self, image_shape=[32, 128], padding=True, **kwargs):
        self.padding = padding
        self.image_shape = image_shape
        self.max_ratio = image_shape[1] * 1.0 / image_shape[0]

    def __call__(self, data):
        img = data['image']
        imgH, imgW = self.image_shape
        h, w, imgC = img.shape
        ratio = w / float(h)
        # the target width grows with the input aspect ratio
        max_wh_ratio = max(ratio, self.max_ratio)
        imgW = int(imgH * max_wh_ratio)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        data['image'] = padding_im
        return data
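

# Illustrative sketch (assumption, not part of the original module): unlike the
# fixed-canvas resizes above, RecDynamicResize widens the output with the input,
# so a 256x32 image at image_shape [32, 128] yields a (3, 32, 256) array.
def _example_rec_dynamic_resize():
    resize = RecDynamicResize(image_shape=[32, 128])
    sample = {'image': np.zeros((32, 256, 3), dtype=np.uint8)}
    out = resize(sample)
    return out['image'].shape  # (3, 32, 256)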


def resize_norm_img_slice(
    img,
    image_shape,
    base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
    max_ratio=12,
    base_h=32,
    padding=True,
):
    imgC, imgH, imgW = image_shape
    h = img.shape[0]
    w = img.shape[1]
    gen_ratio = round(float(w) / float(h))
    ratio_resize = 1 if gen_ratio == 0 else gen_ratio
    ratio_resize = min(ratio_resize, max_ratio)
    if ratio_resize <= 4:
        imgW, imgH = base_shape[ratio_resize - 1]
    else:
        imgW, imgH = [base_h * ratio_resize, base_h]
    if not padding:
        resized_image = cv2.resize(img, (imgW, imgH))
        resized_w = imgW
    else:
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            # random width jitter (x0.5 to x1.5) as a training-time augmentation
            resized_w = int(math.ceil(imgH * ratio * (random.random() + 0.5)))
            resized_w = min(imgW, resized_w)
        resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, :resized_w] = resized_image
    valid_ratio = min(1.0, float(resized_w / imgW))
    return padding_im, valid_ratio


def resize_norm_img(img,
                    image_shape,
                    padding=True,
                    interpolation=cv2.INTER_LINEAR):
    imgC, imgH, imgW = image_shape
    h = img.shape[0]
    w = img.shape[1]
    if not padding:
        resized_image = cv2.resize(img, (imgW, imgH),
                                   interpolation=interpolation)
        resized_w = imgW
    else:
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.astype('float32')
    if image_shape[0] == 1:
        # grayscale: keep a single channel
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    valid_ratio = min(1.0, float(resized_w / imgW))
    return padding_im, valid_ratio
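

# Illustrative sketch (assumption, not part of the original module): with
# padding enabled, a 32x64 image resized into a (3, 32, 320) canvas keeps its
# aspect ratio (resized_w = 64) and reports valid_ratio = 64 / 320 = 0.2.
def _example_resize_norm_img():
    img = np.zeros((32, 64, 3), dtype=np.uint8)
    norm_img, valid_ratio = resize_norm_img(img, (3, 32, 320), padding=True)
    return norm_img.shape, valid_ratio  # ((3, 32, 320), 0.2)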


def resize_norm_img_long(
    data,
    base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
    max_ratio=12,
    base_h=32,
    padding=True,
    padding_rand=False,
    padding_bi=False,
):
    img = data['image']
    h = img.shape[0]
    w = img.shape[1]
    gen_ratio = data.get('gen_ratio', 0)
    if gen_ratio == 0:
        ratio = w / float(h)
        gen_ratio = round(ratio) if ratio > 0.5 else 1
    # clamp to the longest supported aspect ratio
    gen_ratio = min(gen_ratio, max_ratio)
    if padding_rand and random.random() < 0.5:
        padding = not padding
    if gen_ratio <= len(base_shape):
        imgW, imgH = base_shape[gen_ratio - 1]
    else:
        imgW, imgH = [base_h * gen_ratio, base_h]
    if not padding:
        resized_image = cv2.resize(img, (imgW, imgH),
                                   interpolation=cv2.INTER_LINEAR)
        resized_w = imgW
    else:
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            # random width jitter (x0.5 to x1.5) as a training-time augmentation
            resized_w = int(math.ceil(imgH * ratio * (random.random() + 0.5)))
            resized_w = min(imgW, resized_w)
        resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.astype('float32')
    resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((3, imgH, imgW), dtype=np.float32)
    if padding_bi and random.random() < 0.5:
        # randomly pad on the left instead of the right
        padding_im[:, :, -resized_w:] = resized_image
    else:
        padding_im[:, :, :resized_w] = resized_image
    valid_ratio = min(1.0, float(resized_w / imgW))
    data['image'] = padding_im
    data['valid_ratio'] = valid_ratio
    data['gen_ratio'] = imgW // imgH
    data['real_ratio'] = w // h
    return data


class VisionLANResize(object):
    def __init__(self, image_shape, **kwargs):
        self.image_shape = image_shape

    def __call__(self, data):
        img = data['image']
        imgC, imgH, imgW = self.image_shape
        resized_image = cv2.resize(img, (imgW, imgH))
        resized_image = resized_image.astype('float32')
        if imgC == 1:
            resized_image = resized_image / 255
            norm_img = resized_image[np.newaxis, :]
        else:
            norm_img = resized_image.transpose((2, 0, 1)) / 255
        data['image'] = norm_img
        data['valid_ratio'] = 1.0
        return data


class RobustScannerRecResizeImg(object):
    def __init__(self, image_shape, width_downsample_ratio=0.25, **kwargs):
        self.image_shape = image_shape
        self.width_downsample_ratio = width_downsample_ratio

    def __call__(self, data):
        img = data['image']
        norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(
            img, self.image_shape, self.width_downsample_ratio)
        data['image'] = norm_img
        data['resized_shape'] = resize_shape
        data['pad_shape'] = pad_shape
        data['valid_ratio'] = valid_ratio
        return data


def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
    imgC, imgH, imgW_min, imgW_max = image_shape
    h = img.shape[0]
    w = img.shape[1]
    valid_ratio = 1.0
    # make sure new_width is an integral multiple of width_divisor.
    width_divisor = int(1 / width_downsample_ratio)
    # resize
    ratio = w / float(h)
    resize_w = math.ceil(imgH * ratio)
    if resize_w % width_divisor != 0:
        resize_w = round(resize_w / width_divisor) * width_divisor
    if imgW_min is not None:
        resize_w = max(imgW_min, resize_w)
    if imgW_max is not None:
        valid_ratio = min(1.0, 1.0 * resize_w / imgW_max)
        resize_w = min(imgW_max, resize_w)
    resized_image = cv2.resize(img, (resize_w, imgH))
    resized_image = resized_image.astype('float32')
    # norm
    if image_shape[0] == 1:
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    resize_shape = resized_image.shape
    # pad with -1, the normalized value of a black pixel
    padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32)
    padding_im[:, :, 0:resize_w] = resized_image
    pad_shape = padding_im.shape
    return padding_im, resize_shape, pad_shape, valid_ratio
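

# Illustrative sketch (assumption, not part of the original module; the
# [3, 48, 48, 160] shape is only an example of (C, H, W_min, W_max)): a 96x48
# input keeps its width, is padded with -1 up to W_max, and valid_ratio = 0.6.
def _example_resize_norm_img_sar():
    img = np.zeros((48, 96, 3), dtype=np.uint8)
    padded, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(
        img, [3, 48, 48, 160], width_downsample_ratio=0.25)
    return padded.shape, resize_shape, pad_shape, valid_ratio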


class SRNRecResizeImg(object):
    def __init__(self, image_shape, **kwargs):
        self.image_shape = image_shape

    def __call__(self, data):
        img = data['image']
        norm_img = resize_norm_img_srn(img, self.image_shape)
        data['image'] = norm_img
        return data


def resize_norm_img_srn(img, image_shape):
    imgC, imgH, imgW = image_shape
    img_black = np.zeros((imgH, imgW))
    im_hei = img.shape[0]
    im_wid = img.shape[1]
    # snap the resized width to 1x, 2x, or 3x the target height,
    # otherwise stretch to the full target width
    if im_wid <= im_hei * 1:
        img_new = cv2.resize(img, (imgH * 1, imgH))
    elif im_wid <= im_hei * 2:
        img_new = cv2.resize(img, (imgH * 2, imgH))
    elif im_wid <= im_hei * 3:
        img_new = cv2.resize(img, (imgH * 3, imgH))
    else:
        img_new = cv2.resize(img, (imgW, imgH))
    img_np = np.asarray(img_new)
    img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    img_black[:, 0:img_np.shape[1]] = img_np
    img_black = img_black[:, :, np.newaxis]
    row, col, c = img_black.shape
    c = 1
    return np.reshape(img_black, (c, row, col)).astype(np.float32)
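

# Minimal end-to-end sketch (assumption: in a typical recognition data pipeline
# these ops are applied inside a dataset's __getitem__; the function below is
# hypothetical and only illustrates the dict-in / dict-out contract shared by
# the transforms in this module).
def _example_pipeline(image_hwc_uint8):
    sample = {'image': image_hwc_uint8}
    sample = SVTRResize(image_shape=(3, 32, 128), padding=True)(sample)
    # sample['image'] is a (3, 32, 128) float32 array in [-1, 1];
    # sample['valid_ratio'] records how much of the padded width is real content
    return sample['image'], sample['valid_ratio']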