|
import os
import warnings
from typing import Callable

import cv2
import numpy as np
from tqdm import tqdm

from image_processing.image import (
    adaptive_hconcat,
    adaptive_vconcat,
    apply_adaptive_threshold,
    group_bounding_boxes_horizontally,
    group_bounding_boxes_vertically,
    group_contours_horizontally,
    group_contours_vertically,
    is_contour_rectangular,
)
from image_processing.model import model
from manga_panel_processor import sort_panels_by_column_then_row
from myutils.myutils import load_image, load_images
|
|
class OutputMode:
    BOUNDING = 'bounding'
    MASKED = 'masked'

    @staticmethod
    def from_index(index: int) -> str:
        return [OutputMode.BOUNDING, OutputMode.MASKED][index]
|
|
class MergeMode:
    NONE = 'none'
    VERTICAL = 'vertical'
    HORIZONTAL = 'horizontal'

    @staticmethod
    def from_index(index: int) -> str:
        return [MergeMode.NONE, MergeMode.VERTICAL, MergeMode.HORIZONTAL][index]
|
|
def get_background_intensity_range(grayscale_image: np.ndarray, min_range: int = 1) -> tuple[int, int]:
    """
    Returns the minimum and maximum intensity values of the background of the image,
    estimated from the image border with the least variance, i.e. the border
    most likely to consist purely of background pixels
    """
    edges = [grayscale_image[-1, :], grayscale_image[0, :], grayscale_image[:, 0], grayscale_image[:, -1]]
    least_varied_edge = min(edges, key=np.var)

    max_intensity = max(least_varied_edge)
    # Widen the range to span at least min_range, clamped to valid intensities
    min_intensity = max(min(min(least_varied_edge), max_intensity - min_range), 0)

    return min_intensity, max_intensity
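

# A minimal sanity-check sketch (synthetic data, hypothetical demo name): on a
# page whose borders are a uniform light gray, the estimated range should
# cover the border intensity.
def _demo_background_intensity_range() -> None:
    page = np.full((64, 64), 250, np.uint8)  # plain light "page"
    page[16:48, 16:48] = 0                   # dark "panel" in the middle
    low, high = get_background_intensity_range(page, min_range=25)
    assert low <= 250 <= high  # here: low == 225, high == 250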
|
|
|
|
|
def generate_background_mask(grayscale_image: np.ndarray) -> np.ndarray:
    """
    Generates a background mask by keeping the largest connected areas of
    near-white pixels
    """
    WHITE = 255
    LESS_WHITE, _ = get_background_intensity_range(grayscale_image, 25)
    LESS_WHITE = max(LESS_WHITE, 240)

    _, thresh = cv2.threshold(grayscale_image, LESS_WHITE, WHITE, cv2.THRESH_BINARY)
    _, labels, stats, _ = cv2.connectedComponentsWithStats(thresh)

    mask = np.zeros_like(thresh)

    PAGE_TO_SEGMENT_RATIO = 1024
    halting_area_size = mask.size // PAGE_TO_SEGMENT_RATIO

    mask_height, mask_width = mask.shape
    base_background_size_error_threshold = 0.05
    whole_background_min_width = mask_width * (1 - base_background_size_error_threshold)
    whole_background_min_height = mask_height * (1 - base_background_size_error_threshold)

    # Walk the connected components from largest to smallest area
    # (label 0 is the background of the thresholded image and is skipped)
    for i in np.argsort(stats[1:, cv2.CC_STAT_AREA])[::-1]:
        contour_index = i + 1
        x, y, w, h, area = stats[contour_index]
        if area < halting_area_size:
            break
        # Keep components that span (almost) the whole page in one dimension,
        # or that are rectangular, i.e. likely gutters between panels
        if (
            w > whole_background_min_width or
            h > whole_background_min_height or
            is_contour_rectangular(
                cv2.findContours(
                    (labels == contour_index).astype(np.uint8),
                    cv2.RETR_EXTERNAL,
                    cv2.CHAIN_APPROX_SIMPLE,
                )[0][0]
            )
        ):
            mask[labels == contour_index] = WHITE

    mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=2)

    return mask
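

# A minimal usage sketch (synthetic page, hypothetical demo name): two filled
# panels on a white page; the gutter between them should end up in the mask.
def _demo_generate_background_mask() -> None:
    page = np.full((256, 256), 255, np.uint8)
    cv2.rectangle(page, (16, 16), (120, 240), 0, -1)   # left "panel"
    cv2.rectangle(page, (136, 16), (240, 240), 0, -1)  # right "panel"
    mask = generate_background_mask(page)
    assert mask[128, 128] == 255  # gutter pixel flagged as background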
|
|
|
|
|
def extract_panels(
    image: np.ndarray,
    panel_contours: list[np.ndarray],
    accept_page_as_panel: bool = True,
    mode: str = OutputMode.BOUNDING,
    fill_in_color: tuple[int, int, int] = (0, 0, 0),
) -> list[np.ndarray]:
    """
    Extracts panels from the image using the given contours corresponding to the panels

    Parameters:
    - image: The image to extract the panels from
    - panel_contours: The contours corresponding to the panels
    - accept_page_as_panel: Whether to accept the whole page as a panel
    - mode: The mode to use for extraction
        - 'masked': Extracts the panels by cutting out only the inside of the contours
        - 'bounding': Extracts the panels using the bounding boxes of the contours
    - fill_in_color: The color used to fill in the area outside the contour in masked mode
    """
    height, width = image.shape[:2]

    returned_panels = []

    for contour in panel_contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Skip contours that span (almost) the whole page
        if not accept_page_as_panel and ((w >= width * 0.99) or (h >= height * 0.99)):
            continue

        if mode == OutputMode.MASKED:
            mask = np.zeros_like(image)
            cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)
            masked_image = cv2.bitwise_and(image, mask)
            fitted_panel = masked_image[y:y + h, x:x + w]
            # Fill the area outside the contour with fill_in_color
            background_fill = cv2.bitwise_and(cv2.bitwise_not(mask[y:y + h, x:x + w]), fill_in_color)
            fitted_panel = cv2.bitwise_or(background_fill, fitted_panel)
        else:
            fitted_panel = image[y:y + h, x:x + w]

        returned_panels.append(fitted_panel)

    return returned_panels
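

# A usage sketch comparing the two output modes on a synthetic triangular
# contour (the contour, image, and demo name are illustrative only).
def _demo_extract_panels() -> None:
    image = np.full((100, 100, 3), 255, np.uint8)
    triangle = np.array([[[10, 10]], [[90, 10]], [[10, 90]]], np.int32)
    bounding = extract_panels(image, [triangle], mode=OutputMode.BOUNDING)[0]
    masked = extract_panels(image, [triangle], mode=OutputMode.MASKED)[0]
    # Both crops share the bounding-box size; the masked one additionally
    # blacks out everything outside the triangle
    assert bounding.shape == masked.shape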
|
|
|
|
|
def preprocess_image(grayscale_image: np.ndarray) -> np.ndarray:
    """
    Preprocesses the image for panel extraction by blurring and edge detection
    """
    processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
    processed_image = cv2.Laplacian(processed_image, -1)
    return processed_image
|
|
def preprocess_image_with_dilation(grayscale_image: np.ndarray) -> np.ndarray:
    """
    Preprocesses the image for panel extraction, additionally dilating
    the detected edges and inverting the result
    """
    processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
    processed_image = cv2.Laplacian(processed_image, -1)
    processed_image = cv2.dilate(processed_image, np.ones((5, 5), np.uint8), iterations=1)
    processed_image = 255 - processed_image
    return processed_image
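

# A sanity-check sketch for the two preprocessing variants (hypothetical demo
# name): on a flat image the Laplacian is zero everywhere, so the plain
# variant is all black and the dilated-and-inverted variant is all white.
def _demo_preprocess_variants() -> None:
    flat = np.full((32, 32), 128, np.uint8)
    assert preprocess_image(flat).max() == 0
    assert preprocess_image_with_dilation(flat).min() == 255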
|
|
|
|
|
def joint_panel_split_extraction(grayscale_image: np.ndarray, background_mask: np.ndarray) -> np.ndarray:
    """
    Removes the background from the page while splitting joint panels apart:
    the background mask is thinned to a skeleton, line endpoints are detected,
    and each endpoint is extended into a full cut line
    """
    pixels_before = np.count_nonzero(background_mask)
    background_mask = cv2.ximgproc.thinning(background_mask)

    # 3x3 templates that match the endpoint of a skeleton line for each direction
    up_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 1, 0]], np.uint8)
    down_kernel = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
    left_kernel = np.array([[0, 0, 0], [0, 1, 1], [0, 0, 0]], np.uint8)
    right_kernel = np.array([[0, 0, 0], [1, 1, 0], [0, 0, 0]], np.uint8)

    down_right_diagonal_kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
    down_left_diagonal_kernel = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 0]], np.uint8)
    up_left_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]], np.uint8)
    up_right_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]], np.uint8)

    PAGE_TO_JOINT_OBJECT_RATIO = 3
    image_height, image_width = grayscale_image.shape

    height_based_size = image_height // PAGE_TO_JOINT_OBJECT_RATIO
    width_based_size = (2 * image_width) // PAGE_TO_JOINT_OBJECT_RATIO

    # Force the kernel sizes to be odd so they have a well-defined center
    height_based_size += height_based_size % 2 + 1
    width_based_size += width_based_size % 2 + 1

    # Dilation kernels that extend a matched endpoint into a long line
    # continuing in the corresponding direction
    up_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
    up_dilation_kernel[height_based_size // 2:, height_based_size // 2] = 1

    down_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
    down_dilation_kernel[:height_based_size // 2 + 1, height_based_size // 2] = 1

    left_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
    left_dilation_kernel[width_based_size // 2, width_based_size // 2:] = 1

    right_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
    right_dilation_kernel[width_based_size // 2, :width_based_size // 2 + 1] = 1

    min_based_size = min(width_based_size, height_based_size)

    down_right_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
    down_right_dilation_kernel = np.pad(down_right_dilation_kernel, ((0, min_based_size // 2), (0, min_based_size // 2)))

    up_left_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
    up_left_dilation_kernel = np.pad(up_left_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))

    up_right_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
    up_right_dilation_kernel = np.pad(up_right_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))

    down_left_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
    down_left_dilation_kernel = np.pad(down_left_dilation_kernel, ((0, min_based_size // 2), (min_based_size // 2, 0)))

    match_kernels = [
        up_kernel,
        down_kernel,
        left_kernel,
        right_kernel,
        down_right_diagonal_kernel,
        down_left_diagonal_kernel,
        up_left_diagonal_kernel,
        up_right_diagonal_kernel,
    ]

    dilation_kernels = [
        up_dilation_kernel,
        down_dilation_kernel,
        left_dilation_kernel,
        right_dilation_kernel,
        down_right_dilation_kernel,
        down_left_dilation_kernel,
        up_left_dilation_kernel,
        up_right_dilation_kernel,
    ]

    def get_dots(mask: np.ndarray, kernel: np.ndarray) -> np.ndarray:
        # Mark every pixel where the endpoint template matches almost exactly
        temp = cv2.matchTemplate(mask, kernel, cv2.TM_CCOEFF_NORMED)
        _, temp = cv2.threshold(temp, 0.9, 1, cv2.THRESH_BINARY)
        temp = np.where(temp == 1, 255, 0).astype(np.uint8)
        # Pad back to the original size (matchTemplate shrinks the output)
        pad_height = (kernel.shape[0] - 1) // 2
        pad_width = (kernel.shape[1] - 1) // 2
        temp = cv2.copyMakeBorder(temp, pad_height, kernel.shape[0] - pad_height - 1, pad_width, kernel.shape[1] - pad_width - 1, cv2.BORDER_CONSTANT, value=0)
        return temp

    for match_kernel, dilation_kernel in zip(match_kernels, dilation_kernels):
        dots = get_dots(background_mask, match_kernel)
        lines = cv2.dilate(dots, dilation_kernel, iterations=1)
        background_mask = cv2.bitwise_or(background_mask, lines)

    # Thicken the skeleton in proportion to how much the thinning shrank it
    pixels_now = np.count_nonzero(background_mask)
    dilation_size = pixels_before // (4 * pixels_now)
    dilation_size += dilation_size % 2 + 1
    background_mask = cv2.dilate(background_mask, np.ones((dilation_size, dilation_size), np.uint8), iterations=1)

    page_without_background = 255 - background_mask

    return page_without_background
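

# A minimal illustration of the endpoint matching used above (toy sizes,
# hypothetical demo name): the "up" template fires only at the top end of a
# vertical skeleton line.
def _demo_endpoint_matching() -> None:
    skeleton = np.zeros((9, 9), np.uint8)
    skeleton[2:7, 4] = 255  # vertical line segment
    up_template = np.array([[0, 0, 0], [0, 1, 0], [0, 1, 0]], np.uint8)
    scores = cv2.matchTemplate(skeleton, up_template, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(scores)
    # matchTemplate reports the top-left corner of the best window, which
    # here centers on the line's top endpoint at row 2, column 4
    assert max_loc == (3, 1)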
|
|
|
|
|
def is_contour_sufficiently_big(contour: np.ndarray, image_height: int, image_width: int) -> bool:
    PAGE_TO_PANEL_RATIO = 32
    image_area = image_width * image_height
    area_threshold = image_area // PAGE_TO_PANEL_RATIO
    area = cv2.contourArea(contour)
    return area > area_threshold
|
|
def threshold_extraction(
    image: np.ndarray,
    grayscale_image: np.ndarray,
    mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
    """
    Extracts panels from the image using thresholding
    """
    processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
    processed_image = cv2.Laplacian(processed_image, -1)
    _, thresh = cv2.threshold(processed_image, 8, 255, cv2.THRESH_BINARY)
    processed_image = apply_adaptive_threshold(processed_image)
    processed_image = cv2.subtract(processed_image, thresh)
    processed_image = cv2.dilate(processed_image, np.ones((3, 3), np.uint8), iterations=2)
    contours, _ = cv2.findContours(processed_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))
    panels = extract_panels(image, contours, accept_page_as_panel=False, mode=mode)

    return panels
|
|
def get_page_without_background(grayscale_image: np.ndarray, background_mask: np.ndarray, split_joint_panels: bool = False) -> np.ndarray:
    """
    Returns the page without the background
    """
    STRIPE_FORMAT_MASK_AREA_RATIO = 0.3

    mask_area = np.count_nonzero(background_mask)
    mask_area_ratio = mask_area / background_mask.size

    # Only attempt splitting when the background mask is small enough to
    # suggest joint panels rather than a striped layout
    if split_joint_panels and mask_area_ratio < STRIPE_FORMAT_MASK_AREA_RATIO:
        page_without_background = joint_panel_split_extraction(grayscale_image, background_mask)
    else:
        page_without_background = cv2.subtract(grayscale_image, background_mask)

    return page_without_background
|
|
def get_fallback_panels(
    image: np.ndarray,
    grayscale_image: np.ndarray,
    fallback: bool,
    panels: list[np.ndarray],
    mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
    """
    Checks if the fallback is needed and returns the appropriate panels

    Parameters:
    - mode: The mode to use for extraction
        - 'masked': Extracts the panels by cutting out only the inside of the contours
        - 'bounding': Extracts the panels using the bounding boxes of the contours
    """
    if fallback and len(panels) < 2:
        fallback_panels = threshold_extraction(image, grayscale_image, mode=mode)
        if len(fallback_panels) > len(panels):
            return fallback_panels

    return panels
|
|
def generate_panel_blocks(
    image: np.ndarray,
    background_generator: Callable[[np.ndarray], np.ndarray] = generate_background_mask,
    split_joint_panels: bool = False,
    fallback: bool = True,
    mode: str = OutputMode.BOUNDING,
    merge: str = MergeMode.NONE,
    rtl_order: bool = False,
) -> list[np.ndarray]:
    """
    Generates the separate panel images from the base image

    Parameters:
    - mode: The mode to use for extraction
        - 'masked': Extracts the panels by cutting out only the inside of the contours
        - 'bounding': Extracts the panels using the bounding boxes of the contours
    - rtl_order: If True, sort panels from right to left. Otherwise, left to right.
    """
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    processed_image = preprocess_image_with_dilation(grayscale_image)
    background_mask = background_generator(processed_image)
    page_without_background = get_page_without_background(grayscale_image, background_mask, split_joint_panels)
    contours, _ = cv2.findContours(page_without_background, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))

    panels = []
    if contours:
        contours = sort_panels_by_column_then_row(contours, rtl_order)

        def get_panels(contours):
            panels = extract_panels(image, contours, mode=mode)
            panels = get_fallback_panels(image, grayscale_image, fallback, panels, mode=mode)
            return panels

        if merge == MergeMode.NONE:
            panels = get_panels(contours)
        elif merge == MergeMode.HORIZONTAL:
            for group in group_contours_horizontally(contours):
                panels.append(adaptive_hconcat(get_panels(group)))
        elif merge == MergeMode.VERTICAL:
            for group in group_contours_vertically(contours):
                panels.append(adaptive_vconcat(get_panels(group)))

    return panels
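

# A usage sketch for the full pipeline ("page.png" and the output file names
# are hypothetical):
def _demo_generate_panel_blocks() -> None:
    page = cv2.imread("page.png")
    panels = generate_panel_blocks(page, mode=OutputMode.MASKED, merge=MergeMode.VERTICAL, rtl_order=True)
    for i, panel in enumerate(panels):
        cv2.imwrite(f"panel_{i}.png", panel)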
|
|
|
|
|
def generate_panel_blocks_by_ai(
    image: np.ndarray,
    merge: str = MergeMode.NONE,
    rtl_order: bool = False,
) -> list[np.ndarray]:
    """
    Generates the separate panel images from the base image using the AI model,
    with optional merging of the resulting panels

    Parameters:
    - rtl_order: If True, sort panels from right to left. Otherwise, left to right.
    """
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    processed_image = preprocess_image(grayscale_image)

    # The model emits FutureWarnings that are not actionable here
    warnings.filterwarnings("ignore", category=FutureWarning)
    results = model(processed_image)
    warnings.filterwarnings("default", category=FutureWarning)

    bounding_boxes = []
    for detection in results.xyxy[0]:
        x1, y1, x2, y2, conf, cls = detection.tolist()
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        bounding_boxes.append((x1, y1, x2 - x1, y2 - y1))

    panels = []
    if bounding_boxes:
        bounding_boxes = sort_panels_by_column_then_row(bounding_boxes, rtl_order)

        def get_panels(bounding_boxes):
            return [image[y:y + h, x:x + w] for x, y, w, h in bounding_boxes]

        if merge == MergeMode.NONE:
            panels = get_panels(bounding_boxes)
        elif merge == MergeMode.HORIZONTAL:
            for group in group_bounding_boxes_horizontally(bounding_boxes):
                panels.append(adaptive_hconcat(get_panels(group)))
        elif merge == MergeMode.VERTICAL:
            for group in group_bounding_boxes_vertically(bounding_boxes):
                panels.append(adaptive_vconcat(get_panels(group)))

    return panels
|
|
def extract_panels_for_image(
    image_path: str,
    output_dir: str,
    fallback: bool = True,
    split_joint_panels: bool = False,
    mode: str = OutputMode.BOUNDING,
    merge: str = MergeMode.NONE,
) -> None:
    """
    Extracts panels for a single image
    """
    if not os.path.exists(image_path):
        return
    image_path = os.path.abspath(image_path)
    image = load_image(os.path.dirname(image_path), image_path)
    image_name, image_ext = os.path.splitext(image.image_name)
    panel_blocks = generate_panel_blocks(image.image, split_joint_panels=split_joint_panels, fallback=fallback, mode=mode, merge=merge)
    for k, panel in enumerate(tqdm(panel_blocks)):
        out_path = os.path.join(output_dir, f"{image_name}_{k}{image_ext}")
        cv2.imwrite(out_path, panel)
|
|
def extract_panels_for_images_in_folder(
    input_dir: str,
    output_dir: str,
    fallback: bool = True,
    split_joint_panels: bool = False,
    mode: str = OutputMode.BOUNDING,
    merge: str = MergeMode.NONE,
) -> tuple[int, int]:
    """
    The main entry point of the program, written with CLI usage in mind

    Returns a (number of input files, number of extracted panels) tuple
    """
    if not os.path.exists(output_dir):
        return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
        image_name, image_ext = os.path.splitext(image.image_name)
        panel_blocks = generate_panel_blocks(image.image, fallback=fallback, split_joint_panels=split_joint_panels, mode=mode, merge=merge)
        for j, panel in enumerate(panel_blocks):
            out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
            cv2.imwrite(out_path, panel)
        num_panels += len(panel_blocks)
    return (num_files, num_panels)
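

# A batch-usage sketch (directory names are hypothetical; the output
# directory must already exist, otherwise (0, 0) is returned):
def _demo_batch_extraction() -> None:
    num_files, num_panels = extract_panels_for_images_in_folder(
        "pages/",
        "panels/",
        split_joint_panels=True,
    )
    print(f"Extracted {num_panels} panels from {num_files} pages")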
|
|
|
|
|
def extract_panels_for_images_in_folder_by_ai(
    input_dir: str,
    output_dir: str,
) -> tuple[int, int]:
    """
    AI-based counterpart of extract_panels_for_images_in_folder,
    likewise written with CLI usage in mind

    Returns a (number of input files, number of extracted panels) tuple
    """
    if not os.path.exists(output_dir):
        return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
        image_name, image_ext = os.path.splitext(image.image_name)
        panel_blocks = generate_panel_blocks_by_ai(image.image)
        for j, panel in enumerate(panel_blocks):
            out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
            cv2.imwrite(out_path, panel)
        num_panels += len(panel_blocks)
    return (num_files, num_panels)
|
|