import os
import warnings
from typing import Callable

import cv2
import numpy as np
from tqdm import tqdm

from image_processing.image import (
    is_contour_rectangular,
    apply_adaptive_threshold,
    group_contours_horizontally,
    group_contours_vertically,
    adaptive_hconcat,
    adaptive_vconcat,
    group_bounding_boxes_horizontally,
    group_bounding_boxes_vertically,
)
from image_processing.model import model
from myutils.myutils import load_images, load_image
from manga_panel_processor import sort_panels_by_column_then_row
class OutputMode:
    BOUNDING = 'bounding'
    MASKED = 'masked'

    @staticmethod
    def from_index(index: int) -> str:
        return [OutputMode.BOUNDING, OutputMode.MASKED][index]


class MergeMode:
    NONE = 'none'
    VERTICAL = 'vertical'
    HORIZONTAL = 'horizontal'

    @staticmethod
    def from_index(index: int) -> str:
        return [MergeMode.NONE, MergeMode.VERTICAL, MergeMode.HORIZONTAL][index]
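
# Example (hedged; the indices are assumed to come from a zero-based UI selector):
#
#   OutputMode.from_index(1)  # -> 'masked'
#   MergeMode.from_index(2)   # -> 'horizontal'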
def get_background_intensity_range(grayscale_image: np.ndarray, min_range: int = 1) -> tuple[int, int]:
"""
Returns the minimum and maximum intensity values of the background of the image
"""
edges = [grayscale_image[-1, :], grayscale_image[0, :], grayscale_image[:, 0], grayscale_image[:, -1]]
sorted_edges = sorted(edges, key=lambda x: np.var(x))
least_varied_edge = sorted_edges[0]
max_intensity = max(least_varied_edge)
min_intensity = max(min(min(least_varied_edge), max_intensity - min_range), 0)
return min_intensity, max_intensity
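
# Worked example: for a page with uniform 250-grey borders, e.g.
# np.full((5, 5), 250, np.uint8), every edge has zero variance, so
# max_intensity == 250 and min_intensity == max(min(250, 250 - 1), 0) == 249,
# and the function returns (249, 250).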
def generate_background_mask(grayscale_image: np.ndarray) -> np.ndarray:
"""
Generates a mask by focusing on the largest area of white pixels
"""
WHITE = 255
LESS_WHITE, _ = get_background_intensity_range(grayscale_image, 25)
LESS_WHITE = max(LESS_WHITE, 240)
    _, thresh = cv2.threshold(grayscale_image, LESS_WHITE, WHITE, cv2.THRESH_BINARY)
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh)
mask = np.zeros_like(thresh)
PAGE_TO_SEGMENT_RATIO = 1024
halting_area_size = mask.size // PAGE_TO_SEGMENT_RATIO
mask_height, mask_width = mask.shape
base_background_size_error_threshold = 0.05
whole_background_min_width = mask_width * (1 - base_background_size_error_threshold)
whole_background_min_height = mask_height * (1 - base_background_size_error_threshold)
    # Visit connected components in descending area order (label 0 is the background)
    for i in np.argsort(stats[1:, cv2.CC_STAT_AREA])[::-1]:
        contour_index = i + 1
        x, y, w, h, area = stats[contour_index]
if area < halting_area_size:
break
if (
(w > whole_background_min_width) or
(h > whole_background_min_height) or
(is_contour_rectangular(cv2.findContours((labels == contour_index).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0][0]))
):
mask[labels == contour_index] = WHITE
mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=2)
return mask
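
# Usage sketch (hedged; "page.png" is an assumed sample scan):
#
#   gray = cv2.cvtColor(cv2.imread("page.png"), cv2.COLOR_BGR2GRAY)
#   mask = generate_background_mask(preprocess_image_with_dilation(gray))
#   cv2.imwrite("background_mask.png", mask)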
def extract_panels(
image: np.ndarray,
panel_contours: list[np.ndarray],
accept_page_as_panel: bool = True,
mode: str = OutputMode.BOUNDING,
fill_in_color: tuple[int, int, int] = (0, 0, 0),
) -> list[np.ndarray]:
"""
Extracts panels from the image using the given contours corresponding to the panels
Parameters:
- image: The image to extract the panels from
- panel_contours: The contours corresponding to the panels
- accept_page_as_panel: Whether to accept the whole page as a panel
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
- fill_in_color: The color to fill in the background of the panel images
"""
height, width = image.shape[:2]
returned_panels = []
for contour in panel_contours:
x, y, w, h = cv2.boundingRect(contour)
if not accept_page_as_panel and ((w >= width * 0.99) or (h >= height * 0.99)):
continue
        if mode == OutputMode.MASKED:
mask = np.zeros_like(image)
cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)
masked_image = cv2.bitwise_and(image, mask)
fitted_panel = masked_image[y:y + h, x:x + w]
fitted_panel = cv2.bitwise_or(cv2.bitwise_and(cv2.bitwise_not(mask[y:y + h, x:x + w]), fill_in_color), fitted_panel)
else:
fitted_panel = image[y:y + h, x:x + w]
returned_panels.append(fitted_panel)
return returned_panels
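
# Usage sketch (hedged; `page` and `contours` are assumed to come from the
# surrounding pipeline, e.g. cv2.findContours on the background-free page):
#
#   crops = extract_panels(page, contours, accept_page_as_panel=False,
#                          mode=OutputMode.MASKED, fill_in_color=(255, 255, 255))
#   # each crop is a bounding-box cutout with non-panel pixels filled white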
def preprocess_image(grayscale_image: np.ndarray) -> np.ndarray:
"""
Preprocesses the image for panel extraction
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
return processed_image
def preprocess_image_with_dilation(grayscale_image: np.ndarray) -> np.ndarray:
"""
Preprocesses the image for panel extraction
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
processed_image = cv2.dilate(processed_image, np.ones((5, 5), np.uint8), iterations=1)
processed_image = 255 - processed_image
return processed_image
def joint_panel_split_extraction(grayscale_image: np.ndarray, background_mask: np.ndarray) -> np.ndarray:
"""
Extracts the panels from the image with splitting the joint panels
"""
pixels_before = np.count_nonzero(background_mask)
background_mask = cv2.ximgproc.thinning(background_mask)
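    # Each 3x3 kernel below matches one kind of line endpoint in the thinned
    # mask (e.g. up_kernel fires where the only set neighbour lies directly
    # below the centre: the upper tip of a vertical stroke); the corresponding
    # dilation kernel then extends that stroke further in the same direction.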
up_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 1, 0]], np.uint8)
down_kernel = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
left_kernel = np.array([[0, 0, 0], [0, 1, 1], [0, 0, 0]], np.uint8)
right_kernel = np.array([[0, 0, 0], [1, 1, 0], [0, 0, 0]], np.uint8)
down_right_diagonal_kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
down_left_diagonal_kernel = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 0]], np.uint8)
up_left_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]], np.uint8)
up_right_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]], np.uint8)
PAGE_TO_JOINT_OBJECT_RATIO = 3
image_height, image_width = grayscale_image.shape
height_based_size = image_height // PAGE_TO_JOINT_OBJECT_RATIO
width_based_size = (2 * image_width) // PAGE_TO_JOINT_OBJECT_RATIO
height_based_size += height_based_size % 2 + 1
width_based_size += width_based_size % 2 + 1
up_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
up_dilation_kernel[height_based_size // 2:, height_based_size // 2] = 1
down_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
down_dilation_kernel[:height_based_size // 2 + 1, height_based_size // 2] = 1
left_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
left_dilation_kernel[width_based_size // 2, width_based_size // 2:] = 1
right_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
right_dilation_kernel[width_based_size // 2, :width_based_size // 2 + 1] = 1
min_based_size = min(width_based_size, height_based_size)
down_right_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
down_right_dilation_kernel = np.pad(down_right_dilation_kernel, ((0, min_based_size // 2), (0, min_based_size // 2)))
up_left_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
up_left_dilation_kernel = np.pad(up_left_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))
up_right_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
up_right_dilation_kernel = np.pad(up_right_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))
down_left_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
down_left_dilation_kernel = np.pad(down_left_dilation_kernel, ((0, min_based_size // 2), (min_based_size // 2, 0)))
match_kernels = [
up_kernel,
down_kernel,
left_kernel,
right_kernel,
down_right_diagonal_kernel,
down_left_diagonal_kernel,
up_left_diagonal_kernel,
up_right_diagonal_kernel,
]
dilation_kernels = [
up_dilation_kernel,
down_dilation_kernel,
left_dilation_kernel,
right_dilation_kernel,
down_right_dilation_kernel,
down_left_dilation_kernel,
up_left_dilation_kernel,
up_right_dilation_kernel,
]
    def get_dots(grayscale_image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
temp = cv2.matchTemplate(grayscale_image, kernel, cv2.TM_CCOEFF_NORMED)
_, temp = cv2.threshold(temp, 0.9, 1, cv2.THRESH_BINARY)
temp = np.where(temp == 1, 255, 0).astype(np.uint8)
pad_height = (kernel.shape[0] - 1) // 2
pad_width = (kernel.shape[1] - 1) // 2
temp = cv2.copyMakeBorder(temp, pad_height, kernel.shape[0] - pad_height - 1, pad_width, kernel.shape[1] - pad_width - 1, cv2.BORDER_CONSTANT, value=0)
return temp
for match_kernel, dilation_kernel in zip(match_kernels, dilation_kernels):
dots = get_dots(background_mask, match_kernel)
lines = cv2.dilate(dots, dilation_kernel, iterations=1)
background_mask = cv2.bitwise_or(background_mask, lines)
    pixels_now = np.count_nonzero(background_mask)
    # Re-grow the separators: pick a dilation size proportional to how much the
    # mask shrank during thinning, then bump it to an odd kernel size
    dilation_size = pixels_before // (4 * pixels_now)
    dilation_size += dilation_size % 2 + 1
background_mask = cv2.dilate(background_mask, np.ones((dilation_size, dilation_size), np.uint8), iterations=1)
page_without_background = 255 - background_mask
return page_without_background
def is_contour_sufficiently_big(contour: np.ndarray, image_height: int, image_width: int) -> bool:
PAGE_TO_PANEL_RATIO = 32
image_area = image_width * image_height
area_threshold = image_area // PAGE_TO_PANEL_RATIO
area = cv2.contourArea(contour)
return area > area_threshold
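
# Worked example: on a 1000x1414 page, image_area = 1_414_000, so the panel
# area threshold is 1_414_000 // 32 = 44_187 pixels; smaller contours are
# discarded as noise.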
def threshold_extraction(
image: np.ndarray,
grayscale_image: np.ndarray,
mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
"""
Extracts panels from the image using thresholding
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
_, thresh = cv2.threshold(processed_image, 8, 255, cv2.THRESH_BINARY)
processed_image = apply_adaptive_threshold(processed_image)
processed_image = cv2.subtract(processed_image, thresh)
processed_image = cv2.dilate(processed_image, np.ones((3, 3), np.uint8), iterations=2)
contours, _ = cv2.findContours(processed_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))
panels = extract_panels(image, contours, False, mode=mode)
return panels
def get_page_without_background(grayscale_image: np.ndarray, background_mask: np.ndarray, split_joint_panels: bool = False) -> np.ndarray:
    """
    Returns the page with the background removed; when the mask covers only a
    small fraction of the page (stripe-format pages) and split_joint_panels is
    set, joint panels are split first
    """
    STRIPE_FORMAT_MASK_AREA_RATIO = 0.3
    mask_area = np.count_nonzero(background_mask)
    mask_area_ratio = mask_area / background_mask.size
    if split_joint_panels and mask_area_ratio < STRIPE_FORMAT_MASK_AREA_RATIO:
page_without_background = joint_panel_split_extraction(grayscale_image, background_mask)
else:
page_without_background = cv2.subtract(grayscale_image, background_mask)
return page_without_background
def get_fallback_panels(
image: np.ndarray,
grayscale_image: np.ndarray,
fallback: bool,
panels: list[np.ndarray],
mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
"""
Checks if the fallback is needed and returns the appropriate panels
Parameters:
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
"""
if fallback and len(panels) < 2:
tmp = threshold_extraction(image, grayscale_image, mode=mode)
if len(tmp) > len(panels):
return tmp
return panels
def generate_panel_blocks(
image: np.ndarray,
background_generator: Callable[[np.ndarray], np.ndarray] = generate_background_mask,
split_joint_panels: bool = False,
fallback: bool = True,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE,
rtl_order: bool = False
) -> list[np.ndarray]:
"""
Generates the separate panel images from the base image
Parameters:
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
- rtl_order: If True, sort panels from right-to-left. Otherwise, left-to-right.
"""
grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
processed_image = preprocess_image_with_dilation(grayscale_image)
background_mask = background_generator(processed_image)
page_without_background = get_page_without_background(grayscale_image, background_mask, split_joint_panels)
contours, _ = cv2.findContours(page_without_background, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))
    # Sort panels into reading order; rtl_order=True visits panels
    # right-to-left, otherwise left-to-right.
    if contours:
        contours = sort_panels_by_column_then_row(contours, rtl_order)
def get_panels(contours):
panels = extract_panels(image, contours, mode=mode)
panels = get_fallback_panels(image, grayscale_image, fallback, panels, mode=mode)
return panels
panels = []
if merge == MergeMode.NONE:
panels = get_panels(contours)
elif merge == MergeMode.HORIZONTAL:
grouped_contours = group_contours_horizontally(contours)
for group in grouped_contours:
panels.append(adaptive_hconcat(get_panels(group)))
elif merge == MergeMode.VERTICAL:
grouped_contours = group_contours_vertically(contours)
for group in grouped_contours:
panels.append(adaptive_vconcat(get_panels(group)))
return panels
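
# Usage sketch (hedged; "page.png" is an assumed sample file):
#
#   page = cv2.imread("page.png")
#   panels = generate_panel_blocks(page, split_joint_panels=True,
#                                  mode=OutputMode.MASKED,
#                                  merge=MergeMode.VERTICAL, rtl_order=True)
#   for i, p in enumerate(panels):
#       cv2.imwrite(f"panel_{i}.png", p)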
def generate_panel_blocks_by_ai(
image: np.ndarray,
merge: str = MergeMode.NONE,
rtl_order: bool = False
) -> list[np.ndarray]:
"""
Generates the separate panel images from the base image using AI with merge
Parameters:
- rtl_order: If True, sort panels from right-to-left. Otherwise, left-to-right.
"""
grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
processed_image = preprocess_image(grayscale_image)
    # Silence 'FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated...'
    # raised inside the model call, without clobbering the caller's warning filters
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        results = model(processed_image)
bounding_boxes = []
for detection in results.xyxy[0]: # Access predictions in (x1, y1, x2, y2, confidence, class) format
x1, y1, x2, y2, conf, cls = detection.tolist() # Convert to Python list
x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
bounding_boxes.append((x1, y1, x2 - x1, y2 - y1))
    # Bounding boxes are already (x, y, w, h), so we access coordinates directly.
    if bounding_boxes:
        bounding_boxes = sort_panels_by_column_then_row(bounding_boxes, rtl_order)
def get_panels(bounding_boxes):
panels = []
for x, y, w, h in bounding_boxes:
panel = image[y:y + h, x:x + w]
panels.append(panel)
return panels
panels = []
if merge == MergeMode.NONE:
panels = get_panels(bounding_boxes)
elif merge == MergeMode.HORIZONTAL:
grouped_bounding_boxes = group_bounding_boxes_horizontally(bounding_boxes)
for group in grouped_bounding_boxes:
panels.append(adaptive_hconcat(get_panels(group)))
elif merge == MergeMode.VERTICAL:
grouped_bounding_boxes = group_bounding_boxes_vertically(bounding_boxes)
for group in grouped_bounding_boxes:
panels.append(adaptive_vconcat(get_panels(group)))
return panels
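
# Usage sketch (hedged; requires the detection model imported from
# image_processing.model to be loadable):
#
#   page = cv2.imread("page.png")
#   panels = generate_panel_blocks_by_ai(page, merge=MergeMode.NONE, rtl_order=True)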
def extract_panels_for_image(
image_path: str,
output_dir: str,
fallback: bool = True,
split_joint_panels: bool = False,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE
) -> None:
"""
Extracts panels for a single image
"""
if not os.path.exists(image_path):
return
image_path = os.path.abspath(image_path)
image = load_image(os.path.dirname(image_path), image_path)
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks(image.image, split_joint_panels=split_joint_panels, fallback=fallback, mode=mode, merge=merge)
    for k, panel in enumerate(tqdm(panel_blocks)):
out_path = os.path.join(output_dir, f"{image_name}_{k}{image_ext}")
cv2.imwrite(out_path, panel)
def extract_panels_for_images_in_folder(
input_dir: str,
output_dir: str,
fallback: bool = True,
split_joint_panels: bool = False,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE
) -> tuple[int, int]:
"""
Basically the main function of the program,
this is written with cli usage in mind
"""
if not os.path.exists(output_dir):
return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks(image.image, fallback=fallback, split_joint_panels=split_joint_panels, mode=mode, merge=merge)
for j, panel in enumerate(panel_blocks):
out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
cv2.imwrite(out_path, panel)
num_panels += len(panel_blocks)
return (num_files, num_panels)
def extract_panels_for_images_in_folder_by_ai(
input_dir: str,
output_dir: str
) -> tuple[int, int]:
"""
Basically the main function of the program,
this is written with cli usage in mind
"""
if not os.path.exists(output_dir):
return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks_by_ai(image.image)
for j, panel in enumerate(panel_blocks):
out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
cv2.imwrite(out_path, panel)
num_panels += len(panel_blocks)
return (num_files, num_panels)
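

# A minimal CLI sketch (an assumption, not part of the original module):
# run as `python <module>.py <input_dir> <output_dir>` to batch-extract panels
# with the classical pipeline.
if __name__ == "__main__":
    import sys

    in_dir = sys.argv[1] if len(sys.argv) > 1 else "input"
    out_dir = sys.argv[2] if len(sys.argv) > 2 else "output"
    os.makedirs(out_dir, exist_ok=True)
    n_files, n_panels = extract_panels_for_images_in_folder(in_dir, out_dir)
    print(f"Processed {n_files} image(s); wrote {n_panels} panel(s).")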