import os
import warnings
from typing import Callable

import cv2
import numpy as np
from tqdm import tqdm

from image_processing.image import (
    is_contour_rectangular,
    apply_adaptive_threshold,
    group_contours_horizontally,
    group_contours_vertically,
    adaptive_hconcat,
    adaptive_vconcat,
    group_bounding_boxes_horizontally,
    group_bounding_boxes_vertically,
)
from image_processing.model import model
from myutils.myutils import load_images, load_image
from manga_panel_processor import sort_panels_by_column_then_row
class OutputMode:
    BOUNDING = 'bounding'
    MASKED = 'masked'

    @staticmethod
    def from_index(index: int) -> str:
        return [OutputMode.BOUNDING, OutputMode.MASKED][index]


class MergeMode:
    NONE = 'none'
    VERTICAL = 'vertical'
    HORIZONTAL = 'horizontal'

    @staticmethod
    def from_index(index: int) -> str:
        return [MergeMode.NONE, MergeMode.VERTICAL, MergeMode.HORIZONTAL][index]
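
# Example (hedged; the indices are assumed to come from a zero-based UI selector):
#
#   OutputMode.from_index(1)  # -> 'masked'
#   MergeMode.from_index(2)   # -> 'horizontal'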
def get_background_intensity_range(grayscale_image: np.ndarray, min_range: int = 1) -> tuple[int, int]:
"""
Returns the minimum and maximum intensity values of the background of the image
"""
edges = [grayscale_image[-1, :], grayscale_image[0, :], grayscale_image[:, 0], grayscale_image[:, -1]]
sorted_edges = sorted(edges, key=lambda x: np.var(x))
least_varied_edge = sorted_edges[0]
max_intensity = max(least_varied_edge)
min_intensity = max(min(min(least_varied_edge), max_intensity - min_range), 0)
return min_intensity, max_intensity
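
# Worked example: for a page with uniform 250-grey borders, e.g.
# np.full((5, 5), 250, np.uint8), every edge has zero variance, so
# max_intensity == 250 and min_intensity == max(min(250, 250 - 1), 0) == 249,
# and the function returns (249, 250).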
def generate_background_mask(grayscale_image: np.ndarray) -> np.ndarray:
"""
Generates a mask by focusing on the largest area of white pixels
"""
WHITE = 255
LESS_WHITE, _ = get_background_intensity_range(grayscale_image, 25)
LESS_WHITE = max(LESS_WHITE, 240)
    _, thresh = cv2.threshold(grayscale_image, LESS_WHITE, WHITE, cv2.THRESH_BINARY)
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh)
mask = np.zeros_like(thresh)
PAGE_TO_SEGMENT_RATIO = 1024
halting_area_size = mask.size // PAGE_TO_SEGMENT_RATIO
mask_height, mask_width = mask.shape
base_background_size_error_threshold = 0.05
whole_background_min_width = mask_width * (1 - base_background_size_error_threshold)
whole_background_min_height = mask_height * (1 - base_background_size_error_threshold)
    # Visit connected components in descending area order (label 0 is the background)
    for i in np.argsort(stats[1:, cv2.CC_STAT_AREA])[::-1]:
        contour_index = i + 1
        x, y, w, h, area = stats[contour_index]
if area < halting_area_size:
break
if (
(w > whole_background_min_width) or
(h > whole_background_min_height) or
(is_contour_rectangular(cv2.findContours((labels == contour_index).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0][0]))
):
mask[labels == contour_index] = WHITE
mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=2)
return mask
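
# Usage sketch (hedged; "page.png" is an assumed sample scan):
#
#   gray = cv2.cvtColor(cv2.imread("page.png"), cv2.COLOR_BGR2GRAY)
#   mask = generate_background_mask(preprocess_image_with_dilation(gray))
#   cv2.imwrite("background_mask.png", mask)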
def extract_panels(
image: np.ndarray,
panel_contours: list[np.ndarray],
accept_page_as_panel: bool = True,
mode: str = OutputMode.BOUNDING,
fill_in_color: tuple[int, int, int] = (0, 0, 0),
) -> list[np.ndarray]:
"""
Extracts panels from the image using the given contours corresponding to the panels
Parameters:
- image: The image to extract the panels from
- panel_contours: The contours corresponding to the panels
- accept_page_as_panel: Whether to accept the whole page as a panel
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
- fill_in_color: The color to fill in the background of the panel images
"""
height, width = image.shape[:2]
returned_panels = []
for contour in panel_contours:
x, y, w, h = cv2.boundingRect(contour)
if not accept_page_as_panel and ((w >= width * 0.99) or (h >= height * 0.99)):
continue
        if mode == OutputMode.MASKED:
mask = np.zeros_like(image)
cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)
masked_image = cv2.bitwise_and(image, mask)
fitted_panel = masked_image[y:y + h, x:x + w]
fitted_panel = cv2.bitwise_or(cv2.bitwise_and(cv2.bitwise_not(mask[y:y + h, x:x + w]), fill_in_color), fitted_panel)
else:
fitted_panel = image[y:y + h, x:x + w]
returned_panels.append(fitted_panel)
return returned_panels
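
# Usage sketch (hedged; `page` and `contours` are assumed to come from the
# surrounding pipeline, e.g. cv2.findContours on the background-free page):
#
#   crops = extract_panels(page, contours, accept_page_as_panel=False,
#                          mode=OutputMode.MASKED, fill_in_color=(255, 255, 255))
#   # each crop is a bounding-box cutout with non-panel pixels filled white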
def preprocess_image(grayscale_image: np.ndarray) -> np.ndarray:
"""
Preprocesses the image for panel extraction
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
return processed_image
def preprocess_image_with_dilation(grayscale_image: np.ndarray) -> np.ndarray:
"""
Preprocesses the image for panel extraction
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
processed_image = cv2.dilate(processed_image, np.ones((5, 5), np.uint8), iterations=1)
processed_image = 255 - processed_image
return processed_image
def joint_panel_split_extraction(grayscale_image: np.ndarray, background_mask: np.ndarray) -> np.ndarray:
"""
Extracts the panels from the image with splitting the joint panels
"""
pixels_before = np.count_nonzero(background_mask)
background_mask = cv2.ximgproc.thinning(background_mask)
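    # Each 3x3 kernel below matches one kind of line endpoint in the thinned
    # mask (e.g. up_kernel fires where the only set neighbour lies directly
    # below the centre: the upper tip of a vertical stroke); the corresponding
    # dilation kernel then extends that stroke further in the same direction.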
up_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 1, 0]], np.uint8)
down_kernel = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
left_kernel = np.array([[0, 0, 0], [0, 1, 1], [0, 0, 0]], np.uint8)
right_kernel = np.array([[0, 0, 0], [1, 1, 0], [0, 0, 0]], np.uint8)
down_right_diagonal_kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]], np.uint8)
down_left_diagonal_kernel = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 0]], np.uint8)
up_left_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]], np.uint8)
up_right_diagonal_kernel = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]], np.uint8)
PAGE_TO_JOINT_OBJECT_RATIO = 3
image_height, image_width = grayscale_image.shape
height_based_size = image_height // PAGE_TO_JOINT_OBJECT_RATIO
width_based_size = (2 * image_width) // PAGE_TO_JOINT_OBJECT_RATIO
height_based_size += height_based_size % 2 + 1
width_based_size += width_based_size % 2 + 1
up_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
up_dilation_kernel[height_based_size // 2:, height_based_size // 2] = 1
down_dilation_kernel = np.zeros((height_based_size, height_based_size), np.uint8)
down_dilation_kernel[:height_based_size // 2 + 1, height_based_size // 2] = 1
left_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
left_dilation_kernel[width_based_size // 2, width_based_size // 2:] = 1
right_dilation_kernel = np.zeros((width_based_size, width_based_size), np.uint8)
right_dilation_kernel[width_based_size // 2, :width_based_size // 2 + 1] = 1
min_based_size = min(width_based_size, height_based_size)
down_right_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
down_right_dilation_kernel = np.pad(down_right_dilation_kernel, ((0, min_based_size // 2), (0, min_based_size // 2)))
up_left_dilation_kernel = np.identity(min_based_size // 2 + 1, dtype=np.uint8)
up_left_dilation_kernel = np.pad(up_left_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))
up_right_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
up_right_dilation_kernel = np.pad(up_right_dilation_kernel, ((min_based_size // 2, 0), (0, min_based_size // 2)))
down_left_dilation_kernel = np.flip(np.identity(min_based_size // 2 + 1, dtype=np.uint8), axis=1)
down_left_dilation_kernel = np.pad(down_left_dilation_kernel, ((0, min_based_size // 2), (min_based_size // 2, 0)))
match_kernels = [
up_kernel,
down_kernel,
left_kernel,
right_kernel,
down_right_diagonal_kernel,
down_left_diagonal_kernel,
up_left_diagonal_kernel,
up_right_diagonal_kernel,
]
dilation_kernels = [
up_dilation_kernel,
down_dilation_kernel,
left_dilation_kernel,
right_dilation_kernel,
down_right_dilation_kernel,
down_left_dilation_kernel,
up_left_dilation_kernel,
up_right_dilation_kernel,
]
    def get_dots(grayscale_image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
temp = cv2.matchTemplate(grayscale_image, kernel, cv2.TM_CCOEFF_NORMED)
_, temp = cv2.threshold(temp, 0.9, 1, cv2.THRESH_BINARY)
temp = np.where(temp == 1, 255, 0).astype(np.uint8)
pad_height = (kernel.shape[0] - 1) // 2
pad_width = (kernel.shape[1] - 1) // 2
temp = cv2.copyMakeBorder(temp, pad_height, kernel.shape[0] - pad_height - 1, pad_width, kernel.shape[1] - pad_width - 1, cv2.BORDER_CONSTANT, value=0)
return temp
for match_kernel, dilation_kernel in zip(match_kernels, dilation_kernels):
dots = get_dots(background_mask, match_kernel)
lines = cv2.dilate(dots, dilation_kernel, iterations=1)
background_mask = cv2.bitwise_or(background_mask, lines)
    pixels_now = np.count_nonzero(background_mask)
    # Re-grow the separators: pick a dilation size proportional to how much the
    # mask shrank during thinning, then bump it to an odd kernel size
    dilation_size = pixels_before // (4 * pixels_now)
    dilation_size += dilation_size % 2 + 1
background_mask = cv2.dilate(background_mask, np.ones((dilation_size, dilation_size), np.uint8), iterations=1)
page_without_background = 255 - background_mask
return page_without_background
def is_contour_sufficiently_big(contour: np.ndarray, image_height: int, image_width: int) -> bool:
PAGE_TO_PANEL_RATIO = 32
image_area = image_width * image_height
area_threshold = image_area // PAGE_TO_PANEL_RATIO
area = cv2.contourArea(contour)
return area > area_threshold
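
# Worked example: on a 1000x1414 page, image_area = 1_414_000, so the panel
# area threshold is 1_414_000 // 32 = 44_187 pixels; smaller contours are
# discarded as noise.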
def threshold_extraction(
image: np.ndarray,
grayscale_image: np.ndarray,
mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
"""
Extracts panels from the image using thresholding
"""
processed_image = cv2.GaussianBlur(grayscale_image, (3, 3), 0)
processed_image = cv2.Laplacian(processed_image, -1)
_, thresh = cv2.threshold(processed_image, 8, 255, cv2.THRESH_BINARY)
processed_image = apply_adaptive_threshold(processed_image)
processed_image = cv2.subtract(processed_image, thresh)
processed_image = cv2.dilate(processed_image, np.ones((3, 3), np.uint8), iterations=2)
contours, _ = cv2.findContours(processed_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))
panels = extract_panels(image, contours, False, mode=mode)
return panels
def get_page_without_background(grayscale_image: np.ndarray, background_mask: np.ndarray, split_joint_panels: bool = False) -> np.ndarray:
    """
    Returns the page with the background removed; when the mask covers only a
    small fraction of the page (stripe-format pages) and split_joint_panels is
    set, joint panels are split first
    """
    STRIPE_FORMAT_MASK_AREA_RATIO = 0.3
    mask_area = np.count_nonzero(background_mask)
    mask_area_ratio = mask_area / background_mask.size
    if split_joint_panels and mask_area_ratio < STRIPE_FORMAT_MASK_AREA_RATIO:
page_without_background = joint_panel_split_extraction(grayscale_image, background_mask)
else:
page_without_background = cv2.subtract(grayscale_image, background_mask)
return page_without_background
def get_fallback_panels(
image: np.ndarray,
grayscale_image: np.ndarray,
fallback: bool,
panels: list[np.ndarray],
mode: str = OutputMode.BOUNDING,
) -> list[np.ndarray]:
"""
Checks if the fallback is needed and returns the appropriate panels
Parameters:
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
"""
if fallback and len(panels) < 2:
tmp = threshold_extraction(image, grayscale_image, mode=mode)
if len(tmp) > len(panels):
return tmp
return panels
def generate_panel_blocks(
image: np.ndarray,
background_generator: Callable[[np.ndarray], np.ndarray] = generate_background_mask,
split_joint_panels: bool = False,
fallback: bool = True,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE,
rtl_order: bool = False
) -> list[np.ndarray]:
"""
Generates the separate panel images from the base image
Parameters:
- mode: The mode to use for extraction
- 'masked': Extracts the panels by cuting out only the inside of the contours
- 'bounding': Extracts the panels by using the bounding boxes of the contours
- rtl_order: If True, sort panels from right-to-left. Otherwise, left-to-right.
"""
grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
processed_image = preprocess_image_with_dilation(grayscale_image)
background_mask = background_generator(processed_image)
page_without_background = get_page_without_background(grayscale_image, background_mask, split_joint_panels)
contours, _ = cv2.findContours(page_without_background, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = list(filter(lambda c: is_contour_sufficiently_big(c, image.shape[0], image.shape[1]), contours))
    # Sort panels into reading order; rtl_order=True visits panels
    # right-to-left, otherwise left-to-right.
    if contours:
        contours = sort_panels_by_column_then_row(contours, rtl_order)
def get_panels(contours):
panels = extract_panels(image, contours, mode=mode)
panels = get_fallback_panels(image, grayscale_image, fallback, panels, mode=mode)
return panels
panels = []
if merge == MergeMode.NONE:
panels = get_panels(contours)
elif merge == MergeMode.HORIZONTAL:
grouped_contours = group_contours_horizontally(contours)
for group in grouped_contours:
panels.append(adaptive_hconcat(get_panels(group)))
elif merge == MergeMode.VERTICAL:
grouped_contours = group_contours_vertically(contours)
for group in grouped_contours:
panels.append(adaptive_vconcat(get_panels(group)))
return panels
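
# Usage sketch (hedged; "page.png" is an assumed sample file):
#
#   page = cv2.imread("page.png")
#   panels = generate_panel_blocks(page, split_joint_panels=True,
#                                  mode=OutputMode.MASKED,
#                                  merge=MergeMode.VERTICAL, rtl_order=True)
#   for i, p in enumerate(panels):
#       cv2.imwrite(f"panel_{i}.png", p)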
def generate_panel_blocks_by_ai(
image: np.ndarray,
merge: str = MergeMode.NONE,
rtl_order: bool = False
) -> list[np.ndarray]:
"""
Generates the separate panel images from the base image using AI with merge
Parameters:
- rtl_order: If True, sort panels from right-to-left. Otherwise, left-to-right.
"""
grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
processed_image = preprocess_image(grayscale_image)
    # Silence 'FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated...'
    # raised inside the model call, without clobbering the caller's warning filters
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        results = model(processed_image)
bounding_boxes = []
for detection in results.xyxy[0]: # Access predictions in (x1, y1, x2, y2, confidence, class) format
x1, y1, x2, y2, conf, cls = detection.tolist() # Convert to Python list
x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
bounding_boxes.append((x1, y1, x2 - x1, y2 - y1))
    # Bounding boxes are already (x, y, w, h), so we access coordinates directly.
    if bounding_boxes:
        bounding_boxes = sort_panels_by_column_then_row(bounding_boxes, rtl_order)
def get_panels(bounding_boxes):
panels = []
for x, y, w, h in bounding_boxes:
panel = image[y:y + h, x:x + w]
panels.append(panel)
return panels
panels = []
if merge == MergeMode.NONE:
panels = get_panels(bounding_boxes)
elif merge == MergeMode.HORIZONTAL:
grouped_bounding_boxes = group_bounding_boxes_horizontally(bounding_boxes)
for group in grouped_bounding_boxes:
panels.append(adaptive_hconcat(get_panels(group)))
elif merge == MergeMode.VERTICAL:
grouped_bounding_boxes = group_bounding_boxes_vertically(bounding_boxes)
for group in grouped_bounding_boxes:
panels.append(adaptive_vconcat(get_panels(group)))
return panels
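
# Usage sketch (hedged; requires the detection model imported from
# image_processing.model to be loadable):
#
#   page = cv2.imread("page.png")
#   panels = generate_panel_blocks_by_ai(page, merge=MergeMode.NONE, rtl_order=True)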
def extract_panels_for_image(
image_path: str,
output_dir: str,
fallback: bool = True,
split_joint_panels: bool = False,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE
) -> None:
"""
Extracts panels for a single image
"""
if not os.path.exists(image_path):
return
image_path = os.path.abspath(image_path)
image = load_image(os.path.dirname(image_path), image_path)
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks(image.image, split_joint_panels=split_joint_panels, fallback=fallback, mode=mode, merge=merge)
    for k, panel in enumerate(tqdm(panel_blocks)):
out_path = os.path.join(output_dir, f"{image_name}_{k}{image_ext}")
cv2.imwrite(out_path, panel)
def extract_panels_for_images_in_folder(
input_dir: str,
output_dir: str,
fallback: bool = True,
split_joint_panels: bool = False,
mode: str = OutputMode.BOUNDING,
merge: str = MergeMode.NONE
) -> tuple[int, int]:
"""
Basically the main function of the program,
this is written with cli usage in mind
"""
if not os.path.exists(output_dir):
return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks(image.image, fallback=fallback, split_joint_panels=split_joint_panels, mode=mode, merge=merge)
for j, panel in enumerate(panel_blocks):
out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
cv2.imwrite(out_path, panel)
num_panels += len(panel_blocks)
return (num_files, num_panels)
def extract_panels_for_images_in_folder_by_ai(
input_dir: str,
output_dir: str
) -> tuple[int, int]:
"""
Basically the main function of the program,
this is written with cli usage in mind
"""
if not os.path.exists(output_dir):
return (0, 0)
    num_files = len(os.listdir(input_dir))
    num_panels = 0
    for image in tqdm(load_images(input_dir), total=num_files):
image_name, image_ext = os.path.splitext(image.image_name)
panel_blocks = generate_panel_blocks_by_ai(image.image)
for j, panel in enumerate(panel_blocks):
out_path = os.path.join(output_dir, f"{image_name}_{j}{image_ext}")
cv2.imwrite(out_path, panel)
num_panels += len(panel_blocks)
return (num_files, num_panels)
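

# A minimal CLI sketch (an assumption, not part of the original module):
# run as `python <module>.py <input_dir> <output_dir>` to batch-extract panels
# with the classical pipeline.
if __name__ == "__main__":
    import sys

    in_dir = sys.argv[1] if len(sys.argv) > 1 else "input"
    out_dir = sys.argv[2] if len(sys.argv) > 2 else "output"
    os.makedirs(out_dir, exist_ok=True)
    n_files, n_panels = extract_panels_for_images_in_folder(in_dir, out_dir)
    print(f"Processed {n_files} image(s); wrote {n_panels} panel(s).")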