Spaces:
Running
Running
removed ocr
Browse files
comic_panel_extractor/image_processor.py
CHANGED
|
@@ -34,15 +34,22 @@ class ImageProcessor:
|
|
| 34 |
|
| 35 |
# Convert to grayscale and binary
|
| 36 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 37 |
-
_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
is_inverted = False
|
| 39 |
# binary, is_inverted = self.invert_if_black_dominates(binary)
|
| 40 |
|
| 41 |
if not is_inverted:
|
| 42 |
# Dilate to strengthen borders
|
| 43 |
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
| 44 |
-
dilated = cv2.dilate(
|
| 45 |
-
else: dilated =
|
| 46 |
|
| 47 |
# Save intermediate results
|
| 48 |
gray_path = f'{self.config.output_folder}/2_gray.jpg'
|
|
@@ -50,7 +57,7 @@ class ImageProcessor:
|
|
| 50 |
dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
|
| 51 |
|
| 52 |
cv2.imwrite(str(gray_path), gray)
|
| 53 |
-
cv2.imwrite(str(binary_path),
|
| 54 |
cv2.imwrite(str(dilated_path), dilated)
|
| 55 |
|
| 56 |
return str(gray_path), str(binary_path), str(dilated_path), is_inverted
|
|
@@ -74,7 +81,7 @@ class ImageProcessor:
|
|
| 74 |
# Save result
|
| 75 |
return inverted, black_pixels > white_pixels
|
| 76 |
|
| 77 |
-
def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"
|
| 78 |
img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
|
| 79 |
height, width = img.shape
|
| 80 |
|
|
@@ -88,7 +95,7 @@ class ImageProcessor:
|
|
| 88 |
mask = np.zeros_like(binary)
|
| 89 |
for cnt in contours:
|
| 90 |
area = cv2.contourArea(cnt)
|
| 91 |
-
if area >= (height * width * min_area_ratio):
|
| 92 |
cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
|
| 93 |
|
| 94 |
# Apply mask to original image (keeps only large borders)
|
|
@@ -115,11 +122,19 @@ class ImageProcessor:
|
|
| 115 |
from skimage.measure import label
|
| 116 |
|
| 117 |
# Load image
|
| 118 |
-
img = cv2.imread(processed_image_path
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Skeletonize
|
| 122 |
-
skeleton = skeletonize(
|
| 123 |
|
| 124 |
# Remove small hanging clusters
|
| 125 |
labeled = label(skeleton, connectivity=2)
|
|
|
|
| 34 |
|
| 35 |
# Convert to grayscale and binary
|
| 36 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 37 |
+
# _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
|
| 38 |
+
|
| 39 |
+
# Apply Gaussian blur to reduce noise
|
| 40 |
+
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
|
| 41 |
+
|
| 42 |
+
# Canny edge detection
|
| 43 |
+
edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
|
| 44 |
+
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
|
| 45 |
is_inverted = False
|
| 46 |
# binary, is_inverted = self.invert_if_black_dominates(binary)
|
| 47 |
|
| 48 |
if not is_inverted:
|
| 49 |
# Dilate to strengthen borders
|
| 50 |
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
|
| 51 |
+
dilated = cv2.dilate(edges, kernel, iterations=2)
|
| 52 |
+
else: dilated = edges
|
| 53 |
|
| 54 |
# Save intermediate results
|
| 55 |
gray_path = f'{self.config.output_folder}/2_gray.jpg'
|
|
|
|
| 57 |
dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
|
| 58 |
|
| 59 |
cv2.imwrite(str(gray_path), gray)
|
| 60 |
+
cv2.imwrite(str(binary_path), edges)
|
| 61 |
cv2.imwrite(str(dilated_path), dilated)
|
| 62 |
|
| 63 |
return str(gray_path), str(binary_path), str(dilated_path), is_inverted
|
|
|
|
| 81 |
# Save result
|
| 82 |
return inverted, black_pixels > white_pixels
|
| 83 |
|
| 84 |
+
def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"):
|
| 85 |
img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
|
| 86 |
height, width = img.shape
|
| 87 |
|
|
|
|
| 95 |
mask = np.zeros_like(binary)
|
| 96 |
for cnt in contours:
|
| 97 |
area = cv2.contourArea(cnt)
|
| 98 |
+
if area >= (height * width * self.config.min_area_ratio):
|
| 99 |
cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
|
| 100 |
|
| 101 |
# Apply mask to original image (keeps only large borders)
|
|
|
|
| 122 |
from skimage.measure import label
|
| 123 |
|
| 124 |
# Load image
|
| 125 |
+
img = cv2.imread(processed_image_path)
|
| 126 |
+
# Convert to grayscale and binary
|
| 127 |
+
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 128 |
+
# _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
|
| 129 |
+
|
| 130 |
+
# Apply Gaussian blur to reduce noise
|
| 131 |
+
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
|
| 132 |
+
|
| 133 |
+
# Canny edge detection
|
| 134 |
+
edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
|
| 135 |
|
| 136 |
# Skeletonize
|
| 137 |
+
skeleton = skeletonize(edges).astype(np.uint8)
|
| 138 |
|
| 139 |
# Remove small hanging clusters
|
| 140 |
labeled = label(skeleton, connectivity=2)
|
comic_panel_extractor/main.py
CHANGED
|
@@ -32,8 +32,8 @@ class ComicPanelExtractor:
|
|
| 32 |
self.config.black_overlay_input_path = processed_image_path
|
| 33 |
|
| 34 |
# Step 1: Detect and mask text regions
|
| 35 |
-
text_bubbles = self._detect_text_bubbles()
|
| 36 |
-
processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
|
| 37 |
|
| 38 |
# Step 2: Preprocess image
|
| 39 |
_, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
|
|
|
|
| 32 |
self.config.black_overlay_input_path = processed_image_path
|
| 33 |
|
| 34 |
# Step 1: Detect and mask text regions
|
| 35 |
+
# text_bubbles = self._detect_text_bubbles()
|
| 36 |
+
# processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
|
| 37 |
|
| 38 |
# Step 2: Preprocess image
|
| 39 |
_, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
|
comic_panel_extractor/panel_extractor.py
CHANGED
|
@@ -36,7 +36,7 @@ class PanelExtractor:
|
|
| 36 |
def __init__(self, config: Config):
|
| 37 |
self.config = config
|
| 38 |
|
| 39 |
-
def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20
|
| 40 |
"""Extract comic panels using black percentage scan."""
|
| 41 |
dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
|
| 42 |
original = cv2.imread(self.config.input_path)
|
|
@@ -47,7 +47,7 @@ class PanelExtractor:
|
|
| 47 |
height, width = dilated.shape
|
| 48 |
|
| 49 |
# Find row gutters and panel rows
|
| 50 |
-
panel_rows = self._find_panel_rows(dilated, row_thresh
|
| 51 |
|
| 52 |
# Extract panels from each row
|
| 53 |
all_panels = []
|
|
@@ -57,7 +57,7 @@ class PanelExtractor:
|
|
| 57 |
|
| 58 |
# Filter panels by size
|
| 59 |
filtered_panels = self._filter_panels_by_size(
|
| 60 |
-
all_panels, width, height
|
| 61 |
)
|
| 62 |
|
| 63 |
# Extract panel images and save
|
|
@@ -67,7 +67,7 @@ class PanelExtractor:
|
|
| 67 |
|
| 68 |
return panel_images, panel_data, all_panel_path
|
| 69 |
|
| 70 |
-
def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int
|
| 71 |
"""Find panel rows where consecutive rows meet the threshold and height constraint."""
|
| 72 |
height, width = dilated.shape
|
| 73 |
|
|
@@ -92,7 +92,7 @@ class PanelExtractor:
|
|
| 92 |
if y != start_row:
|
| 93 |
# Only extend if combined height meets min_height_ratio
|
| 94 |
combined_height = y - start_row + 1
|
| 95 |
-
if combined_height / height >= min_height_ratio:
|
| 96 |
prev_row = y
|
| 97 |
row_gutters.append((start_row, prev_row))
|
| 98 |
start_row = y
|
|
@@ -115,7 +115,7 @@ class PanelExtractor:
|
|
| 115 |
|
| 116 |
return row_gutters
|
| 117 |
|
| 118 |
-
def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int
|
| 119 |
"""
|
| 120 |
Find panel columns where consecutive columns meet the threshold and width constraint.
|
| 121 |
"""
|
|
@@ -142,7 +142,7 @@ class PanelExtractor:
|
|
| 142 |
if x != start_col:
|
| 143 |
# Only extend if combined width meets min_width_ratio
|
| 144 |
combined_width = x - start_col + 1
|
| 145 |
-
if combined_width / width >= min_width_ratio:
|
| 146 |
prev_col = x
|
| 147 |
col_gutters.append((start_col, prev_col))
|
| 148 |
start_col = x
|
|
@@ -197,28 +197,25 @@ class PanelExtractor:
|
|
| 197 |
|
| 198 |
return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
|
| 199 |
|
| 200 |
-
def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]],
|
| 201 |
-
width: int, height: int, min_width_ratio: float,
|
| 202 |
-
min_height_ratio: float, min_area_ratio: float) -> List[Tuple[int, int, int, int]]:
|
| 203 |
"""Filter panels by size constraints."""
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
if (x2 - x1) >= min_allowed_width and (y2 - y1) >= min_allowed_height]
|
| 222 |
|
| 223 |
def count_panel_files(self, folder_path: str) -> int:
|
| 224 |
"""
|
|
|
|
| 36 |
def __init__(self, config: Config):
|
| 37 |
self.config = config
|
| 38 |
|
| 39 |
+
def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20) -> Tuple[List[np.ndarray], List[PanelData]]:
|
| 40 |
"""Extract comic panels using black percentage scan."""
|
| 41 |
dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
|
| 42 |
original = cv2.imread(self.config.input_path)
|
|
|
|
| 47 |
height, width = dilated.shape
|
| 48 |
|
| 49 |
# Find row gutters and panel rows
|
| 50 |
+
panel_rows = self._find_panel_rows(dilated, row_thresh)
|
| 51 |
|
| 52 |
# Extract panels from each row
|
| 53 |
all_panels = []
|
|
|
|
| 57 |
|
| 58 |
# Filter panels by size
|
| 59 |
filtered_panels = self._filter_panels_by_size(
|
| 60 |
+
all_panels, width, height
|
| 61 |
)
|
| 62 |
|
| 63 |
# Extract panel images and save
|
|
|
|
| 67 |
|
| 68 |
return panel_images, panel_data, all_panel_path
|
| 69 |
|
| 70 |
+
def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int) -> List[Tuple[int, int]]:
|
| 71 |
"""Find panel rows where consecutive rows meet the threshold and height constraint."""
|
| 72 |
height, width = dilated.shape
|
| 73 |
|
|
|
|
| 92 |
if y != start_row:
|
| 93 |
# Only extend if combined height meets min_height_ratio
|
| 94 |
combined_height = y - start_row + 1
|
| 95 |
+
if combined_height / height >= self.config.min_height_ratio:
|
| 96 |
prev_row = y
|
| 97 |
row_gutters.append((start_row, prev_row))
|
| 98 |
start_row = y
|
|
|
|
| 115 |
|
| 116 |
return row_gutters
|
| 117 |
|
| 118 |
+
def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int) -> List[Tuple[int, int]]:
|
| 119 |
"""
|
| 120 |
Find panel columns where consecutive columns meet the threshold and width constraint.
|
| 121 |
"""
|
|
|
|
| 142 |
if x != start_col:
|
| 143 |
# Only extend if combined width meets min_width_ratio
|
| 144 |
combined_width = x - start_col + 1
|
| 145 |
+
if combined_width / width >= self.config.min_width_ratio:
|
| 146 |
prev_col = x
|
| 147 |
col_gutters.append((start_col, prev_col))
|
| 148 |
start_col = x
|
|
|
|
| 197 |
|
| 198 |
return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
|
| 199 |
|
| 200 |
+
def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]], width: int, height: int) -> List[Tuple[int, int, int, int]]:
|
|
|
|
|
|
|
| 201 |
"""Filter panels by size constraints."""
|
| 202 |
+
new_panel = []
|
| 203 |
+
image_area = width * height
|
| 204 |
+
|
| 205 |
+
for x1, y1, x2, y2 in panels:
|
| 206 |
+
w = x2 - x1 # Corrected
|
| 207 |
+
h = y2 - y1 # Corrected
|
| 208 |
+
area = w * h
|
| 209 |
+
|
| 210 |
+
if (
|
| 211 |
+
area >= self.config.min_area_ratio * image_area and
|
| 212 |
+
w >= self.config.min_width_ratio * width and
|
| 213 |
+
h >= self.config.min_height_ratio * height
|
| 214 |
+
):
|
| 215 |
+
new_panel.append((x1, y1, x2, y2))
|
| 216 |
+
|
| 217 |
+
return new_panel
|
| 218 |
+
|
|
|
|
| 219 |
|
| 220 |
def count_panel_files(self, folder_path: str) -> int:
|
| 221 |
"""
|