Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Community

jebin2 committed on Jul 28

Commit

b269113

1 Parent(s): d28110d

removed ocr

Browse files

Files changed (3) hide show

comic_panel_extractor/image_processor.py +24 -9
comic_panel_extractor/main.py +2 -2
comic_panel_extractor/panel_extractor.py +25 -28

comic_panel_extractor/image_processor.py CHANGED Viewed

@@ -34,15 +34,22 @@ class ImageProcessor:
         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
         is_inverted = False
         # binary, is_inverted = self.invert_if_black_dominates(binary)
         if not is_inverted:
             # Dilate to strengthen borders
             kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-            dilated = cv2.dilate(binary, kernel, iterations=2)
-        else: dilated = binary
         # Save intermediate results
         gray_path = f'{self.config.output_folder}/2_gray.jpg'
@@ -50,7 +57,7 @@ class ImageProcessor:
         dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
         cv2.imwrite(str(gray_path), gray)
-        cv2.imwrite(str(binary_path), binary)
         cv2.imwrite(str(dilated_path), dilated)
         return str(gray_path), str(binary_path), str(dilated_path), is_inverted
@@ -74,7 +81,7 @@ class ImageProcessor:
         # Save result
         return inverted, black_pixels > white_pixels
-    def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg", min_area_ratio=0):
         img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
         height, width = img.shape
@@ -88,7 +95,7 @@ class ImageProcessor:
         mask = np.zeros_like(binary)
         for cnt in contours:
             area = cv2.contourArea(cnt)
-            if area >= (height * width * min_area_ratio):
                 cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
         # Apply mask to original image (keeps only large borders)
@@ -115,11 +122,19 @@ class ImageProcessor:
         from skimage.measure import label
         # Load image
-        img = cv2.imread(processed_image_path, cv2.IMREAD_GRAYSCALE)
-        _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV)  # invert, binary mask (0,1)
         # Skeletonize
-        skeleton = skeletonize(binary).astype(np.uint8)
         # Remove small hanging clusters
         labeled = label(skeleton, connectivity=2)

         # Convert to grayscale and binary
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+        # Apply Gaussian blur to reduce noise
+        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
+        # Canny edge detection
+        edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
         is_inverted = False
         # binary, is_inverted = self.invert_if_black_dominates(binary)
         if not is_inverted:
             # Dilate to strengthen borders
             kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+            dilated = cv2.dilate(edges, kernel, iterations=2)
+        else: dilated = edges
         # Save intermediate results
         gray_path = f'{self.config.output_folder}/2_gray.jpg'
         dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
         cv2.imwrite(str(gray_path), gray)
+        cv2.imwrite(str(binary_path), edges)
         cv2.imwrite(str(dilated_path), dilated)
         return str(gray_path), str(binary_path), str(dilated_path), is_inverted
         # Save result
         return inverted, black_pixels > white_pixels
+    def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"):
         img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
         height, width = img.shape
         mask = np.zeros_like(binary)
         for cnt in contours:
             area = cv2.contourArea(cnt)
+            if area >= (height * width * self.config.min_area_ratio):
                 cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
         # Apply mask to original image (keeps only large borders)
         from skimage.measure import label
         # Load image
+        img = cv2.imread(processed_image_path)
+        # Convert to grayscale and binary
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+        # Apply Gaussian blur to reduce noise
+        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
+        # Canny edge detection
+        edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
         # Skeletonize
+        skeleton = skeletonize(edges).astype(np.uint8)
         # Remove small hanging clusters
         labeled = label(skeleton, connectivity=2)

comic_panel_extractor/main.py CHANGED Viewed

@@ -32,8 +32,8 @@ class ComicPanelExtractor:
         self.config.black_overlay_input_path = processed_image_path
         # Step 1: Detect and mask text regions
-        text_bubbles = self._detect_text_bubbles()
-        processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
         # Step 2: Preprocess image
         _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)

         self.config.black_overlay_input_path = processed_image_path
         # Step 1: Detect and mask text regions
+        # text_bubbles = self._detect_text_bubbles()
+        # processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
         # Step 2: Preprocess image
         _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)

comic_panel_extractor/panel_extractor.py CHANGED Viewed

@@ -36,7 +36,7 @@ class PanelExtractor:
     def __init__(self, config: Config):
         self.config = config
-    def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20, min_width_ratio: float = 0.001, min_height_ratio: float = 0.001, min_area_ratio: float = 0) -> Tuple[List[np.ndarray], List[PanelData]]:
         """Extract comic panels using black percentage scan."""
         dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
         original = cv2.imread(self.config.input_path)
@@ -47,7 +47,7 @@ class PanelExtractor:
         height, width = dilated.shape
         # Find row gutters and panel rows
-        panel_rows = self._find_panel_rows(dilated, row_thresh, min_height_ratio)
         # Extract panels from each row
         all_panels = []
@@ -57,7 +57,7 @@ class PanelExtractor:
         # Filter panels by size
         filtered_panels = self._filter_panels_by_size(
-            all_panels, width, height, min_width_ratio, min_height_ratio, min_area_ratio
         )
         # Extract panel images and save
@@ -67,7 +67,7 @@ class PanelExtractor:
         return panel_images, panel_data, all_panel_path
-    def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int, min_height_ratio: float) -> List[Tuple[int, int]]:
         """Find panel rows where consecutive rows meet the threshold and height constraint."""
         height, width = dilated.shape
@@ -92,7 +92,7 @@ class PanelExtractor:
                 if y != start_row:
                     # Only extend if combined height meets min_height_ratio
                     combined_height = y - start_row + 1
-                    if combined_height / height >= min_height_ratio:
                         prev_row = y
                         row_gutters.append((start_row, prev_row))
                         start_row = y
@@ -115,7 +115,7 @@ class PanelExtractor:
         return row_gutters
-    def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int, min_width_ratio: float) -> List[Tuple[int, int]]:
         """
         Find panel columns where consecutive columns meet the threshold and width constraint.
         """
@@ -142,7 +142,7 @@ class PanelExtractor:
                 if x != start_col:
                     # Only extend if combined width meets min_width_ratio
                     combined_width = x - start_col + 1
-                    if combined_width / width >= min_width_ratio:
                         prev_col = x
                         col_gutters.append((start_col, prev_col))
                         start_col = x
@@ -197,28 +197,25 @@ class PanelExtractor:
         return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
-    def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]],
-                              width: int, height: int, min_width_ratio: float,
-                              min_height_ratio: float, min_area_ratio: float) -> List[Tuple[int, int, int, int]]:
         """Filter panels by size constraints."""
-        # Remove very small panels first
-        panels = [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
-                 if (x2 - x1) * (y2 - y1) >= (width * height) * min_area_ratio]
-        if not panels:
-            return []
-        # Calculate average dimensions for smart filtering
-        panel_widths = [x2 - x1 for x1, _, x2, _ in panels]
-        panel_heights = [y2 - y1 for _, y1, _, y2 in panels]
-        avg_width = np.mean(panel_widths)
-        avg_height = np.mean(panel_heights)
-        min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
-        min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)
-        return [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
-                if (x2 - x1) >= min_allowed_width and (y2 - y1) >= min_allowed_height]
     def count_panel_files(self, folder_path: str) -> int:
         """

     def __init__(self, config: Config):
         self.config = config
+    def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20) -> Tuple[List[np.ndarray], List[PanelData]]:
         """Extract comic panels using black percentage scan."""
         dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
         original = cv2.imread(self.config.input_path)
         height, width = dilated.shape
         # Find row gutters and panel rows
+        panel_rows = self._find_panel_rows(dilated, row_thresh)
         # Extract panels from each row
         all_panels = []
         # Filter panels by size
         filtered_panels = self._filter_panels_by_size(
+            all_panels, width, height
         )
         # Extract panel images and save
         return panel_images, panel_data, all_panel_path
+    def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int) -> List[Tuple[int, int]]:
         """Find panel rows where consecutive rows meet the threshold and height constraint."""
         height, width = dilated.shape
                 if y != start_row:
                     # Only extend if combined height meets min_height_ratio
                     combined_height = y - start_row + 1
+                    if combined_height / height >= self.config.min_height_ratio:
                         prev_row = y
                         row_gutters.append((start_row, prev_row))
                         start_row = y
         return row_gutters
+    def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int) -> List[Tuple[int, int]]:
         """
         Find panel columns where consecutive columns meet the threshold and width constraint.
         """
                 if x != start_col:
                     # Only extend if combined width meets min_width_ratio
                     combined_width = x - start_col + 1
+                    if combined_width / width >= self.config.min_width_ratio:
                         prev_col = x
                         col_gutters.append((start_col, prev_col))
                         start_col = x
         return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
+    def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]], width: int, height: int) -> List[Tuple[int, int, int, int]]:
         """Filter panels by size constraints."""
+        new_panel = []
+        image_area = width * height
+        for x1, y1, x2, y2 in panels:
+            w = x2 - x1  # Corrected
+            h = y2 - y1  # Corrected
+            area = w * h
+            if (
+                area >= self.config.min_area_ratio * image_area and
+                w >= self.config.min_width_ratio * width and
+                h >= self.config.min_height_ratio * height
+            ):
+                new_panel.append((x1, y1, x2, y2))
+        return new_panel
     def count_panel_files(self, folder_path: str) -> int:
         """