jebin2 committed on
Commit
b269113
·
1 Parent(s): d28110d

removed ocr

Browse files
comic_panel_extractor/image_processor.py CHANGED
@@ -34,15 +34,22 @@ class ImageProcessor:
34
 
35
  # Convert to grayscale and binary
36
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
37
- _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
 
 
 
 
 
 
 
38
  is_inverted = False
39
  # binary, is_inverted = self.invert_if_black_dominates(binary)
40
 
41
  if not is_inverted:
42
  # Dilate to strengthen borders
43
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
44
- dilated = cv2.dilate(binary, kernel, iterations=2)
45
- else: dilated = binary
46
 
47
  # Save intermediate results
48
  gray_path = f'{self.config.output_folder}/2_gray.jpg'
@@ -50,7 +57,7 @@ class ImageProcessor:
50
  dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
51
 
52
  cv2.imwrite(str(gray_path), gray)
53
- cv2.imwrite(str(binary_path), binary)
54
  cv2.imwrite(str(dilated_path), dilated)
55
 
56
  return str(gray_path), str(binary_path), str(dilated_path), is_inverted
@@ -74,7 +81,7 @@ class ImageProcessor:
74
  # Save result
75
  return inverted, black_pixels > white_pixels
76
 
77
- def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg", min_area_ratio=0):
78
  img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
79
  height, width = img.shape
80
 
@@ -88,7 +95,7 @@ class ImageProcessor:
88
  mask = np.zeros_like(binary)
89
  for cnt in contours:
90
  area = cv2.contourArea(cnt)
91
- if area >= (height * width * min_area_ratio):
92
  cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
93
 
94
  # Apply mask to original image (keeps only large borders)
@@ -115,11 +122,19 @@ class ImageProcessor:
115
  from skimage.measure import label
116
 
117
  # Load image
118
- img = cv2.imread(processed_image_path, cv2.IMREAD_GRAYSCALE)
119
- _, binary = cv2.threshold(img, 128, 1, cv2.THRESH_BINARY_INV) # invert, binary mask (0,1)
 
 
 
 
 
 
 
 
120
 
121
  # Skeletonize
122
- skeleton = skeletonize(binary).astype(np.uint8)
123
 
124
  # Remove small hanging clusters
125
  labeled = label(skeleton, connectivity=2)
 
34
 
35
  # Convert to grayscale and binary
36
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
37
+ # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
38
+
39
+ # Apply Gaussian blur to reduce noise
40
+ blurred = cv2.GaussianBlur(gray, (3, 3), 0)
41
+
42
+ # Canny edge detection
43
+ edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
44
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
45
  is_inverted = False
46
  # binary, is_inverted = self.invert_if_black_dominates(binary)
47
 
48
  if not is_inverted:
49
  # Dilate to strengthen borders
50
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
51
+ dilated = cv2.dilate(edges, kernel, iterations=2)
52
+ else: dilated = edges
53
 
54
  # Save intermediate results
55
  gray_path = f'{self.config.output_folder}/2_gray.jpg'
 
57
  dilated_path = f'{self.config.output_folder}/4_dilated.jpg'
58
 
59
  cv2.imwrite(str(gray_path), gray)
60
+ cv2.imwrite(str(binary_path), edges)
61
  cv2.imwrite(str(dilated_path), dilated)
62
 
63
  return str(gray_path), str(binary_path), str(dilated_path), is_inverted
 
81
  # Save result
82
  return inverted, black_pixels > white_pixels
83
 
84
+ def remove_inner_sketch(self, input_path, output_filename="5_remove_inner_sketch.jpg"):
85
  img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
86
  height, width = img.shape
87
 
 
95
  mask = np.zeros_like(binary)
96
  for cnt in contours:
97
  area = cv2.contourArea(cnt)
98
+ if area >= (height * width * self.config.min_area_ratio):
99
  cv2.drawContours(mask, [cnt], -1, 255, thickness=cv2.FILLED)
100
 
101
  # Apply mask to original image (keeps only large borders)
 
122
  from skimage.measure import label
123
 
124
  # Load image
125
+ img = cv2.imread(processed_image_path)
126
+ # Convert to grayscale and binary
127
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
128
+ # _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
129
+
130
+ # Apply Gaussian blur to reduce noise
131
+ blurred = cv2.GaussianBlur(gray, (3, 3), 0)
132
+
133
+ # Canny edge detection
134
+ edges = cv2.Canny(blurred, threshold1=50, threshold2=150, apertureSize=3)
135
 
136
  # Skeletonize
137
+ skeleton = skeletonize(edges).astype(np.uint8)
138
 
139
  # Remove small hanging clusters
140
  labeled = label(skeleton, connectivity=2)
comic_panel_extractor/main.py CHANGED
@@ -32,8 +32,8 @@ class ComicPanelExtractor:
32
  self.config.black_overlay_input_path = processed_image_path
33
 
34
  # Step 1: Detect and mask text regions
35
- text_bubbles = self._detect_text_bubbles()
36
- processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
37
 
38
  # Step 2: Preprocess image
39
  _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
 
32
  self.config.black_overlay_input_path = processed_image_path
33
 
34
  # Step 1: Detect and mask text regions
35
+ # text_bubbles = self._detect_text_bubbles()
36
+ # processed_image_path = self.image_processor.mask_text_regions(processed_image_path, [bubble["bbox"] for bubble in text_bubbles])
37
 
38
  # Step 2: Preprocess image
39
  _, _, processed_image_path, is_inverted = self.image_processor.preprocess_image(processed_image_path)
comic_panel_extractor/panel_extractor.py CHANGED
@@ -36,7 +36,7 @@ class PanelExtractor:
36
  def __init__(self, config: Config):
37
  self.config = config
38
 
39
- def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20, min_width_ratio: float = 0.001, min_height_ratio: float = 0.001, min_area_ratio: float = 0) -> Tuple[List[np.ndarray], List[PanelData]]:
40
  """Extract comic panels using black percentage scan."""
41
  dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
42
  original = cv2.imread(self.config.input_path)
@@ -47,7 +47,7 @@ class PanelExtractor:
47
  height, width = dilated.shape
48
 
49
  # Find row gutters and panel rows
50
- panel_rows = self._find_panel_rows(dilated, row_thresh, min_height_ratio)
51
 
52
  # Extract panels from each row
53
  all_panels = []
@@ -57,7 +57,7 @@ class PanelExtractor:
57
 
58
  # Filter panels by size
59
  filtered_panels = self._filter_panels_by_size(
60
- all_panels, width, height, min_width_ratio, min_height_ratio, min_area_ratio
61
  )
62
 
63
  # Extract panel images and save
@@ -67,7 +67,7 @@ class PanelExtractor:
67
 
68
  return panel_images, panel_data, all_panel_path
69
 
70
- def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int, min_height_ratio: float) -> List[Tuple[int, int]]:
71
  """Find panel rows where consecutive rows meet the threshold and height constraint."""
72
  height, width = dilated.shape
73
 
@@ -92,7 +92,7 @@ class PanelExtractor:
92
  if y != start_row:
93
  # Only extend if combined height meets min_height_ratio
94
  combined_height = y - start_row + 1
95
- if combined_height / height >= min_height_ratio:
96
  prev_row = y
97
  row_gutters.append((start_row, prev_row))
98
  start_row = y
@@ -115,7 +115,7 @@ class PanelExtractor:
115
 
116
  return row_gutters
117
 
118
- def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int, min_width_ratio: float) -> List[Tuple[int, int]]:
119
  """
120
  Find panel columns where consecutive columns meet the threshold and width constraint.
121
  """
@@ -142,7 +142,7 @@ class PanelExtractor:
142
  if x != start_col:
143
  # Only extend if combined width meets min_width_ratio
144
  combined_width = x - start_col + 1
145
- if combined_width / width >= min_width_ratio:
146
  prev_col = x
147
  col_gutters.append((start_col, prev_col))
148
  start_col = x
@@ -197,28 +197,25 @@ class PanelExtractor:
197
 
198
  return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
199
 
200
- def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]],
201
- width: int, height: int, min_width_ratio: float,
202
- min_height_ratio: float, min_area_ratio: float) -> List[Tuple[int, int, int, int]]:
203
  """Filter panels by size constraints."""
204
- # Remove very small panels first
205
- panels = [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
206
- if (x2 - x1) * (y2 - y1) >= (width * height) * min_area_ratio]
207
-
208
- if not panels:
209
- return []
210
-
211
- # Calculate average dimensions for smart filtering
212
- panel_widths = [x2 - x1 for x1, _, x2, _ in panels]
213
- panel_heights = [y2 - y1 for _, y1, _, y2 in panels]
214
- avg_width = np.mean(panel_widths)
215
- avg_height = np.mean(panel_heights)
216
-
217
- min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
218
- min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)
219
-
220
- return [(x1, y1, x2, y2) for x1, y1, x2, y2 in panels
221
- if (x2 - x1) >= min_allowed_width and (y2 - y1) >= min_allowed_height]
222
 
223
  def count_panel_files(self, folder_path: str) -> int:
224
  """
 
36
  def __init__(self, config: Config):
37
  self.config = config
38
 
39
+ def extract_panels(self, dilated_path: str, row_thresh: int = 20, col_thresh: int = 20) -> Tuple[List[np.ndarray], List[PanelData]]:
40
  """Extract comic panels using black percentage scan."""
41
  dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
42
  original = cv2.imread(self.config.input_path)
 
47
  height, width = dilated.shape
48
 
49
  # Find row gutters and panel rows
50
+ panel_rows = self._find_panel_rows(dilated, row_thresh)
51
 
52
  # Extract panels from each row
53
  all_panels = []
 
57
 
58
  # Filter panels by size
59
  filtered_panels = self._filter_panels_by_size(
60
+ all_panels, width, height
61
  )
62
 
63
  # Extract panel images and save
 
67
 
68
  return panel_images, panel_data, all_panel_path
69
 
70
+ def _find_panel_rows(self, dilated: np.ndarray, row_thresh: int) -> List[Tuple[int, int]]:
71
  """Find panel rows where consecutive rows meet the threshold and height constraint."""
72
  height, width = dilated.shape
73
 
 
92
  if y != start_row:
93
  # Only extend if combined height meets min_height_ratio
94
  combined_height = y - start_row + 1
95
+ if combined_height / height >= self.config.min_height_ratio:
96
  prev_row = y
97
  row_gutters.append((start_row, prev_row))
98
  start_row = y
 
115
 
116
  return row_gutters
117
 
118
+ def _find_panel_columns(self, dilated: np.ndarray, col_thresh: int) -> List[Tuple[int, int]]:
119
  """
120
  Find panel columns where consecutive columns meet the threshold and width constraint.
121
  """
 
142
  if x != start_col:
143
  # Only extend if combined width meets min_width_ratio
144
  combined_width = x - start_col + 1
145
+ if combined_width / width >= self.config.min_width_ratio:
146
  prev_col = x
147
  col_gutters.append((start_col, prev_col))
148
  start_col = x
 
197
 
198
  return [(x1, y1, x2, y2) for x1, x2 in panel_cols]
199
 
200
+ def _filter_panels_by_size(self, panels: List[Tuple[int, int, int, int]], width: int, height: int) -> List[Tuple[int, int, int, int]]:
 
 
201
  """Filter panels by size constraints."""
202
+ new_panel = []
203
+ image_area = width * height
204
+
205
+ for x1, y1, x2, y2 in panels:
206
+ w = x2 - x1 # Corrected
207
+ h = y2 - y1 # Corrected
208
+ area = w * h
209
+
210
+ if (
211
+ area >= self.config.min_area_ratio * image_area and
212
+ w >= self.config.min_width_ratio * width and
213
+ h >= self.config.min_height_ratio * height
214
+ ):
215
+ new_panel.append((x1, y1, x2, y2))
216
+
217
+ return new_panel
218
+
 
219
 
220
  def count_panel_files(self, folder_path: str) -> int:
221
  """