jebin2 committed on
Commit
bb49e0d
·
1 Parent(s): db7fdd8
comic_panel_extractor/annorator_server.py CHANGED
@@ -9,6 +9,7 @@ from io import BytesIO
9
  import shutil
10
  from .config import Config
11
  from typing import List, Optional, Union, Dict, Any
 
12
 
13
  app = APIRouter()
14
 
@@ -226,7 +227,7 @@ def parse_yolo_line(line: str, image_width: int, image_height: int) -> Dict[str,
226
  async def list_all_images():
227
  image_info_list = []
228
  for root, _, files in os.walk(IMAGE_ROOT):
229
- for file in files:
230
  if file.lower().endswith((".jpg", ".jpeg", ".png")):
231
  image_path = os.path.join(root, file)
232
  rel_path = os.path.relpath(image_path, IMAGE_ROOT)
@@ -270,8 +271,9 @@ async def get_annotations(image_name: str):
270
  raise HTTPException(status_code=404, detail="Image not found")
271
 
272
  annotations, (width, height) = load_yolo_annotations(image_path, label_path)
 
273
  return {
274
- "annotations": annotations, # Changed from "boxes"
275
  "original_width": width,
276
  "original_height": height
277
  }
@@ -286,7 +288,7 @@ async def get_detected_annotations(image_name: str):
286
 
287
  annotations, (width, height) = load_yolo_annotations(image_path, label_path, True)
288
  return {
289
- "annotations": annotations,
290
  "original_width": width,
291
  "original_height": height
292
  }
 
9
  import shutil
10
  from .config import Config
11
  from typing import List, Optional, Union, Dict, Any
12
+ from . import utils
13
 
14
  app = APIRouter()
15
 
 
227
  async def list_all_images():
228
  image_info_list = []
229
  for root, _, files in os.walk(IMAGE_ROOT):
230
+ for file in sorted(files):
231
  if file.lower().endswith((".jpg", ".jpeg", ".png")):
232
  image_path = os.path.join(root, file)
233
  rel_path = os.path.relpath(image_path, IMAGE_ROOT)
 
271
  raise HTTPException(status_code=404, detail="Image not found")
272
 
273
  annotations, (width, height) = load_yolo_annotations(image_path, label_path)
274
+
275
  return {
276
+ "annotations": utils.normalize_segmentation(annotations),
277
  "original_width": width,
278
  "original_height": height
279
  }
 
288
 
289
  annotations, (width, height) = load_yolo_annotations(image_path, label_path, True)
290
  return {
291
+ "annotations": utils.normalize_segmentation(annotations),
292
  "original_width": width,
293
  "original_height": height
294
  }
comic_panel_extractor/config.py CHANGED
@@ -10,6 +10,7 @@ class Config:
10
  org_input_path: str = ""
11
  input_path: str = ""
12
  current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
 
13
  YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
14
  yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
15
  YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"
 
10
  org_input_path: str = ""
11
  input_path: str = ""
12
  current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
13
+ EPOCH = int(os.getenv('EPOCH', '200'))
14
  YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
15
  yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
16
  YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"
comic_panel_extractor/llm_panel_extractor.py CHANGED
@@ -16,16 +16,18 @@ class LLMPanelExtractor:
16
  self.config = config or Config()
17
 
18
  # Check if YOLO model exists; if not, download it to the specified path
19
- if not os.path.exists(self.config.yolo_base_model_path):
 
 
20
  url = "https://huggingface.co/mosesb/best-comic-panel-detection/resolve/main/best.pt"
21
- print(f"Downloading YOLO model to {self.config.yolo_base_model_path}...")
22
  response = requests.get(url)
23
  response.raise_for_status() # Raise an error if the download fails
24
- with open(self.config.yolo_base_model_path, "wb") as f:
25
  f.write(response.content)
26
  print("YOLO model downloaded successfully.")
27
 
28
- self.yolo_model = YOLO(self.config.yolo_base_model_path)
29
  os.makedirs(self.config.output_folder, exist_ok=True)
30
 
31
  def extract_bounding_boxes(self, detection_result_boxes):
@@ -109,7 +111,7 @@ class LLMPanelExtractor:
109
  self.crop_and_save_detected_panels(newly_detected_boxes)
110
 
111
  # Save prediction visualization
112
- visualization_result = first_detection_result.plot()
113
  constant.INDEX += 1
114
  debug_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_debug.jpg"
115
  Image.fromarray(visualization_result[..., ::-1]).save(debug_output_path)
 
16
  self.config = config or Config()
17
 
18
  # Check if YOLO model exists; if not, download it to the specified path
19
+ yolo_base_model_path = f'{self.config.yolo_base_model_path}_best.pt'
20
+ # yolo_base_model_path = f'{self.config.yolo_trained_model_path}'
21
+ if not os.path.exists(yolo_base_model_path):
22
  url = "https://huggingface.co/mosesb/best-comic-panel-detection/resolve/main/best.pt"
23
+ print(f"Downloading YOLO model to {yolo_base_model_path}...")
24
  response = requests.get(url)
25
  response.raise_for_status() # Raise an error if the download fails
26
+ with open(yolo_base_model_path, "wb") as f:
27
  f.write(response.content)
28
  print("YOLO model downloaded successfully.")
29
 
30
+ self.yolo_model = YOLO(yolo_base_model_path)
31
  os.makedirs(self.config.output_folder, exist_ok=True)
32
 
33
  def extract_bounding_boxes(self, detection_result_boxes):
 
111
  self.crop_and_save_detected_panels(newly_detected_boxes)
112
 
113
  # Save prediction visualization
114
+ visualization_result = first_detection_result.plot(masks=False)
115
  constant.INDEX += 1
116
  debug_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_debug.jpg"
117
  Image.fromarray(visualization_result[..., ::-1]).save(debug_output_path)
comic_panel_extractor/main.py CHANGED
@@ -11,6 +11,7 @@ import numpy as np
11
  from .border_panel_extractor import BorderPanelExtractor
12
  import shutil
13
  from . import utils
 
14
 
15
  class ComicPanelExtractor:
16
  """Main class that orchestrates the comic panel extraction process."""
@@ -36,11 +37,12 @@ class ComicPanelExtractor:
36
  original_width, original_height = original_image.size
37
  from .llm_panel_extractor import extract_panel_via_llm
38
  all_path, detected_boxes, all_processed_boxes = extract_panel_via_llm(self.config.input_path, self.config, self.reset)
 
39
  if utils.box_covered_ratio(all_processed_boxes, (original_width, original_height)) < 0.95:
40
  print("LLM failed.")
41
  return None, None, all_path
42
  except Exception as e:
43
- print(str(e))
44
 
45
  processed_image_path = self.image_processor.group_colors(self.config.input_path)
46
 
 
11
  from .border_panel_extractor import BorderPanelExtractor
12
  import shutil
13
  from . import utils
14
+ import traceback
15
 
16
  class ComicPanelExtractor:
17
  """Main class that orchestrates the comic panel extraction process."""
 
37
  original_width, original_height = original_image.size
38
  from .llm_panel_extractor import extract_panel_via_llm
39
  all_path, detected_boxes, all_processed_boxes = extract_panel_via_llm(self.config.input_path, self.config, self.reset)
40
+ print("LLM Done.")
41
  if utils.box_covered_ratio(all_processed_boxes, (original_width, original_height)) < 0.95:
42
  print("LLM failed.")
43
  return None, None, all_path
44
  except Exception as e:
45
+ print(f'{str(e)} {traceback.format_exc()}')
46
 
47
  processed_image_path = self.image_processor.group_colors(self.config.input_path)
48
 
comic_panel_extractor/static/annotator.html CHANGED
@@ -522,6 +522,12 @@
522
  📸 Comic Panel Annotator
523
  </div>
524
  <div class="nav-actions">
 
 
 
 
 
 
525
  <button class="btn btn-success btn-sm" id="saveBtn">
526
  💾 Save
527
  </button>
@@ -542,9 +548,9 @@
542
  <div class="section-title">Image Selection</div>
543
 
544
  <div class="image-nav">
545
- <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
546
  ← Prev
547
- </button>
548
  <!-- <div class="nav-counter" id="currentImageDisplay">
549
  No image
550
  </div> -->
@@ -554,9 +560,9 @@
554
  <option value="">Choose an image...</option>
555
  </select>
556
  </div>
557
- <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
558
  Next →
559
- </button>
560
  </div>
561
  <!-- Annotation Mode -->
562
  <div class="sidebar-section">
 
522
  📸 Comic Panel Annotator
523
  </div>
524
  <div class="nav-actions">
525
+ <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
526
+ ← Prev
527
+ </button>
528
+ <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
529
+ Next →
530
+ </button>
531
  <button class="btn btn-success btn-sm" id="saveBtn">
532
  💾 Save
533
  </button>
 
548
  <div class="section-title">Image Selection</div>
549
 
550
  <div class="image-nav">
551
+ <!-- <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
552
  ← Prev
553
+ </button> -->
554
  <!-- <div class="nav-counter" id="currentImageDisplay">
555
  No image
556
  </div> -->
 
560
  <option value="">Choose an image...</option>
561
  </select>
562
  </div>
563
+ <!-- <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
564
  Next →
565
+ </button> -->
566
  </div>
567
  <!-- Annotation Mode -->
568
  <div class="sidebar-section">
comic_panel_extractor/utils.py CHANGED
@@ -8,6 +8,7 @@ import shutil
8
  from glob import glob
9
  from typing import List, Union
10
  from .config import Config
 
11
 
12
  def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
13
  """
@@ -527,4 +528,113 @@ def backup_file(source_path: str, backup_path: str) -> str:
527
  os.makedirs(os.path.dirname(backup_path), exist_ok=True)
528
  shutil.copy(source_path, backup_path)
529
  print(f"✅ File backed up to: {backup_path}")
530
- return backup_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from glob import glob
9
  from typing import List, Union
10
  from .config import Config
11
+ from shapely.geometry import Polygon
12
 
13
  def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
14
  """
 
528
  os.makedirs(os.path.dirname(backup_path), exist_ok=True)
529
  shutil.copy(source_path, backup_path)
530
  print(f"✅ File backed up to: {backup_path}")
531
+ return backup_path
532
+
533
def douglas_peucker_simplify(points, epsilon):
    """Simplify a polygon outline using shapely's Douglas-Peucker ``simplify``.

    Args:
        points: Sequence of ``(x, y)`` vertices describing the polygon ring.
        epsilon: Tolerance forwarded to ``Polygon.simplify``.

    Returns:
        A list of vertices for the simplified exterior ring, without the
        closing vertex that repeats the first one. Outlines with fewer than
        three vertices, and outlines whose simplification comes back empty,
        are returned unchanged (as a list) instead of raising.
    """
    # A polygon ring cannot be built from fewer than three vertices, so pass
    # degenerate outlines through rather than letting Polygon() raise.
    if len(points) < 3:
        return list(points)

    simplified = Polygon(points).simplify(epsilon, preserve_topology=True)

    # Defensive: never replace a real outline with an empty one.
    if simplified.is_empty:
        return list(points)

    # The exterior ring repeats its first vertex at the end; drop the duplicate.
    return list(simplified.exterior.coords[:-1])
538
+
539
def filter_close_points(points, min_distance=5.0):
    """Drop points that lie closer than ``min_distance`` to the last kept point.

    The first point is always kept. Every later point is compared, by
    Euclidean distance, with the most recently *kept* point (not with its
    immediate predecessor in the input) and is retained only when that
    distance is at least ``min_distance``.

    Args:
        points: Sequence of coordinate pairs.
        min_distance: Minimum distance required to keep a point.

    Returns:
        A new list with the retained points, in their original order. The
        input sequence is never returned as-is, so callers can mutate the
        result without affecting their input.
    """
    points = list(points)
    if not points:
        return []

    kept = [points[0]]
    for candidate in points[1:]:
        # Compare as floats so integer coordinate types cannot wrap around.
        gap = np.linalg.norm(
            np.asarray(candidate, dtype=float) - np.asarray(kept[-1], dtype=float)
        )
        if gap >= min_distance:
            kept.append(candidate)

    return kept
555
+
556
def remove_thin_extensions_morphological(annotation_points, kernel_size=5):
    """Remove thin protrusions from a polygon via erosion followed by dilation.

    The polygon is rasterised into a padded binary mask, eroded and then
    dilated with a square ``kernel_size`` x ``kernel_size`` kernel, and the
    largest external contour of the result is converted back to the original
    coordinate system.

    Args:
        annotation_points: Sequence of ``(x, y)`` polygon vertices.
        kernel_size: Side length of the square structuring element.

    Returns:
        A list of ``[x, y]`` integer vertices of the cleaned outline, or
        ``annotation_points`` unchanged when the input has fewer than three
        vertices or no contour survives the morphological pass.
    """
    # Fewer than three vertices cannot enclose any area; this also avoids
    # np.min/np.max failing on an empty array.
    if len(annotation_points) < 3:
        return annotation_points

    padding = 10  # margin around the polygon inside the mask

    points_array = np.array(annotation_points)
    min_x, min_y = np.min(points_array, axis=0).astype(int)
    max_x, max_y = np.max(points_array, axis=0).astype(int)

    # Translation that maps image coordinates into mask coordinates.
    offset = [min_x - padding, min_y - padding]

    mask = np.zeros(
        (max_y - min_y + 2 * padding, max_x - min_x + 2 * padding),
        dtype=np.uint8,
    )
    adjusted_points = (points_array - offset).astype(np.int32)
    cv2.fillPoly(mask, [adjusted_points], 255)

    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    # Erosion removes thin parts; dilation restores the main body.
    eroded = cv2.erode(mask, kernel, iterations=1)
    cleaned = cv2.dilate(eroded, kernel, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return annotation_points

    largest_contour = max(contours, key=cv2.contourArea)
    # Translate back to the original coordinate system.
    cleaned_points = largest_contour.reshape(-1, 2) + offset
    return cleaned_points.tolist()
595
+
596
def str_format(points_list):
    """Convert a list of points into a segmentation-format string.

    Args:
        points_list: Iterable of points, each indexable as ``point[0]`` (x)
            and ``point[1]`` (y).

    Returns:
        The coordinates flattened as ``x1 y1 x2 y2 ...``, space-separated,
        each formatted with six decimal places. An empty input yields ``""``.
    """
    # No debug print here: this is a pure formatter and should not write to
    # stdout every time it is called.
    return ' '.join(
        f'{coord:.6f}'
        for point in points_list
        for coord in (point[0], point[1])
    )
607
+
608
+
609
def array_format(coords_str):
    """Convert a segmentation-format string into a list of points.

    Args:
        coords_str: Whitespace-separated numbers ``x1 y1 x2 y2 ...``.

    Returns:
        A list of ``(x, y)`` float tuples. An empty or blank string yields
        an empty list.

    Raises:
        ValueError: If a token is not a valid float, or if the string holds
            an odd number of values and so cannot be paired into points.
    """
    coords = [float(token) for token in coords_str.split()]

    # Fail with a clear message instead of an IndexError on the last pair.
    if len(coords) % 2:
        raise ValueError(
            f"Expected an even number of coordinates, got {len(coords)}"
        )

    return list(zip(coords[0::2], coords[1::2]))
618
+
619
def normalize_segmentation(annotations, min_distance=8.0, epsilon=5.0, remove_extensions=True):
    """Run the full normalization pipeline over segmentation annotations.

    For every annotation whose ``"type"`` is ``"segmentation"`` the outline
    in ``"points"`` (a list of ``{"x": ..., "y": ...}`` dicts) is processed
    in three steps: optional removal of thin extensions, filtering of points
    that are too close together, and Douglas-Peucker simplification.
    Annotations of any other type are passed through untouched.

    Args:
        annotations: Iterable of annotation dicts.
        min_distance: Minimum spacing passed to ``filter_close_points``.
        epsilon: Tolerance passed to ``douglas_peucker_simplify``.
        remove_extensions: When true, run the morphological clean-up step
            first; when false, skip it.

    Returns:
        A new list of annotations. Normalized annotations are shallow copies
        with a replaced ``"points"`` list, so the input dicts are not
        mutated. Outlines with fewer than three points, or that collapse
        below three points during normalization, keep their original points.
    """
    processed_annotations = []

    for annotation in annotations:
        if annotation.get("type") == "segmentation":
            original_points = [(p["x"], p["y"]) for p in annotation["points"]]

            # Fewer than three vertices is not a polygon; leave it as-is.
            if len(original_points) >= 3:
                normalized_points = original_points

                # Step 1: remove thin extensions first (if enabled).
                if remove_extensions:
                    normalized_points = remove_thin_extensions_morphological(
                        normalized_points, kernel_size=7
                    )

                # Step 2: filter out points that are too close together.
                normalized_points = filter_close_points(normalized_points, min_distance)

                # Step 3: simplify, unless the outline has already collapsed.
                if len(normalized_points) >= 3:
                    normalized_points = douglas_peucker_simplify(normalized_points, epsilon)

                # Never hand back a degenerate outline; fall back to the input.
                if len(normalized_points) < 3:
                    normalized_points = original_points

                # Copy instead of mutating the caller's dict in place.
                annotation = {
                    **annotation,
                    "points": [{"x": p[0], "y": p[1]} for p in normalized_points],
                }

        processed_annotations.append(annotation)

    return processed_annotations
comic_panel_extractor/yolo_manager.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import shutil
4
  from glob import glob
5
  from typing import List, Union
 
6
 
7
  os.environ["TORCH_USE_CUDA_DSA"] = "1"
8
  os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
@@ -106,7 +107,7 @@ class YOLOManager:
106
  train_params = {
107
  'data': data_yaml_path,
108
  'imgsz': Config.DEFAULT_IMAGE_SIZE,
109
- 'epochs': Config.YOLO_BASE_MODEL_NAME,
110
  'batch': 10,
111
  'name': run_name,
112
  'device': device,
@@ -115,6 +116,7 @@ class YOLOManager:
115
  'exist_ok': True,
116
  'pose': False,
117
  'resume': resume_flag,
 
118
  'amp': False, # 🚫 Disable AMP to prevent yolo11n.pt download
119
  }
120
 
 
3
  import shutil
4
  from glob import glob
5
  from typing import List, Union
6
+ from . import utils
7
 
8
  os.environ["TORCH_USE_CUDA_DSA"] = "1"
9
  os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 
107
  train_params = {
108
  'data': data_yaml_path,
109
  'imgsz': Config.DEFAULT_IMAGE_SIZE,
110
+ 'epochs': Config.EPOCH,
111
  'batch': 10,
112
  'name': run_name,
113
  'device': device,
 
116
  'exist_ok': True,
117
  'pose': False,
118
  'resume': resume_flag,
119
+ 'save_period': 10,
120
  'amp': False, # 🚫 Disable AMP to prevent yolo11n.pt download
121
  }
122