Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Files Community

jebin2 committed on Aug 9

Commit

bb49e0d

1 Parent(s): db7fdd8

new ch

Browse files

Files changed (7) hide show

comic_panel_extractor/annorator_server.py +5 -3
comic_panel_extractor/config.py +1 -0
comic_panel_extractor/llm_panel_extractor.py +7 -5
comic_panel_extractor/main.py +3 -1
comic_panel_extractor/static/annotator.html +10 -4
comic_panel_extractor/utils.py +111 -1
comic_panel_extractor/yolo_manager.py +3 -1

comic_panel_extractor/annorator_server.py CHANGED Viewed

@@ -9,6 +9,7 @@ from io import BytesIO
 import shutil
 from .config import Config
 from typing import List, Optional, Union, Dict, Any
 app = APIRouter()
@@ -226,7 +227,7 @@ def parse_yolo_line(line: str, image_width: int, image_height: int) -> Dict[str,
 async def list_all_images():
     image_info_list = []
     for root, _, files in os.walk(IMAGE_ROOT):
-        for file in files:
             if file.lower().endswith((".jpg", ".jpeg", ".png")):
                 image_path = os.path.join(root, file)
                 rel_path = os.path.relpath(image_path, IMAGE_ROOT)
@@ -270,8 +271,9 @@ async def get_annotations(image_name: str):
         raise HTTPException(status_code=404, detail="Image not found")
     annotations, (width, height) = load_yolo_annotations(image_path, label_path)
     return {
-        "annotations": annotations,  # Changed from "boxes"
         "original_width": width,
         "original_height": height
     }
@@ -286,7 +288,7 @@ async def get_detected_annotations(image_name: str):
     annotations, (width, height) = load_yolo_annotations(image_path, label_path, True)
     return {
-        "annotations": annotations,
         "original_width": width,
         "original_height": height
     }

 import shutil
 from .config import Config
 from typing import List, Optional, Union, Dict, Any
+from . import utils
 app = APIRouter()
 async def list_all_images():
     image_info_list = []
     for root, _, files in os.walk(IMAGE_ROOT):
+        for file in sorted(files):
             if file.lower().endswith((".jpg", ".jpeg", ".png")):
                 image_path = os.path.join(root, file)
                 rel_path = os.path.relpath(image_path, IMAGE_ROOT)
         raise HTTPException(status_code=404, detail="Image not found")
     annotations, (width, height) = load_yolo_annotations(image_path, label_path)
     return {
+        "annotations": utils.normalize_segmentation(annotations),
         "original_width": width,
         "original_height": height
     }
     annotations, (width, height) = load_yolo_annotations(image_path, label_path, True)
     return {
+        "annotations": utils.normalize_segmentation(annotations),
         "original_width": width,
         "original_height": height
     }

comic_panel_extractor/config.py CHANGED Viewed

@@ -10,6 +10,7 @@ class Config:
 	org_input_path: str = ""
 	input_path: str = ""
 	current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
 	YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
 	yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
 	YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"

 	org_input_path: str = ""
 	input_path: str = ""
 	current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+	EPOCH = int(os.getenv('EPOCH', '200'))
 	YOLO_BASE_MODEL_NAME = os.getenv('YOLO_BASE_MODEL_NAME', 'yolo11s-seg')
 	yolo_base_model_path: str = f'{current_path}/{YOLO_BASE_MODEL_NAME}.pt'
 	YOLO_MODEL_NAME = f"{os.getenv('YOLO_MODEL_NAME', 'comic_panel')}_{YOLO_BASE_MODEL_NAME}"

comic_panel_extractor/llm_panel_extractor.py CHANGED Viewed

@@ -16,16 +16,18 @@ class LLMPanelExtractor:
 		self.config = config or Config()
 		# Check if YOLO model exists; if not, download it to the specified path
-		if not os.path.exists(self.config.yolo_base_model_path):
 			url = "https://huggingface.co/mosesb/best-comic-panel-detection/resolve/main/best.pt"
-			print(f"Downloading YOLO model to {self.config.yolo_base_model_path}...")
 			response = requests.get(url)
 			response.raise_for_status()  # Raise an error if the download fails
-			with open(self.config.yolo_base_model_path, "wb") as f:
 				f.write(response.content)
 			print("YOLO model downloaded successfully.")
-		self.yolo_model = YOLO(self.config.yolo_base_model_path)
 		os.makedirs(self.config.output_folder, exist_ok=True)
 	def extract_bounding_boxes(self, detection_result_boxes):
@@ -109,7 +111,7 @@ class LLMPanelExtractor:
 					self.crop_and_save_detected_panels(newly_detected_boxes)
 					# Save prediction visualization
-					visualization_result = first_detection_result.plot()
 					constant.INDEX += 1
 					debug_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_debug.jpg"
 					Image.fromarray(visualization_result[..., ::-1]).save(debug_output_path)

 		self.config = config or Config()
 		# Check if YOLO model exists; if not, download it to the specified path
+		yolo_base_model_path = f'{self.config.yolo_base_model_path}_best.pt'
+		# yolo_base_model_path = f'{self.config.yolo_trained_model_path}'
+		if not os.path.exists(yolo_base_model_path):
 			url = "https://huggingface.co/mosesb/best-comic-panel-detection/resolve/main/best.pt"
+			print(f"Downloading YOLO model to {yolo_base_model_path}...")
 			response = requests.get(url)
 			response.raise_for_status()  # Raise an error if the download fails
+			with open(yolo_base_model_path, "wb") as f:
 				f.write(response.content)
 			print("YOLO model downloaded successfully.")
+		self.yolo_model = YOLO(yolo_base_model_path)
 		os.makedirs(self.config.output_folder, exist_ok=True)
 	def extract_bounding_boxes(self, detection_result_boxes):
 					self.crop_and_save_detected_panels(newly_detected_boxes)
 					# Save prediction visualization
+					visualization_result = first_detection_result.plot(masks=False)
 					constant.INDEX += 1
 					debug_output_path = f"{self.config.output_folder}/{constant.INDEX:04d}_debug.jpg"
 					Image.fromarray(visualization_result[..., ::-1]).save(debug_output_path)

comic_panel_extractor/main.py CHANGED Viewed

@@ -11,6 +11,7 @@ import numpy as np
 from .border_panel_extractor import BorderPanelExtractor
 import shutil
 from . import utils
 class ComicPanelExtractor:
     """Main class that orchestrates the comic panel extraction process."""
@@ -36,11 +37,12 @@ class ComicPanelExtractor:
                 original_width, original_height = original_image.size
             from .llm_panel_extractor import extract_panel_via_llm
             all_path, detected_boxes, all_processed_boxes = extract_panel_via_llm(self.config.input_path, self.config, self.reset)
             if utils.box_covered_ratio(all_processed_boxes, (original_width, original_height)) < 0.95:
                 print("LLM failed.")
             return None, None, all_path
         except Exception as e:
-            print(str(e))
         processed_image_path = self.image_processor.group_colors(self.config.input_path)

 from .border_panel_extractor import BorderPanelExtractor
 import shutil
 from . import utils
+import traceback
 class ComicPanelExtractor:
     """Main class that orchestrates the comic panel extraction process."""
                 original_width, original_height = original_image.size
             from .llm_panel_extractor import extract_panel_via_llm
             all_path, detected_boxes, all_processed_boxes = extract_panel_via_llm(self.config.input_path, self.config, self.reset)
+            print("LLM Done.")
             if utils.box_covered_ratio(all_processed_boxes, (original_width, original_height)) < 0.95:
                 print("LLM failed.")
             return None, None, all_path
         except Exception as e:
+            print(f'{str(e)} {traceback.format_exc()}')
         processed_image_path = self.image_processor.group_colors(self.config.input_path)

comic_panel_extractor/static/annotator.html CHANGED Viewed

@@ -522,6 +522,12 @@
             📸 Comic Panel Annotator
         </div>
         <div class="nav-actions">
             <button class="btn btn-success btn-sm" id="saveBtn">
                 💾 Save
             </button>
@@ -542,9 +548,9 @@
                 <div class="section-title">Image Selection</div>
                 <div class="image-nav">
-                    <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
                         ← Prev
-                    </button>
                     <!-- <div class="nav-counter" id="currentImageDisplay">
                         No image
                     </div> -->
@@ -554,9 +560,9 @@
                             <option value="">Choose an image...</option>
                         </select>
                     </div>
-                    <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
                         Next →
-                    </button>
                 </div>
                 <!-- Annotation Mode -->
                 <div class="sidebar-section">

             📸 Comic Panel Annotator
         </div>
         <div class="nav-actions">
+            <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
+                ← Prev
+            </button>
+            <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
+                Next →
+            </button>
             <button class="btn btn-success btn-sm" id="saveBtn">
                 💾 Save
             </button>
                 <div class="section-title">Image Selection</div>
                 <div class="image-nav">
+                    <!-- <button class="btn btn-ghost btn-sm" id="prevBtn" disabled>
                         ← Prev
+                    </button> -->
                     <!-- <div class="nav-counter" id="currentImageDisplay">
                         No image
                     </div> -->
                             <option value="">Choose an image...</option>
                         </select>
                     </div>
+                    <!-- <button class="btn btn-ghost btn-sm" id="nextBtn" disabled>
                         Next →
+                    </button> -->
                 </div>
                 <!-- Annotation Mode -->
                 <div class="sidebar-section">

comic_panel_extractor/utils.py CHANGED Viewed

@@ -8,6 +8,7 @@ import shutil
 from glob import glob
 from typing import List, Union
 from .config import Config
 def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
 	"""
@@ -527,4 +528,113 @@ def backup_file(source_path: str, backup_path: str) -> str:
 	os.makedirs(os.path.dirname(backup_path), exist_ok=True)
 	shutil.copy(source_path, backup_path)
 	print(f"✅ File backed up to: {backup_path}")
-	return backup_path

 from glob import glob
 from typing import List, Union
 from .config import Config
+from shapely.geometry import Polygon
 def remove_duplicate_boxes(boxes, compare_single=None, iou_threshold=0.7):
 	"""
 	os.makedirs(os.path.dirname(backup_path), exist_ok=True)
 	shutil.copy(source_path, backup_path)
 	print(f"✅ File backed up to: {backup_path}")
+	return backup_path
def douglas_peucker_simplify(points, epsilon):
	"""Simplify a polygon outline using shapely's Douglas-Peucker based ``simplify``.

	Args:
		points: Sequence of (x, y) vertices describing the polygon outline.
		epsilon: Tolerance passed to ``Polygon.simplify``.

	Returns:
		A list of vertices without the duplicated closing point. When the
		input has fewer than three vertices, or the simplified outline
		collapses below three vertices, the input vertices are returned
		unchanged (as a list).
	"""
	points = list(points)
	# Fewer than three vertices cannot form a polygon ring; building a
	# shapely Polygon from them fails, so hand the input back untouched.
	if len(points) < 3:
		return points
	polygon = Polygon(points)
	simplified = polygon.simplify(epsilon, preserve_topology=True)
	# The exterior ring repeats its first vertex at the end; drop that copy.
	simplified_points = list(simplified.exterior.coords[:-1])
	# Guard against an empty/degenerate result so callers always get a usable outline.
	if len(simplified_points) < 3:
		return points
	return simplified_points
def filter_close_points(points, min_distance=5.0):
	"""Drop points lying closer than ``min_distance`` to the last point kept.

	The first point is always kept. Each later point is compared (Euclidean
	distance) against the most recently kept point, not against its
	immediate predecessor in the input. Inputs with fewer than two points
	are returned as-is.
	"""
	if len(points) < 2:
		return points
	kept = [points[0]]
	for candidate in points[1:]:
		anchor = np.array(kept[-1])
		gap = np.linalg.norm(np.array(candidate) - anchor)
		if gap >= min_distance:
			kept.append(candidate)
	return kept
def remove_thin_extensions_morphological(annotation_points, kernel_size=5):
	"""Remove thin extensions from a polygon using morphological opening.

	The polygon is rasterised into a binary mask, eroded and then dilated
	with a square ``kernel_size`` x ``kernel_size`` kernel, and the largest
	external contour of the result is converted back to polygon points.

	Args:
		annotation_points: Sequence of (x, y) polygon vertices.
		kernel_size: Side length of the square structuring element.

	Returns:
		A list of ``[x, y]`` integer points for the cleaned outline, or
		``annotation_points`` unchanged when the input has fewer than three
		points or no contour survives the opening.
	"""
	# Fewer than three points is not a fillable polygon, and an empty input
	# would make the min/max reductions below fail.
	if len(annotation_points) < 3:
		return annotation_points

	points_array = np.array(annotation_points)
	min_x, min_y = np.min(points_array, axis=0).astype(int)
	max_x, max_y = np.max(points_array, axis=0).astype(int)

	# The mask is the bounding box plus a 10 pixel margin on every side;
	# ``offset`` maps between image coordinates and mask coordinates.
	offset = [min_x - 10, min_y - 10]
	mask = np.zeros((max_y - min_y + 20, max_x - min_x + 20), dtype=np.uint8)
	adjusted_points = (points_array - offset).astype(np.int32)
	cv2.fillPoly(mask, [adjusted_points], 255)

	kernel = np.ones((kernel_size, kernel_size), np.uint8)
	# Erosion removes thin parts; dilation restores the main body.
	eroded = cv2.erode(mask, kernel, iterations=1)
	cleaned = cv2.dilate(eroded, kernel, iterations=1)

	contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	if not contours:
		# Nothing survived the opening; keep the original outline.
		return annotation_points

	largest_contour = max(contours, key=cv2.contourArea)
	# Shift the contour back into the original coordinate system.
	return (largest_contour.reshape(-1, 2) + offset).tolist()
def str_format(points_list):
	"""Flatten a list of (x, y) points into a space-separated coordinate string.

	Each coordinate is rendered with six decimal places. The resulting
	string is printed and then returned.
	"""
	flat = [value for point in points_list for value in (point[0], point[1])]
	coords_str = ' '.join(format(value, '.6f') for value in flat)
	print(coords_str)
	return coords_str
def array_format(coords_str):
	"""Parse a whitespace-separated coordinate string into a list of (x, y) tuples.

	Consecutive values are paired in order. The resulting list is printed
	and then returned.
	"""
	values = [float(token) for token in coords_str.split()]
	points = []
	for start in range(0, len(values), 2):
		points.append((values[start], values[start + 1]))
	print(points)
	return points
def normalize_segmentation(annotations, min_distance=8.0, epsilon=5.0, remove_extensions=True):
	"""Run the normalization pipeline over every segmentation annotation.

	Annotations whose ``"type"`` is ``"segmentation"`` have their
	``"points"`` replaced in place by a cleaned outline; all other
	annotations pass through untouched.

	Args:
		annotations: Iterable of annotation dicts. Segmentation entries carry
			a ``"points"`` list of ``{"x": ..., "y": ...}`` dicts.
		min_distance: Minimum spacing passed to ``filter_close_points``.
		epsilon: Tolerance passed to ``douglas_peucker_simplify``.
		remove_extensions: When true, thin extensions are removed
			morphologically before the other steps.

	Returns:
		A list holding the same annotation dicts, in input order.
	"""
	processed_annotations = []
	for annotation in annotations:
		processed_annotations.append(annotation)
		if annotation["type"] != "segmentation":
			continue

		normalized_points = [(p["x"], p["y"]) for p in annotation["points"]]
		# A polygon needs at least three vertices; leave degenerate
		# outlines as they are instead of failing in the geometry helpers.
		if len(normalized_points) < 3:
			continue

		# Step 1: Remove thin extensions first (if enabled)
		if remove_extensions:
			normalized_points = remove_thin_extensions_morphological(normalized_points, kernel_size=7)

		# Step 2: Filter out points too close together
		normalized_points = filter_close_points(normalized_points, min_distance)
		# Filtering can collapse a small polygon; keep the original points then.
		if len(normalized_points) < 3:
			continue

		# Step 3: Apply Douglas-Peucker simplification
		normalized_points = douglas_peucker_simplify(normalized_points, epsilon)

		# Update annotation with normalized points
		annotation["points"] = [{"x": p[0], "y": p[1]} for p in normalized_points]
	return processed_annotations

comic_panel_extractor/yolo_manager.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import shutil
 from glob import glob
 from typing import List, Union
 os.environ["TORCH_USE_CUDA_DSA"] = "1"
 os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
@@ -106,7 +107,7 @@ class YOLOManager:
         train_params = {
             'data': data_yaml_path,
             'imgsz': Config.DEFAULT_IMAGE_SIZE,
-            'epochs': Config.YOLO_BASE_MODEL_NAME,
             'batch': 10,
             'name': run_name,
             'device': device,
@@ -115,6 +116,7 @@ class YOLOManager:
             'exist_ok': True,
             'pose': False,
             'resume': resume_flag,
             'amp': False,  # 🚫 Disable AMP to prevent yolo11n.pt download
         }

 import shutil
 from glob import glob
 from typing import List, Union
+from . import utils
 os.environ["TORCH_USE_CUDA_DSA"] = "1"
 os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
         train_params = {
             'data': data_yaml_path,
             'imgsz': Config.DEFAULT_IMAGE_SIZE,
+            'epochs': Config.EPOCH,
             'batch': 10,
             'name': run_name,
             'device': device,
             'exist_ok': True,
             'pose': False,
             'resume': resume_flag,
+            'save_period': 10,
             'amp': False,  # 🚫 Disable AMP to prevent yolo11n.pt download
         }