Shirochi committed
Commit f66ccd1 · verified · 1 Parent(s): 19e6730

Upload 7 files

bubble_detector.py ADDED
@@ -0,0 +1,2031 @@
"""
bubble_detector.py - Modified version that works in frozen PyInstaller executables
Replace your bubble_detector.py with this version
"""
import os
import sys
import json
import numpy as np
import cv2
from typing import List, Tuple, Optional, Dict, Any
import logging
import traceback
import hashlib
from pathlib import Path
import threading
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Check if we're running in a frozen environment
IS_FROZEN = getattr(sys, 'frozen', False)
if IS_FROZEN:
    # In frozen environment, set proper paths for ML libraries
    MEIPASS = sys._MEIPASS
    os.environ['TORCH_HOME'] = MEIPASS
    os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers')
    os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface')
    logger.info(f"Running in frozen environment: {MEIPASS}")

# Modified import checks for frozen environment
YOLO_AVAILABLE = False
YOLO = None
torch = None
TORCH_AVAILABLE = False
ONNX_AVAILABLE = False
TRANSFORMERS_AVAILABLE = False
RTDetrForObjectDetection = None
RTDetrImageProcessor = None
PIL_AVAILABLE = False

# Try to import YOLO dependencies with better error handling
if IS_FROZEN:
    # In frozen environment, try harder to import
    try:
        # First try to import torch components individually
        import torch
        import torch.nn
        import torch.cuda
        TORCH_AVAILABLE = True
        logger.info("✓ PyTorch loaded in frozen environment")
    except Exception as e:
        logger.warning(f"PyTorch not available in frozen environment: {e}")
        TORCH_AVAILABLE = False
        torch = None

    # Try ultralytics after torch
    if TORCH_AVAILABLE:
        try:
            from ultralytics import YOLO
            YOLO_AVAILABLE = True
            logger.info("✓ Ultralytics YOLO loaded in frozen environment")
        except Exception as e:
            logger.warning(f"Ultralytics not available in frozen environment: {e}")
            YOLO_AVAILABLE = False

    # Try transformers
    try:
        import transformers
        # Try specific imports
        try:
            from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
            TRANSFORMERS_AVAILABLE = True
            logger.info("✓ Transformers RT-DETR loaded in frozen environment")
        except ImportError:
            # Try alternative import
            try:
                from transformers import AutoModel, AutoImageProcessor
                RTDetrForObjectDetection = AutoModel
                RTDetrImageProcessor = AutoImageProcessor
                TRANSFORMERS_AVAILABLE = True
                logger.info("✓ Transformers loaded with AutoModel fallback")
            except Exception:
                TRANSFORMERS_AVAILABLE = False
                logger.warning("Transformers RT-DETR not available in frozen environment")
    except Exception as e:
        logger.warning(f"Transformers not available in frozen environment: {e}")
        TRANSFORMERS_AVAILABLE = False
else:
    # Normal environment - original import logic
    try:
        from ultralytics import YOLO
        YOLO_AVAILABLE = True
    except Exception:
        YOLO_AVAILABLE = False
        logger.warning("Ultralytics YOLO not available")

    try:
        import torch
        # Test if cuda attribute exists
        _ = torch.cuda
        TORCH_AVAILABLE = True
    except (ImportError, AttributeError):
        TORCH_AVAILABLE = False
        torch = None
        logger.warning("PyTorch not available or incomplete")

    try:
        from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
        try:
            from transformers import RTDetrV2ForObjectDetection
            RTDetrForObjectDetection = RTDetrV2ForObjectDetection
        except ImportError:
            pass
        TRANSFORMERS_AVAILABLE = True
    except Exception:
        TRANSFORMERS_AVAILABLE = False
        logger.info("Transformers not available for RT-DETR")

# Configure ORT memory behavior before importing
try:
    os.environ.setdefault('ORT_DISABLE_MEMORY_ARENA', '1')
except Exception:
    pass
# ONNX Runtime - works well in frozen environments
try:
    import onnxruntime as ort
    ONNX_AVAILABLE = True
    logger.info("✓ ONNX Runtime available")
except ImportError:
    ONNX_AVAILABLE = False
    logger.warning("ONNX Runtime not available")

# PIL
try:
    from PIL import Image
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    logger.info("PIL not available")

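# Typical usage (illustrative sketch only; the file paths below are examples,
# not files shipped with this module):
#
#   detector = BubbleDetector()
#   if detector.load_model("models/bubbles_yolov8.pt"):      # hypothetical local weights
#       boxes = detector.detect_bubbles("page_001.png", confidence=0.3)
#   if detector.load_rtdetr_model():                         # pulls the default HF repo
#       regions = detector.detect_with_rtdetr(image_path="page_001.png",
#                                             return_all_bubbles=False)
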
class BubbleDetector:
    """
    Combined YOLOv8 and RT-DETR speech bubble detector for comics and manga.
    Supports multiple model formats and provides configurable detection.
    Backward compatible with existing code while adding RT-DETR support.
    """

    # Process-wide shared RT-DETR to avoid concurrent meta-device loads
    _rtdetr_init_lock = threading.Lock()
    _rtdetr_shared_model = None
    _rtdetr_shared_processor = None
    _rtdetr_loaded = False
    _rtdetr_repo_id = 'ogkalu/comic-text-and-bubble-detector'

    # Shared RT-DETR (ONNX) across process to avoid device/context storms
    _rtdetr_onnx_init_lock = threading.Lock()
    _rtdetr_onnx_shared_session = None
    _rtdetr_onnx_loaded = False
    _rtdetr_onnx_providers = None
    _rtdetr_onnx_model_path = None
    # Limit concurrent runs to avoid device hangs. Defaults to 2 for better parallelism.
    # Can be overridden via env DML_MAX_CONCURRENT or config rtdetr_max_concurrency
    try:
        _rtdetr_onnx_max_concurrent = int(os.environ.get('DML_MAX_CONCURRENT', '2'))
    except Exception:
        _rtdetr_onnx_max_concurrent = 2
    _rtdetr_onnx_sema = threading.Semaphore(max(1, _rtdetr_onnx_max_concurrent))
    _rtdetr_onnx_sema_initialized = False

    def __init__(self, config_path: str = "config.json"):
        """
        Initialize the bubble detector.

        Args:
            config_path: Path to configuration file
        """
        # Set thread limits early if environment indicates single-threaded mode
        try:
            if os.environ.get('OMP_NUM_THREADS') == '1':
                # Already in single-threaded mode, ensure it's applied to this process
                # Check if torch is available at module level before trying to use it
                if TORCH_AVAILABLE and torch is not None:
                    try:
                        torch.set_num_threads(1)
                    except (RuntimeError, AttributeError):
                        pass
                try:
                    import cv2
                    cv2.setNumThreads(1)
                except (ImportError, AttributeError):
                    pass
        except Exception:
            pass

        self.config_path = config_path
        self.config = self._load_config()

        # YOLOv8 components (original)
        self.model = None
        self.model_loaded = False
        self.model_type = None  # 'yolo', 'onnx', or 'torch'
        self.onnx_session = None

        # RT-DETR components (new)
        self.rtdetr_model = None
        self.rtdetr_processor = None
        self.rtdetr_loaded = False
        self.rtdetr_repo = 'ogkalu/comic-text-and-bubble-detector'

        # RT-DETR (ONNX) backend components
        self.rtdetr_onnx_session = None
        self.rtdetr_onnx_loaded = False
        self.rtdetr_onnx_repo = 'ogkalu/comic-text-and-bubble-detector'

        # RT-DETR class definitions
        self.CLASS_BUBBLE = 0       # Empty speech bubble
        self.CLASS_TEXT_BUBBLE = 1  # Bubble with text
        self.CLASS_TEXT_FREE = 2    # Text without bubble

        # Detection settings
        self.default_confidence = 0.3
        self.default_iou_threshold = 0.45
        # Allow override from settings
        try:
            ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
            self.default_max_detections = int(ocr_cfg.get('bubble_max_detections', 100))
            self.max_det_yolo = int(ocr_cfg.get('bubble_max_detections_yolo', self.default_max_detections))
            self.max_det_rtdetr = int(ocr_cfg.get('bubble_max_detections_rtdetr', self.default_max_detections))
        except Exception:
            self.default_max_detections = 100
            self.max_det_yolo = 100
            self.max_det_rtdetr = 100

        # Cache directory for ONNX conversions
        self.cache_dir = os.environ.get('BUBBLE_CACHE_DIR', 'models')
        os.makedirs(self.cache_dir, exist_ok=True)

        # RT-DETR concurrency setting from config
        try:
            rtdetr_max_conc = int(ocr_cfg.get('rtdetr_max_concurrency', 2))
            # Update class-level semaphore if not yet initialized or if value changed
            if not BubbleDetector._rtdetr_onnx_sema_initialized or rtdetr_max_conc != BubbleDetector._rtdetr_onnx_max_concurrent:
                BubbleDetector._rtdetr_onnx_max_concurrent = max(1, rtdetr_max_conc)
                BubbleDetector._rtdetr_onnx_sema = threading.Semaphore(BubbleDetector._rtdetr_onnx_max_concurrent)
                BubbleDetector._rtdetr_onnx_sema_initialized = True
                logger.info(f"RT-DETR concurrency set to: {BubbleDetector._rtdetr_onnx_max_concurrent}")
        except Exception as e:
            logger.warning(f"Failed to set RT-DETR concurrency: {e}")

        # GPU availability
        self.use_gpu = TORCH_AVAILABLE and torch.cuda.is_available()
        self.device = 'cuda' if self.use_gpu else 'cpu'

        # Quantization/precision settings
        adv_cfg = self.config.get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
        ocr_cfg = self.config.get('manga_settings', {}).get('ocr', {}) if isinstance(self.config, dict) else {}
        env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true'
        self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False) or ocr_cfg.get('quantize_bubble_detector', False))
        self.quantize_dtype = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower()
        # Prefer advanced.onnx_quantize; fall back to env or global quantize
        self.onnx_quantize_enabled = bool(adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true' or self.quantize_enabled))

        # Stop flag support
        self.stop_flag = None
        self._stopped = False
        self.log_callback = None

        logger.info(f"🗨️ BubbleDetector initialized")
        logger.info(f"   GPU: {'Available' if self.use_gpu else 'Not available'}")
        logger.info(f"   YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}")
        logger.info(f"   ONNX: {'Available' if ONNX_AVAILABLE else 'Not installed'}")
        logger.info(f"   RT-DETR: {'Available' if TRANSFORMERS_AVAILABLE else 'Not installed'}")
        logger.info(f"   Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} (torch_precision={self.quantize_dtype}, onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'})")

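    # The settings read above live under manga_settings in config.json. A minimal
    # example (keys are the ones this class actually reads; values are illustrative
    # defaults, not a required configuration):
    #
    #   {"manga_settings": {
    #       "ocr": {"bubble_max_detections": 100, "rtdetr_max_concurrency": 2},
    #       "advanced": {"quantize_models": false, "onnx_quantize": false,
    #                    "torch_precision": "auto"}}}
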
    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from file."""
        if os.path.exists(self.config_path):
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load config: {e}")
        return {}

    def _save_config(self):
        """Save configuration to file."""
        try:
            with open(self.config_path, 'w', encoding='utf-8') as f:
                json.dump(self.config, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save config: {e}")

    def set_stop_flag(self, stop_flag):
        """Set the stop flag for checking interruptions"""
        self.stop_flag = stop_flag
        self._stopped = False

    def set_log_callback(self, log_callback):
        """Set log callback for GUI integration"""
        self.log_callback = log_callback

    def _check_stop(self) -> bool:
        """Check if stop has been requested"""
        if self._stopped:
            return True
        if self.stop_flag and self.stop_flag.is_set():
            self._stopped = True
            return True
        # Check global manga translator cancellation
        try:
            from manga_translator import MangaTranslator
            if MangaTranslator.is_globally_cancelled():
                self._stopped = True
                return True
        except Exception:
            pass
        return False

    def _log(self, message: str, level: str = "info"):
        """Log message with stop suppression"""
        # Suppress logs when stopped (allow only essential stop confirmation messages)
        if self._check_stop():
            essential_stop_keywords = [
                "⏹️ Translation stopped by user",
                "⏹️ Bubble detection stopped",
                "cleanup", "🧹"
            ]
            if not any(keyword in message for keyword in essential_stop_keywords):
                return

        if self.log_callback:
            self.log_callback(message, level)
        else:
            logger.info(message) if level == 'info' else getattr(logger, level, logger.info)(message)

    def reset_stop_flags(self):
        """Reset stop flags when starting new processing"""
        self._stopped = False

    def load_model(self, model_path: str, force_reload: bool = False) -> bool:
        """
        Load a YOLOv8 model for bubble detection.

        Args:
            model_path: Path to model file (.pt, .onnx, or .torchscript)
            force_reload: Force reload even if model is already loaded

        Returns:
            True if model loaded successfully, False otherwise
        """
        try:
            # If given a Hugging Face repo ID (e.g., 'owner/name'), fetch detector.onnx into models/
            if model_path and (('/' in model_path) and not os.path.exists(model_path)):
                try:
                    from huggingface_hub import hf_hub_download
                    os.makedirs(self.cache_dir, exist_ok=True)
                    logger.info(f"📥 Resolving repo '{model_path}' to detector.onnx in {self.cache_dir}...")
                    resolved = hf_hub_download(repo_id=model_path, filename='detector.onnx', cache_dir=self.cache_dir, local_dir=self.cache_dir, local_dir_use_symlinks=False)
                    if resolved and os.path.exists(resolved):
                        model_path = resolved
                        logger.info(f"✅ Downloaded detector.onnx to: {model_path}")
                except Exception as repo_err:
                    logger.error(f"Failed to download from repo '{model_path}': {repo_err}")
            if not os.path.exists(model_path):
                logger.error(f"Model file not found: {model_path}")
                return False

            # Check if it's the same model already loaded
            if self.model_loaded and not force_reload:
                last_path = self.config.get('last_model_path', '')
                if last_path == model_path:
                    logger.info("Model already loaded (same path)")
                    return True
                else:
                    logger.info(f"Model path changed from {last_path} to {model_path}, reloading...")
                    force_reload = True

            # Clear previous model if force reload
            if force_reload:
                logger.info("Force reloading model...")
                self.model = None
                self.onnx_session = None
                self.model_loaded = False
                self.model_type = None

            logger.info(f"📥 Loading bubble detection model: {model_path}")

            # Determine model type by extension
            ext = Path(model_path).suffix.lower()

            if ext in ['.pt', '.pth']:
                if not YOLO_AVAILABLE:
                    logger.warning("Ultralytics package not available in this build")
                    logger.info("Bubble detection will be disabled - this is normal for lightweight builds")
                    # Don't return False immediately, try other fallbacks
                    self.model_loaded = False
                    return False

                # Load YOLOv8 model
                try:
                    self.model = YOLO(model_path)
                    self.model_type = 'yolo'

                    # Set to eval mode
                    if hasattr(self.model, 'model'):
                        self.model.model.eval()

                    # Move to GPU if available
                    if self.use_gpu and TORCH_AVAILABLE:
                        try:
                            self.model.to('cuda')
                        except Exception as gpu_error:
                            logger.warning(f"Could not move model to GPU: {gpu_error}")

                    logger.info("✅ YOLOv8 model loaded successfully")
                    # Apply optional FP16 precision to reduce VRAM if enabled
                    if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
                        try:
                            m = self.model.model if hasattr(self.model, 'model') else self.model
                            m.half()
                            logger.info("🔻 Applied FP16 precision to YOLO model (GPU)")
                        except Exception as _e:
                            logger.warning(f"Could not switch YOLO model to FP16: {_e}")

                except Exception as yolo_error:
                    logger.error(f"Failed to load YOLO model: {yolo_error}")
                    return False

            elif ext == '.onnx':
                if not ONNX_AVAILABLE:
                    logger.warning("ONNX Runtime not available in this build")
                    logger.info("ONNX model support disabled - this is normal for lightweight builds")
                    return False

                try:
                    # Load ONNX model
                    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.use_gpu else ['CPUExecutionProvider']
                    session_path = model_path
                    if self.quantize_enabled:
                        try:
                            from onnxruntime.quantization import quantize_dynamic, QuantType
                            quant_path = os.path.splitext(model_path)[0] + ".int8.onnx"
                            if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
                                logger.info("🔻 Quantizing ONNX model weights to INT8 (dynamic)...")
                                quantize_dynamic(model_input=model_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
                            session_path = quant_path
                            self.config['last_onnx_quantized_path'] = quant_path
                            self._save_config()
                            logger.info(f"✅ Using quantized ONNX model: {quant_path}")
                        except Exception as qe:
                            logger.warning(f"ONNX quantization not applied: {qe}")
                    # Use conservative ORT memory options to reduce RAM growth
                    so = ort.SessionOptions()
                    try:
                        so.enable_mem_pattern = False
                        so.enable_cpu_mem_arena = False
                    except Exception:
                        pass
                    self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers)
                    self.model_type = 'onnx'

                    logger.info("✅ ONNX model loaded successfully")

                except Exception as onnx_error:
                    logger.error(f"Failed to load ONNX model: {onnx_error}")
                    return False

            elif ext == '.torchscript':
                if not TORCH_AVAILABLE:
                    logger.warning("PyTorch not available in this build")
                    logger.info("TorchScript model support disabled - this is normal for lightweight builds")
                    return False

                try:
                    # Add safety check for torch being None
                    if torch is None:
                        logger.error("PyTorch module is None - cannot load TorchScript model")
                        return False

                    # Load TorchScript model
                    self.model = torch.jit.load(model_path, map_location='cpu')
                    self.model.eval()
                    self.model_type = 'torch'

                    if self.use_gpu:
                        try:
                            self.model = self.model.cuda()
                        except Exception as gpu_error:
                            logger.warning(f"Could not move TorchScript model to GPU: {gpu_error}")

                    logger.info("✅ TorchScript model loaded successfully")

                    # Optional FP16 precision on GPU
                    if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
                        try:
                            self.model = self.model.half()
                            logger.info("🔻 Applied FP16 precision to TorchScript model (GPU)")
                        except Exception as _e:
                            logger.warning(f"Could not switch TorchScript model to FP16: {_e}")

                except Exception as torch_error:
                    logger.error(f"Failed to load TorchScript model: {torch_error}")
                    return False

            else:
                logger.error(f"Unsupported model format: {ext}")
                logger.info("Supported formats: .pt/.pth (YOLOv8), .onnx (ONNX), .torchscript (TorchScript)")
                return False

            # Only set loaded if we actually succeeded
            self.model_loaded = True
            self.config['last_model_path'] = model_path
            self.config['model_type'] = self.model_type
            self._save_config()

            return True

        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            logger.error(traceback.format_exc())
            self.model_loaded = False

            # Provide helpful context for .exe users
            logger.info("Note: If running from .exe, some ML libraries may not be included")
            logger.info("This is normal for lightweight builds - bubble detection will be disabled")

            return False

    def load_rtdetr_model(self, model_path: str = None, model_id: str = None, force_reload: bool = False) -> bool:
        """
        Load RT-DETR model for advanced bubble and text detection.
        This implementation avoids the 'meta tensor' copy error by:
        - Serializing the entire load under a class lock (no concurrent loads)
        - Loading directly onto the target device (CUDA if available) via device_map='auto'
        - Avoiding .to() on a potentially-meta model; no device migration post-load

        Args:
            model_path: Optional path to local model
            model_id: Optional HuggingFace model ID (default: 'ogkalu/comic-text-and-bubble-detector')
            force_reload: Force reload even if already loaded

        Returns:
            True if successful, False otherwise
        """
        if not TRANSFORMERS_AVAILABLE:
            logger.error("Transformers library required for RT-DETR. Install with: pip install transformers")
            return False

        if not PIL_AVAILABLE:
            logger.error("PIL required for RT-DETR. Install with: pip install pillow")
            return False

        if self.rtdetr_loaded and not force_reload:
            logger.info("RT-DETR model already loaded")
            return True

        # Fast path: if shared already loaded and not forcing reload, attach
        if BubbleDetector._rtdetr_loaded and not force_reload:
            self.rtdetr_model = BubbleDetector._rtdetr_shared_model
            self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
            self.rtdetr_loaded = True
            logger.info("RT-DETR model attached from shared cache")
            return True

        # Serialize the ENTIRE loading sequence to avoid concurrent init issues
        with BubbleDetector._rtdetr_init_lock:
            try:
                # Re-check after acquiring lock
                if BubbleDetector._rtdetr_loaded and not force_reload:
                    self.rtdetr_model = BubbleDetector._rtdetr_shared_model
                    self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
                    self.rtdetr_loaded = True
                    logger.info("RT-DETR model attached from shared cache (post-lock)")
                    return True

                # Use custom model_id if provided, otherwise use default
                repo_id = model_id if model_id else self.rtdetr_repo
                logger.info(f"📥 Loading RT-DETR model from {repo_id}...")

                # Ensure TorchDynamo/compile doesn't interfere on some builds
                try:
                    os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
                except Exception:
                    pass

                # Decide device strategy
                gpu_available = bool(TORCH_AVAILABLE and hasattr(torch, 'cuda') and torch.cuda.is_available())
                device_map = 'auto' if gpu_available else None
                # Choose dtype
                dtype = None
                if TORCH_AVAILABLE:
                    try:
                        dtype = torch.float16 if gpu_available else torch.float32
                    except Exception:
                        dtype = None
                low_cpu = True if gpu_available else False

                # Load processor (once)
                self.rtdetr_processor = RTDetrImageProcessor.from_pretrained(
                    repo_id,
                    size={"width": 640, "height": 640},
                    cache_dir=self.cache_dir if not model_path else None
                )

                # Prepare kwargs for from_pretrained
                from_kwargs = {
                    'cache_dir': self.cache_dir if not model_path else None,
                    'low_cpu_mem_usage': low_cpu,
                    'device_map': device_map,
                }
                # Note: dtype is handled via torch_dtype parameter in newer transformers
                if dtype is not None:
                    from_kwargs['torch_dtype'] = dtype

                # First attempt: load directly to target (CUDA if available)
                try:
                    self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
                        model_path if model_path else repo_id,
                        **from_kwargs,
                    )
                except Exception as primary_err:
                    # Fallback to a simple CPU load (no device move) if CUDA path fails
                    logger.warning(f"RT-DETR primary load failed ({primary_err}); retrying on CPU...")
                    from_kwargs_fallback = {
                        'cache_dir': self.cache_dir if not model_path else None,
                        'low_cpu_mem_usage': False,
                        'device_map': None,
                    }
                    if TORCH_AVAILABLE:
                        from_kwargs_fallback['torch_dtype'] = torch.float32
                    self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
                        model_path if model_path else repo_id,
                        **from_kwargs_fallback,
                    )

                # Optional dynamic quantization for linear layers (CPU only)
                if self.quantize_enabled and TORCH_AVAILABLE and (not gpu_available):
                    try:
                        try:
                            import torch.ao.quantization as tq
                            quantize_dynamic = tq.quantize_dynamic  # type: ignore
                        except Exception:
                            import torch.quantization as tq  # type: ignore
                            quantize_dynamic = tq.quantize_dynamic  # type: ignore
                        self.rtdetr_model = quantize_dynamic(self.rtdetr_model, {torch.nn.Linear}, dtype=torch.qint8)
                        logger.info("🔻 Applied dynamic INT8 quantization to RT-DETR linear layers (CPU)")
                    except Exception as qe:
                        logger.warning(f"RT-DETR dynamic quantization skipped: {qe}")

                # Finalize
                self.rtdetr_model.eval()

                # Sanity check: ensure no parameter is left on 'meta' device
                try:
                    for n, p in self.rtdetr_model.named_parameters():
                        dev = getattr(p, 'device', None)
                        if dev is not None and getattr(dev, 'type', '') == 'meta':
                            raise RuntimeError(f"Parameter {n} is on 'meta' device after load")
                except Exception as e:
                    logger.error(f"RT-DETR load sanity check failed: {e}")
                    self.rtdetr_loaded = False
                    return False

                # Publish shared cache
                BubbleDetector._rtdetr_shared_model = self.rtdetr_model
                BubbleDetector._rtdetr_shared_processor = self.rtdetr_processor
                BubbleDetector._rtdetr_loaded = True
                BubbleDetector._rtdetr_repo_id = repo_id

                self.rtdetr_loaded = True

                # Save the model ID that was used
                self.config['rtdetr_loaded'] = True
                self.config['rtdetr_model_id'] = repo_id
                self._save_config()

                loc = 'CUDA' if gpu_available else 'CPU'
                logger.info(f"✅ RT-DETR model loaded successfully ({loc})")
                logger.info("   Classes: Empty bubbles, Text bubbles, Free text")

                # Auto-convert to ONNX for RT-DETR only if explicitly enabled
                if os.environ.get('AUTO_CONVERT_RTDETR_ONNX', 'false').lower() == 'true':
                    onnx_path = os.path.join(self.cache_dir, 'rtdetr_comic.onnx')
                    if self.convert_to_onnx('rtdetr', onnx_path):
                        logger.info("🚀 RT-DETR converted to ONNX for faster inference")
                        # Store ONNX path for later use
                        self.config['rtdetr_onnx_path'] = onnx_path
                        self._save_config()
                        # Optionally quantize ONNX for reduced RAM
                        if self.onnx_quantize_enabled:
                            try:
                                from onnxruntime.quantization import quantize_dynamic, QuantType
                                quant_path = os.path.splitext(onnx_path)[0] + ".int8.onnx"
                                if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
                                    logger.info("🔻 Quantizing RT-DETR ONNX to INT8 (dynamic)...")
                                    quantize_dynamic(model_input=onnx_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
                                self.config['rtdetr_onnx_quantized_path'] = quant_path
                                self._save_config()
                                logger.info(f"✅ Quantized RT-DETR ONNX saved to: {quant_path}")
                            except Exception as qe:
                                logger.warning(f"ONNX quantization for RT-DETR skipped: {qe}")
                else:
                    logger.info("ℹ️ Skipping RT-DETR ONNX export (converter not supported in current environment)")

                return True
            except Exception as e:
                logger.error(f"❌ Failed to load RT-DETR: {e}")
                self.rtdetr_loaded = False
                return False

    def check_rtdetr_available(self, model_id: str = None) -> bool:
        """
        Check if RT-DETR model is available (cached).

        Args:
            model_id: Optional HuggingFace model ID

        Returns:
            True if model is cached and available
        """
        try:
            from pathlib import Path

            # Use provided model_id or default
            repo_id = model_id if model_id else self.rtdetr_repo

            # Check HuggingFace cache
            cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
            model_id_formatted = repo_id.replace("/", "--")

            # Look for model folder
            model_folders = list(cache_dir.glob(f"models--{model_id_formatted}*"))

            if model_folders:
                for folder in model_folders:
                    if (folder / "snapshots").exists():
                        snapshots = list((folder / "snapshots").iterdir())
                        if snapshots:
                            return True

            return False

        except Exception:
            return False

    def detect_bubbles(self,
                       image_path: str,
                       confidence: float = None,
                       iou_threshold: float = None,
                       max_detections: int = None,
                       use_rtdetr: bool = None) -> List[Tuple[int, int, int, int]]:
        """
        Detect speech bubbles in an image (backward compatible method).

        Args:
            image_path: Path to image file
            confidence: Minimum confidence threshold (0-1)
            iou_threshold: IOU threshold for NMS (0-1)
            max_detections: Maximum number of detections to return
            use_rtdetr: If True, use RT-DETR instead of YOLOv8 (if available)

        Returns:
            List of bubble bounding boxes as (x, y, width, height) tuples
        """
        # Check for stop at start
        if self._check_stop():
            self._log("⏹️ Bubble detection stopped by user", "warning")
            return []

        # Decide which model to use
        if use_rtdetr is None:
            # Auto-select: prefer RT-DETR if available
            use_rtdetr = self.rtdetr_loaded

        if use_rtdetr:
            # Prefer ONNX backend if available, else PyTorch
            if getattr(self, 'rtdetr_onnx_loaded', False):
                results = self.detect_with_rtdetr_onnx(
                    image_path=image_path,
                    confidence=confidence,
                    return_all_bubbles=True
                )
                return results
            if self.rtdetr_loaded:
                results = self.detect_with_rtdetr(
                    image_path=image_path,
                    confidence=confidence,
                    return_all_bubbles=True
                )
                return results

        # Original YOLOv8 detection
        if not self.model_loaded:
            logger.error("No model loaded. Call load_model() first.")
            return []

        # Use defaults if not specified
        confidence = confidence or self.default_confidence
        iou_threshold = iou_threshold or self.default_iou_threshold
        max_detections = max_detections or self.default_max_detections

        try:
            # Load image
            image = cv2.imread(image_path)
            if image is None:
                logger.error(f"Failed to load image: {image_path}")
                return []

            h, w = image.shape[:2]
            self._log(f"🔍 Detecting bubbles in {w}x{h} image")

            # Check for stop before inference
            if self._check_stop():
                self._log("⏹️ Bubble detection inference stopped by user", "warning")
                return []

            if self.model_type == 'yolo':
                # YOLOv8 inference
                results = self.model(
                    image_path,
                    conf=confidence,
                    iou=iou_threshold,
                    max_det=min(max_detections, getattr(self, 'max_det_yolo', max_detections)),
                    verbose=False
                )

                bubbles = []
                for r in results:
                    if r.boxes is not None:
                        for box in r.boxes:
                            # Get box coordinates
                            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                            x, y = int(x1), int(y1)
                            width = int(x2 - x1)
                            height = int(y2 - y1)

                            # Get confidence
                            conf = float(box.conf[0])

                            # Add to list
                            if len(bubbles) < max_detections:
                                bubbles.append((x, y, width, height))

                            logger.debug(f"   Bubble: ({x},{y}) {width}x{height} conf={conf:.2f}")

            elif self.model_type == 'onnx':
                # ONNX inference
                bubbles = self._detect_with_onnx(image, confidence, iou_threshold, max_detections)

            elif self.model_type == 'torch':
                # TorchScript inference
                bubbles = self._detect_with_torchscript(image, confidence, iou_threshold, max_detections)

            else:
                logger.error(f"Unknown model type: {self.model_type}")
                return []

            logger.info(f"✅ Detected {len(bubbles)} speech bubbles")
            time.sleep(0.1)  # Brief pause for stability
            logger.debug("💤 Bubble detection pausing briefly for stability")
            return bubbles

        except Exception as e:
            logger.error(f"Detection failed: {e}")
            logger.error(traceback.format_exc())
            return []

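    # Note: detect_bubbles() returns (x, y, width, height) tuples regardless of the
    # backend used. A caller that needs corner coordinates can convert with, e.g.:
    #   x1, y1, x2, y2 = x, y, x + w, y + h
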
    def detect_with_rtdetr(self,
                           image_path: str = None,
                           image: np.ndarray = None,
                           confidence: float = None,
                           return_all_bubbles: bool = False) -> Any:
        """
        Detect using RT-DETR model with 3-class detection (PyTorch backend).

        Args:
            image_path: Path to image file
            image: Image array (BGR format)
            confidence: Confidence threshold
            return_all_bubbles: If True, return list of bubble boxes (for compatibility)
                                If False, return dict with all classes

        Returns:
            List of bubbles if return_all_bubbles=True, else dict with classes
        """
        # Check for stop at start
        if self._check_stop():
            self._log("⏹️ RT-DETR detection stopped by user", "warning")
            if return_all_bubbles:
                return []
            return {'bubbles': [], 'text_bubbles': [], 'text_free': []}

        if not self.rtdetr_loaded:
            self._log("RT-DETR not loaded. Call load_rtdetr_model() first.", "warning")
            if return_all_bubbles:
                return []
            return {'bubbles': [], 'text_bubbles': [], 'text_free': []}

        confidence = confidence or self.default_confidence

        try:
            # Load image
            if image_path:
                image = cv2.imread(image_path)
            elif image is None:
                logger.error("No image provided")
                if return_all_bubbles:
                    return []
                return {'bubbles': [], 'text_bubbles': [], 'text_free': []}

            # Convert BGR to RGB for PIL
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image_rgb)

            # Prepare image for model
            inputs = self.rtdetr_processor(images=pil_image, return_tensors="pt")

            # Move inputs to the same device as the model and match model dtype for floating tensors
            model_device = next(self.rtdetr_model.parameters()).device if self.rtdetr_model is not None else (torch.device('cpu') if TORCH_AVAILABLE else 'cpu')
            model_dtype = None
            if TORCH_AVAILABLE and self.rtdetr_model is not None:
                try:
                    model_dtype = next(self.rtdetr_model.parameters()).dtype
                except Exception:
                    model_dtype = None

            if TORCH_AVAILABLE:
                new_inputs = {}
                for k, v in inputs.items():
                    if isinstance(v, torch.Tensor):
                        v = v.to(model_device)
                        if model_dtype is not None and torch.is_floating_point(v):
                            v = v.to(model_dtype)
                    new_inputs[k] = v
                inputs = new_inputs

            # Run inference with autocast when model is half/bfloat16 on CUDA
            use_amp = TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == 'cuda' and (model_dtype in (torch.float16, torch.bfloat16))
            autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None

            with torch.no_grad():
                if use_amp and autocast_dtype is not None:
                    with torch.autocast('cuda', dtype=autocast_dtype):
                        outputs = self.rtdetr_model(**inputs)
                else:
                    outputs = self.rtdetr_model(**inputs)

            # Brief pause for stability after inference
            time.sleep(0.1)
            logger.debug("💤 RT-DETR inference pausing briefly for stability")

            # Post-process results
            target_sizes = torch.tensor([pil_image.size[::-1]]) if TORCH_AVAILABLE else None
            if TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == "cuda":
                target_sizes = target_sizes.to(model_device)

            results = self.rtdetr_processor.post_process_object_detection(
                outputs,
                target_sizes=target_sizes,
                threshold=confidence
            )[0]

            # Apply per-detector cap if configured
            cap = getattr(self, 'max_det_rtdetr', self.default_max_detections)
            if cap and len(results['boxes']) > cap:
                # Keep top-scoring first
                scores = results['scores']
                top_idx = scores.topk(k=cap).indices if hasattr(scores, 'topk') else range(cap)
                results = {
                    'boxes': [results['boxes'][i] for i in top_idx],
                    'scores': [results['scores'][i] for i in top_idx],
                    'labels': [results['labels'][i] for i in top_idx]
                }

            logger.info(f"📊 RT-DETR found {len(results['boxes'])} detections above {confidence:.2f} confidence")

            # Apply NMS to remove duplicate detections
            # Group detections by class
            class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}

            for box, score, label in zip(results['boxes'], results['scores'], results['labels']):
                x1, y1, x2, y2 = map(float, box.tolist())
                label_id = label.item()
                if label_id in class_detections:
                    class_detections[label_id].append((x1, y1, x2, y2, float(score.item())))

            # Apply NMS per class to remove duplicates
            def compute_iou(box1, box2):
                """Compute IoU between two boxes (x1, y1, x2, y2)"""
                x1_1, y1_1, x2_1, y2_1 = box1[:4]
                x1_2, y1_2, x2_2, y2_2 = box2[:4]

                # Intersection
                x_left = max(x1_1, x1_2)
                y_top = max(y1_1, y1_2)
                x_right = min(x2_1, x2_2)
                y_bottom = min(y2_1, y2_2)

                if x_right < x_left or y_bottom < y_top:
                    return 0.0

                intersection = (x_right - x_left) * (y_bottom - y_top)

                # Union
                area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
                area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
                union = area1 + area2 - intersection

                return intersection / union if union > 0 else 0.0

            def apply_nms(boxes_with_scores, iou_threshold=0.45):
                """Apply Non-Maximum Suppression"""
                if not boxes_with_scores:
                    return []

                # Sort by score (descending)
                sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
                keep = []

                while sorted_boxes:
                    # Keep the box with highest score
                    current = sorted_boxes.pop(0)
                    keep.append(current)

                    # Remove boxes with high IoU
                    sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]

                return keep

            # Apply NMS and organize by class
            detections = {
                'bubbles': [],       # Empty speech bubbles
                'text_bubbles': [],  # Bubbles with text
                'text_free': []      # Text without bubbles
            }

            for class_id, boxes_list in class_detections.items():
                nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)

                for x1, y1, x2, y2, scr in nms_boxes:
                    width = int(x2 - x1)
                    height = int(y2 - y1)
                    # Store as (x, y, width, height) to match YOLOv8 format
                    bbox = (int(x1), int(y1), width, height)

                    if class_id == self.CLASS_BUBBLE:
                        detections['bubbles'].append(bbox)
                    elif class_id == self.CLASS_TEXT_BUBBLE:
                        detections['text_bubbles'].append(bbox)
                    elif class_id == self.CLASS_TEXT_FREE:
                        detections['text_free'].append(bbox)

                # Stop early if we hit the configured cap across all classes
                total_count = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
                if total_count >= (self.config.get('manga_settings', {}).get('ocr', {}).get('bubble_max_detections', self.default_max_detections) if isinstance(self.config, dict) else self.default_max_detections):
                    break

            # Log results
            total = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
            logger.info(f"✅ RT-DETR detected {total} objects:")
            logger.info(f"   - Empty bubbles: {len(detections['bubbles'])}")
            logger.info(f"   - Text bubbles: {len(detections['text_bubbles'])}")
            logger.info(f"   - Free text: {len(detections['text_free'])}")

            # Return format based on compatibility mode
            if return_all_bubbles:
                # Return all bubbles (empty + with text) for backward compatibility
                all_bubbles = detections['bubbles'] + detections['text_bubbles']
                return all_bubbles
            else:
                return detections

        except Exception as e:
            logger.error(f"RT-DETR detection failed: {e}")
            logger.error(traceback.format_exc())
            if return_all_bubbles:
                return []
            return {'bubbles': [], 'text_bubbles': [], 'text_free': []}

    def detect_all_text_regions(self, image_path: str = None, image: np.ndarray = None) -> List[Tuple[int, int, int, int]]:
        """
        Detect all text regions using RT-DETR (both in bubbles and free text).

        Returns:
            List of bounding boxes for all text regions
        """
        if not self.rtdetr_loaded:
            logger.warning("RT-DETR required for text detection")
            return []

        detections = self.detect_with_rtdetr(image_path=image_path, image=image, return_all_bubbles=False)

        # Combine text bubbles and free text
        all_text = detections['text_bubbles'] + detections['text_free']

        logger.info(f"📝 Found {len(all_text)} text regions total")
        return all_text

    def _detect_with_onnx(self, image: np.ndarray, confidence: float,
                          iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
        """Run detection using ONNX model."""
        # Preprocess image
        img_size = 640  # Standard YOLOv8 input size
        img_resized = cv2.resize(image, (img_size, img_size))
        img_norm = img_resized.astype(np.float32) / 255.0
        img_transposed = np.transpose(img_norm, (2, 0, 1))
        img_batch = np.expand_dims(img_transposed, axis=0)

        # Run inference
        input_name = self.onnx_session.get_inputs()[0].name
        outputs = self.onnx_session.run(None, {input_name: img_batch})

        # Process outputs (YOLOv8 format)
        predictions = outputs[0][0]  # Remove batch dimension

        # Filter by confidence and apply NMS
        bubbles = []
        boxes = []
        scores = []

        for pred in predictions.T:  # Transpose to get predictions per detection
            if len(pred) >= 5:
                x_center, y_center, width, height, obj_conf = pred[:5]

                if obj_conf >= confidence:
                    # Convert to corner coordinates
                    x1 = x_center - width / 2
                    y1 = y_center - height / 2

                    # Scale to original image size
                    h, w = image.shape[:2]
                    x1 = int(x1 * w / img_size)
                    y1 = int(y1 * h / img_size)
                    width = int(width * w / img_size)
                    height = int(height * h / img_size)

                    boxes.append([x1, y1, x1 + width, y1 + height])
                    scores.append(float(obj_conf))

        # Apply NMS
        if boxes:
            indices = cv2.dnn.NMSBoxes(boxes, scores, confidence, iou_threshold)
            if len(indices) > 0:
                indices = indices.flatten()[:max_detections]
                for i in indices:
                    x1, y1, x2, y2 = boxes[i]
                    bubbles.append((x1, y1, x2 - x1, y2 - y1))

        return bubbles

    def _detect_with_torchscript(self, image: np.ndarray, confidence: float,
                                 iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
        """Run detection using TorchScript model."""
        # Similar to ONNX but using PyTorch tensors
        img_size = 640
        img_resized = cv2.resize(image, (img_size, img_size))
        img_norm = img_resized.astype(np.float32) / 255.0
        img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0)

        if self.use_gpu:
            img_tensor = img_tensor.cuda()

        with torch.no_grad():
            outputs = self.model(img_tensor)

        # Process outputs similar to ONNX
        # Implementation depends on exact model output format
        # This is a placeholder - adjust based on your model
        return []

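    # A minimal decode sketch for the placeholder above, assuming the TorchScript
    # export produces the same channel-first prediction layout handled in
    # _detect_with_onnx (this is an assumption, not guaranteed by every export):
    #
    #   preds = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
    #   preds = preds.detach().cpu().numpy()[0]
    #   # then reuse the confidence filter and cv2.dnn.NMSBoxes post-processing
    #   # from _detect_with_onnx on `preds.T`
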
    def visualize_detections(self, image_path: str, bubbles: List[Tuple[int, int, int, int]] = None,
                             output_path: str = None, use_rtdetr: bool = False) -> np.ndarray:
        """
        Visualize detected bubbles on the image.

        Args:
            image_path: Path to original image
            bubbles: List of bubble bounding boxes (if None, will detect)
            output_path: Optional path to save visualization
            use_rtdetr: Use RT-DETR for visualization with class colors

        Returns:
            Image with drawn bounding boxes
        """
        image = cv2.imread(image_path)
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return None

        vis_image = image.copy()

        if use_rtdetr and self.rtdetr_loaded:
            # RT-DETR visualization with different colors per class
            detections = self.detect_with_rtdetr(image_path=image_path, return_all_bubbles=False)

            # Colors for each class
            colors = {
                'bubbles': (0, 255, 0),       # Green for empty bubbles
                'text_bubbles': (255, 0, 0),  # Blue for text bubbles
                'text_free': (0, 0, 255)      # Red for free text
            }

            # Draw detections
            for class_name, bboxes in detections.items():
                color = colors[class_name]

                for i, (x, y, w, h) in enumerate(bboxes):
                    # Draw rectangle
                    cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, 2)

                    # Add label
                    label = f"{class_name.replace('_', ' ').title()} {i+1}"
                    label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                    cv2.rectangle(vis_image, (x, y - label_size[1] - 4),
                                  (x + label_size[0], y), color, -1)
                    cv2.putText(vis_image, label, (x, y - 2),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        else:
            # Original YOLOv8 visualization
            if bubbles is None:
                bubbles = self.detect_bubbles(image_path)

            # Draw bounding boxes
            for i, (x, y, w, h) in enumerate(bubbles):
                # Draw rectangle
                color = (0, 255, 0)  # Green
                thickness = 2
                cv2.rectangle(vis_image, (x, y), (x + w, y + h), color, thickness)

                # Add label
                label = f"Bubble {i+1}"
                label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.rectangle(vis_image, (x, y - label_size[1] - 4), (x + label_size[0], y), color, -1)
                cv2.putText(vis_image, label, (x, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Save if output path provided
        if output_path:
            cv2.imwrite(output_path, vis_image)
            logger.info(f"💾 Visualization saved to: {output_path}")

        return vis_image

    def convert_to_onnx(self, model_path: str, output_path: str = None) -> bool:
        """
        Convert a YOLOv8 or RT-DETR model to ONNX format.

        Args:
            model_path: Path to model file or 'rtdetr' for loaded RT-DETR
            output_path: Path for ONNX output (auto-generated if None)

        Returns:
            True if conversion successful, False otherwise
        """
        try:
            logger.info(f"🔄 Converting {model_path} to ONNX...")

            # Generate output path if not provided
            if output_path is None:
                if model_path == 'rtdetr' and self.rtdetr_loaded:
                    base_name = 'rtdetr_comic'
                else:
                    base_name = Path(model_path).stem
                output_path = os.path.join(self.cache_dir, f"{base_name}.onnx")

            # Check if already exists
            if os.path.exists(output_path) and not os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
                logger.info(f"✅ ONNX model already exists: {output_path}")
                return True

            # Handle RT-DETR conversion
            if model_path == 'rtdetr' and self.rtdetr_loaded:
                if not TORCH_AVAILABLE:
                    logger.error("PyTorch required for RT-DETR ONNX conversion")
                    return False

                # RT-DETR specific conversion
                self.rtdetr_model.eval()

                # Create dummy input (pixel values): BxCxHxW
                dummy_input = torch.randn(1, 3, 640, 640)
                if self.device == 'cuda':
                    dummy_input = dummy_input.to('cuda')

                # Wrap the model to return only tensors (logits, pred_boxes)
                class _RTDetrExportWrapper(torch.nn.Module):
                    def __init__(self, mdl):
                        super().__init__()
                        self.mdl = mdl

                    def forward(self, images):
                        out = self.mdl(pixel_values=images)
                        # Handle dict/ModelOutput/tuple outputs
                        logits = None
                        boxes = None
                        try:
                            if isinstance(out, dict):
                                logits = out.get('logits', None)
                                boxes = out.get('pred_boxes', out.get('boxes', None))
                            else:
                                logits = getattr(out, 'logits', None)
                                boxes = getattr(out, 'pred_boxes', getattr(out, 'boxes', None))
                        except Exception:
                            pass
                        if (logits is None or boxes is None) and isinstance(out, (tuple, list)) and len(out) >= 2:
                            logits, boxes = out[0], out[1]
                        return logits, boxes

                wrapper = _RTDetrExportWrapper(self.rtdetr_model)
                if self.device == 'cuda':
                    wrapper = wrapper.to('cuda')

                # Try PyTorch 2.x dynamo_export first (more tolerant of newer aten ops)
                try:
                    success = False
                    try:
                        from torch.onnx import dynamo_export
                        try:
                            exp = dynamo_export(wrapper, dummy_input)
                        except TypeError:
                            # Older PyTorch dynamo_export may not support this calling convention
                            exp = dynamo_export(wrapper, dummy_input)
                        # exp may have save(); otherwise, it may expose model_proto
                        try:
                            exp.save(output_path)  # type: ignore
                            success = True
                        except Exception:
                            try:
                                import onnx as _onnx
                                _onnx.save(exp.model_proto, output_path)  # type: ignore
                                success = True
                            except Exception as _se:
                                logger.warning(f"dynamo_export produced model but could not save: {_se}")
                    except Exception as de:
                        logger.warning(f"dynamo_export failed; falling back to legacy exporter: {de}")
                    if success:
                        logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (dynamo_export)")
                        return True
                except Exception as de2:
                    logger.warning(f"dynamo_export path error: {de2}")

                # Legacy exporter with opset fallback
                last_err = None
                for opset in [19, 18, 17, 16, 15, 14, 13]:
                    try:
                        torch.onnx.export(
                            wrapper,
                            dummy_input,
                            output_path,
                            export_params=True,
                            opset_version=opset,
                            do_constant_folding=True,
                            input_names=['pixel_values'],
                            output_names=['logits', 'boxes'],
                            dynamic_axes={
                                'pixel_values': {0: 'batch', 2: 'height', 3: 'width'},
                                'logits': {0: 'batch'},
                                'boxes': {0: 'batch'}
                            }
                        )
                        logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (opset {opset})")
                        return True
                    except Exception as _e:
                        last_err = _e
                        try:
                            msg = str(_e)
                        except Exception:
                            msg = ''
                        logger.warning(f"RT-DETR ONNX export failed at opset {opset}: {msg}")
                        continue

                logger.error(f"All RT-DETR ONNX export attempts failed. Last error: {last_err}")
                return False

            # Handle YOLOv8 conversion - FIXED
            elif YOLO_AVAILABLE and os.path.exists(model_path):
                logger.info(f"Loading YOLOv8 model from: {model_path}")

                # Load model
                model = YOLO(model_path)

                # Export to ONNX - this returns the path to the exported model
                logger.info("Exporting to ONNX format...")
                exported_path = model.export(format='onnx', imgsz=640, simplify=True)

                # exported_path could be a string or Path object
                exported_path = str(exported_path) if exported_path else None

                if exported_path and os.path.exists(exported_path):
                    # Move to desired location if different
                    if exported_path != output_path:
                        import shutil
                        logger.info(f"Moving ONNX from {exported_path} to {output_path}")
                        shutil.move(exported_path, output_path)

                    logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
                    return True
                else:
                    # Fallback: check if it was created with expected name
                    expected_onnx = model_path.replace('.pt', '.onnx')
                    if os.path.exists(expected_onnx):
                        if expected_onnx != output_path:
                            import shutil
                            shutil.move(expected_onnx, output_path)
                        logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
                        return True
                    else:
                        logger.error(f"ONNX export failed - no output file found")
                        return False

            else:
                logger.error(f"Cannot convert {model_path}: Model not found or dependencies missing")
                return False

        except Exception as e:
            logger.error(f"Conversion failed: {e}")
            # Avoid noisy full stack trace in production logs; return False gracefully
            return False

1420
+ def batch_detect(self, image_paths: List[str], **kwargs) -> Dict[str, List[Tuple[int, int, int, int]]]:
1421
+ """
1422
+ Detect bubbles in multiple images.
1423
+
1424
+ Args:
1425
+ image_paths: List of image paths
1426
+ **kwargs: Detection parameters (confidence, iou_threshold, max_detections, use_rtdetr)
1427
+
1428
+ Returns:
1429
+ Dictionary mapping image paths to bubble lists
1430
+ """
1431
+ results = {}
1432
+
1433
+ for i, image_path in enumerate(image_paths):
1434
+ logger.info(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}")
1435
+ bubbles = self.detect_bubbles(image_path, **kwargs)
1436
+ results[image_path] = bubbles
1437
+
1438
+ return results
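+ # Usage sketch (hypothetical paths; assumes a YOLOv8 model has already been loaded):
+ #   detector = BubbleDetector()
+ #   detector.load_model('models/comic-speech-bubble-detector-yolov8m.pt')
+ #   per_image = detector.batch_detect(['page_001.png', 'page_002.png'], confidence=0.5)
+ #   for path, bubbles in per_image.items():
+ #       print(path, len(bubbles), 'bubbles')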
1439
+
1440
+ def unload(self, release_shared: bool = False):
1441
+ """Release model resources held by this detector instance.
1442
+ Args:
1443
+ release_shared: If True, also clear class-level shared RT-DETR caches.
1444
+ """
1445
+ try:
1446
+ # Release instance-level models and sessions
1447
+ try:
1448
+ if getattr(self, 'onnx_session', None) is not None:
1449
+ self.onnx_session = None
1450
+ except Exception:
1451
+ pass
1452
+ try:
1453
+ if getattr(self, 'rtdetr_onnx_session', None) is not None:
1454
+ self.rtdetr_onnx_session = None
1455
+ except Exception:
1456
+ pass
1457
+ for attr in ['model', 'rtdetr_model', 'rtdetr_processor']:
1458
+ try:
1459
+ if hasattr(self, attr):
1460
+ setattr(self, attr, None)
1461
+ except Exception:
1462
+ pass
1463
+ for flag in ['model_loaded', 'rtdetr_loaded', 'rtdetr_onnx_loaded']:
1464
+ try:
1465
+ if hasattr(self, flag):
1466
+ setattr(self, flag, False)
1467
+ except Exception:
1468
+ pass
1469
+
1470
+ # Optional: release shared caches
1471
+ if release_shared:
1472
+ try:
1473
+ BubbleDetector._rtdetr_shared_model = None
1474
+ BubbleDetector._rtdetr_shared_processor = None
1475
+ BubbleDetector._rtdetr_loaded = False
1476
+ except Exception:
1477
+ pass
1478
+
1479
+ # Free CUDA cache and trigger GC
1480
+ try:
1481
+ if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
1482
+ torch.cuda.empty_cache()
1483
+ except Exception:
1484
+ pass
1485
+ try:
1486
+ import gc
1487
+ gc.collect()
1488
+ except Exception:
1489
+ pass
1490
+ except Exception:
1491
+ # Best-effort only
1492
+ pass
1493
+
1494
+ def get_bubble_masks(self, image_path: str, bubbles: List[Tuple[int, int, int, int]]) -> np.ndarray:
1495
+ """
1496
+ Create a mask image with bubble regions.
1497
+
1498
+ Args:
1499
+ image_path: Path to original image
1500
+ bubbles: List of bubble bounding boxes
1501
+
1502
+ Returns:
1503
+ Binary mask with bubble regions as white (255)
1504
+ """
1505
+ image = cv2.imread(image_path)
1506
+ if image is None:
1507
+ return None
1508
+
1509
+ h, w = image.shape[:2]
1510
+ mask = np.zeros((h, w), dtype=np.uint8)
1511
+
1512
+ # Fill bubble regions
1513
+ for x, y, bw, bh in bubbles:
1514
+ cv2.rectangle(mask, (x, y), (x + bw, y + bh), 255, -1)
1515
+
1516
+ return mask
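+ # Usage sketch (hypothetical): the mask pairs naturally with OpenCV inpainting, e.g.
+ #   mask = detector.get_bubble_masks('page.png', bubbles)
+ #   cleaned = cv2.inpaint(cv2.imread('page.png'), mask, 3, cv2.INPAINT_TELEA)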
1517
+
1518
+ def filter_bubbles_by_size(self, bubbles: List[Tuple[int, int, int, int]],
1519
+ min_area: int = 100,
1520
+ max_area: int = None) -> List[Tuple[int, int, int, int]]:
1521
+ """
1522
+ Filter bubbles by area.
1523
+
1524
+ Args:
1525
+ bubbles: List of bubble bounding boxes
1526
+ min_area: Minimum area in pixels
1527
+ max_area: Maximum area in pixels (None for no limit)
1528
+
1529
+ Returns:
1530
+ Filtered list of bubbles
1531
+ """
1532
+ filtered = []
1533
+
1534
+ for x, y, w, h in bubbles:
1535
+ area = w * h
1536
+ if area >= min_area and (max_area is None or area <= max_area):
1537
+ filtered.append((x, y, w, h))
1538
+
1539
+ return filtered
1540
+
1541
+ def merge_overlapping_bubbles(self, bubbles: List[Tuple[int, int, int, int]],
1542
+ overlap_threshold: float = 0.1) -> List[Tuple[int, int, int, int]]:
1543
+ """
1544
+ Merge overlapping bubble detections.
1545
+
1546
+ Args:
1547
+ bubbles: List of bubble bounding boxes
1548
+ overlap_threshold: Minimum overlap ratio to merge
1549
+
1550
+ Returns:
1551
+ Merged list of bubbles
1552
+ """
1553
+ if not bubbles:
1554
+ return []
1555
+
1556
+ # Convert to numpy array for easier manipulation
1557
+ boxes = np.array([(x, y, x+w, y+h) for x, y, w, h in bubbles])
1558
+
1559
+ merged = []
1560
+ used = set()
1561
+
1562
+ for i, box1 in enumerate(boxes):
1563
+ if i in used:
1564
+ continue
1565
+
1566
+ # Start with current box
1567
+ x1, y1, x2, y2 = box1
1568
+
1569
+ # Check for overlaps with remaining boxes
1570
+ for j in range(i + 1, len(boxes)):
1571
+ if j in used:
1572
+ continue
1573
+
1574
+ box2 = boxes[j]
1575
+
1576
+ # Calculate intersection
1577
+ ix1 = max(x1, box2[0])
1578
+ iy1 = max(y1, box2[1])
1579
+ ix2 = min(x2, box2[2])
1580
+ iy2 = min(y2, box2[3])
1581
+
1582
+ if ix1 < ix2 and iy1 < iy2:
1583
+ # Calculate overlap ratio
1584
+ intersection = (ix2 - ix1) * (iy2 - iy1)
1585
+ area1 = (x2 - x1) * (y2 - y1)
1586
+ area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
1587
+ overlap = intersection / min(area1, area2)
1588
+
1589
+ if overlap >= overlap_threshold:
1590
+ # Merge boxes
1591
+ x1 = min(x1, box2[0])
1592
+ y1 = min(y1, box2[1])
1593
+ x2 = max(x2, box2[2])
1594
+ y2 = max(y2, box2[3])
1595
+ used.add(j)
1596
+
1597
+ merged.append((int(x1), int(y1), int(x2 - x1), int(y2 - y1)))
1598
+
1599
+ return merged
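+ # Worked example: (10, 10, 100, 50) and (50, 20, 100, 50) overlap by 48% of the smaller box,
+ # which exceeds the 0.1 threshold, so they merge into a single (10, 10, 140, 60) box.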
1600
+
1601
+ # ============================
1602
+ # RT-DETR (ONNX) BACKEND
1603
+ # ============================
1604
+ def load_rtdetr_onnx_model(self, model_id: str = None, force_reload: bool = False) -> bool:
1605
+ """
1606
+ Load RT-DETR ONNX model using onnxruntime. Downloads detector.onnx and config.json
1607
+ from the provided Hugging Face repo if not already cached.
1608
+ """
1609
+ if not ONNX_AVAILABLE:
1610
+ logger.error("ONNX Runtime not available for RT-DETR ONNX backend")
1611
+ return False
1612
+ try:
1613
+ # If singleton mode and already loaded, just attach shared session
1614
+ try:
1615
+ adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
1616
+ singleton = bool(adv.get('use_singleton_models', True))
1617
+ except Exception:
1618
+ singleton = True
1619
+ if singleton and BubbleDetector._rtdetr_onnx_loaded and not force_reload and BubbleDetector._rtdetr_onnx_shared_session is not None:
1620
+ self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
1621
+ self.rtdetr_onnx_loaded = True
1622
+ return True
1623
+
1624
+ repo = model_id or self.rtdetr_onnx_repo
1625
+ try:
1626
+ from huggingface_hub import hf_hub_download
1627
+ except Exception as e:
1628
+ logger.error(f"huggingface-hub required to fetch RT-DETR ONNX: {e}")
1629
+ return False
1630
+
1631
+ # Ensure local models dir (use configured cache_dir directly: e.g., 'models')
1632
+ cache_dir = self.cache_dir
1633
+ os.makedirs(cache_dir, exist_ok=True)
1634
+
1635
+ # Download files into models/ and avoid symlinks so the file is visible there
1636
+ try:
1637
+ _ = hf_hub_download(repo_id=repo, filename='config.json', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
1638
+ except Exception:
1639
+ pass
1640
+ onnx_fp = hf_hub_download(repo_id=repo, filename='detector.onnx', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
1641
+ BubbleDetector._rtdetr_onnx_model_path = onnx_fp
1642
+
1643
+ # Pick providers: prefer CUDA if available; otherwise CPU. Do NOT use DML.
1644
+ providers = ['CPUExecutionProvider']
1645
+ try:
1646
+ avail = ort.get_available_providers() if ONNX_AVAILABLE else []
1647
+ if 'CUDAExecutionProvider' in avail:
1648
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
1649
+ except Exception:
1650
+ pass
1651
+
1652
+ # Session options with reduced memory arena and optional thread limiting in singleton mode
1653
+ so = ort.SessionOptions()
1654
+ try:
1655
+ so.enable_mem_pattern = False
1656
+ so.enable_cpu_mem_arena = False
1657
+ except Exception:
1658
+ pass
1659
+ # If singleton models mode is enabled in config, limit ORT threading to reduce CPU spikes
1660
+ try:
1661
+ adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
1662
+ if bool(adv.get('use_singleton_models', True)):
1663
+ so.intra_op_num_threads = 1
1664
+ so.inter_op_num_threads = 1
1665
+ try:
1666
+ so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
1667
+ except Exception:
1668
+ pass
1669
+ try:
1670
+ so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
1671
+ except Exception:
1672
+ pass
1673
+ except Exception:
1674
+ pass
1675
+
1676
+ # Create session (serialize creation in singleton mode to avoid device storms)
1677
+ if singleton:
1678
+ with BubbleDetector._rtdetr_onnx_init_lock:
1679
+ # Re-check after acquiring lock
1680
+ if BubbleDetector._rtdetr_onnx_loaded and BubbleDetector._rtdetr_onnx_shared_session is not None and not force_reload:
1681
+ self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
1682
+ self.rtdetr_onnx_loaded = True
1683
+ return True
1684
+ sess = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
1685
+ BubbleDetector._rtdetr_onnx_shared_session = sess
1686
+ BubbleDetector._rtdetr_onnx_loaded = True
1687
+ BubbleDetector._rtdetr_onnx_providers = providers
1688
+ self.rtdetr_onnx_session = sess
1689
+ self.rtdetr_onnx_loaded = True
1690
+ else:
1691
+ self.rtdetr_onnx_session = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
1692
+ self.rtdetr_onnx_loaded = True
1693
+ logger.info("✅ RT-DETR (ONNX) model ready")
1694
+ return True
1695
+ except Exception as e:
1696
+ logger.error(f"Failed to load RT-DETR ONNX: {e}")
1697
+ self.rtdetr_onnx_session = None
1698
+ self.rtdetr_onnx_loaded = False
1699
+ return False
1700
+
1701
+ def detect_with_rtdetr_onnx(self,
1702
+ image_path: str = None,
1703
+ image: np.ndarray = None,
1704
+ confidence: float = 0.3,
1705
+ return_all_bubbles: bool = False) -> Any:
1706
+ """Detect using RT-DETR ONNX backend.
1707
+ Returns bubbles list if return_all_bubbles else dict by classes similar to PyTorch path.
1708
+ """
1709
+ if not self.rtdetr_onnx_loaded or self.rtdetr_onnx_session is None:
1710
+ logger.warning("RT-DETR ONNX not loaded")
1711
+ return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
1712
+ try:
1713
+ # Acquire image
1714
+ if image_path is not None:
1715
+ import cv2
1716
+ image = cv2.imread(image_path)
1717
+ if image is None:
1718
+ raise RuntimeError(f"Failed to read image: {image_path}")
1719
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
1720
+ else:
1721
+ if image is None:
1722
+ raise RuntimeError("No image provided")
1723
+ # Assume image is BGR np.ndarray if from OpenCV
1724
+ try:
1725
+ import cv2
1726
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
1727
+ except Exception:
1728
+ image_rgb = image
1729
+
1730
+ # To PIL then resize 640x640 as in reference
1731
+ from PIL import Image as _PILImage
1732
+ pil_image = _PILImage.fromarray(image_rgb)
1733
+ im_resized = pil_image.resize((640, 640))
1734
+ arr = np.asarray(im_resized, dtype=np.float32) / 255.0
1735
+ arr = np.transpose(arr, (2, 0, 1)) # (3,H,W)
1736
+ im_data = arr[np.newaxis, ...]
1737
+
1738
+ w, h = pil_image.size
1739
+ orig_size = np.array([[w, h]], dtype=np.int64)
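+ # Shape sketch: a 1000x1500 (w x h) page becomes im_data of shape (1, 3, 640, 640) scaled to [0, 1],
+ # while orig_size stays [[1000, 1500]] so the exported model can return boxes in original-image coordinates.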
1740
+
1741
+ # Run with a concurrency guard to prevent device hangs and limit memory usage
1742
+ # Apply semaphore for ALL providers (not just DML) to control concurrency
1743
+ providers = BubbleDetector._rtdetr_onnx_providers or []
1744
+ def _do_run(session):
1745
+ return session.run(None, {
1746
+ 'images': im_data,
1747
+ 'orig_target_sizes': orig_size
1748
+ })
1749
+
1750
+ # Always use semaphore to limit concurrent RT-DETR calls
1751
+ acquired = False
1752
+ try:
1753
+ BubbleDetector._rtdetr_onnx_sema.acquire()
1754
+ acquired = True
1755
+
1756
+ # Special DML error handling
1757
+ if 'DmlExecutionProvider' in providers:
1758
+ try:
1759
+ outputs = _do_run(self.rtdetr_onnx_session)
1760
+ except Exception as dml_err:
1761
+ msg = str(dml_err)
1762
+ if '887A0005' in msg or '887A0006' in msg or 'Dml' in msg:
1763
+ # Rebuild CPU session and retry once
1764
+ try:
1765
+ base_path = BubbleDetector._rtdetr_onnx_model_path
1766
+ if base_path:
1767
+ so = ort.SessionOptions()
1768
+ so.enable_mem_pattern = False
1769
+ so.enable_cpu_mem_arena = False
1770
+ cpu_providers = ['CPUExecutionProvider']
1771
+ # Serialize rebuild
1772
+ with BubbleDetector._rtdetr_onnx_init_lock:
1773
+ sess = ort.InferenceSession(base_path, providers=cpu_providers, sess_options=so)
1774
+ BubbleDetector._rtdetr_onnx_shared_session = sess
1775
+ BubbleDetector._rtdetr_onnx_providers = cpu_providers
1776
+ self.rtdetr_onnx_session = sess
1777
+ outputs = _do_run(self.rtdetr_onnx_session)
1778
+ else:
1779
+ raise
1780
+ except Exception:
1781
+ raise
1782
+ else:
1783
+ raise
1784
+ else:
1785
+ # Non-DML providers - just run directly
1786
+ outputs = _do_run(self.rtdetr_onnx_session)
1787
+ finally:
1788
+ if acquired:
1789
+ try:
1790
+ BubbleDetector._rtdetr_onnx_sema.release()
1791
+ except Exception:
1792
+ pass
1793
+
1794
+ # outputs expected: labels, boxes, scores
1795
+ labels, boxes, scores = outputs[:3]
1796
+ if labels.ndim == 2 and labels.shape[0] == 1:
1797
+ labels = labels[0]
1798
+ if scores.ndim == 2 and scores.shape[0] == 1:
1799
+ scores = scores[0]
1800
+ if boxes.ndim == 3 and boxes.shape[0] == 1:
1801
+ boxes = boxes[0]
1802
+
1803
+ # Apply NMS to remove duplicate detections
1804
+ # Group detections by class and apply NMS per class
1805
+ class_detections = {self.CLASS_BUBBLE: [], self.CLASS_TEXT_BUBBLE: [], self.CLASS_TEXT_FREE: []}
1806
+
1807
+ for lab, box, scr in zip(labels, boxes, scores):
1808
+ if float(scr) < float(confidence):
1809
+ continue
1810
+ label_id = int(lab)
1811
+ if label_id in class_detections:
1812
+ x1, y1, x2, y2 = map(float, box)
1813
+ class_detections[label_id].append((x1, y1, x2, y2, float(scr)))
1814
+
1815
+ # Apply NMS per class to remove duplicates
1816
+ def compute_iou(box1, box2):
1817
+ """Compute IoU between two boxes (x1, y1, x2, y2)"""
1818
+ x1_1, y1_1, x2_1, y2_1 = box1[:4]
1819
+ x1_2, y1_2, x2_2, y2_2 = box2[:4]
1820
+
1821
+ # Intersection
1822
+ x_left = max(x1_1, x1_2)
1823
+ y_top = max(y1_1, y1_2)
1824
+ x_right = min(x2_1, x2_2)
1825
+ y_bottom = min(y2_1, y2_2)
1826
+
1827
+ if x_right < x_left or y_bottom < y_top:
1828
+ return 0.0
1829
+
1830
+ intersection = (x_right - x_left) * (y_bottom - y_top)
1831
+
1832
+ # Union
1833
+ area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
1834
+ area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
1835
+ union = area1 + area2 - intersection
1836
+
1837
+ return intersection / union if union > 0 else 0.0
1838
+
1839
+ def apply_nms(boxes_with_scores, iou_threshold=0.45):
1840
+ """Apply Non-Maximum Suppression"""
1841
+ if not boxes_with_scores:
1842
+ return []
1843
+
1844
+ # Sort by score (descending)
1845
+ sorted_boxes = sorted(boxes_with_scores, key=lambda x: x[4], reverse=True)
1846
+ keep = []
1847
+
1848
+ while sorted_boxes:
1849
+ # Keep the box with highest score
1850
+ current = sorted_boxes.pop(0)
1851
+ keep.append(current)
1852
+
1853
+ # Remove boxes with high IoU
1854
+ sorted_boxes = [box for box in sorted_boxes if compute_iou(current, box) < iou_threshold]
1855
+
1856
+ return keep
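+ # Worked example (illustrative): boxes (0, 0, 10, 10, 0.9) and (1, 1, 11, 11, 0.8) have
+ # IoU = 81/119 ≈ 0.68 > 0.45, so apply_nms keeps only the higher-scoring first box.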
1857
+
1858
+ # Apply NMS and build final detections
1859
+ detections = {'bubbles': [], 'text_bubbles': [], 'text_free': []}
1860
+ bubbles_all = []
1861
+
1862
+ for class_id, boxes_list in class_detections.items():
1863
+ nms_boxes = apply_nms(boxes_list, iou_threshold=self.default_iou_threshold)
1864
+
1865
+ for x1, y1, x2, y2, scr in nms_boxes:
1866
+ bbox = (int(x1), int(y1), int(x2 - x1), int(y2 - y1))
1867
+
1868
+ if class_id == self.CLASS_BUBBLE:
1869
+ detections['bubbles'].append(bbox)
1870
+ bubbles_all.append(bbox)
1871
+ elif class_id == self.CLASS_TEXT_BUBBLE:
1872
+ detections['text_bubbles'].append(bbox)
1873
+ bubbles_all.append(bbox)
1874
+ elif class_id == self.CLASS_TEXT_FREE:
1875
+ detections['text_free'].append(bbox)
1876
+
1877
+ return bubbles_all if return_all_bubbles else detections
1878
+ except Exception as e:
1879
+ logger.error(f"RT-DETR ONNX detection failed: {e}")
1880
+ return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
1881
+
1882
+
1883
+ # Standalone utility functions
1884
+ def download_model_from_huggingface(repo_id: str = "ogkalu/comic-speech-bubble-detector-yolov8m",
1885
+ filename: str = "comic-speech-bubble-detector-yolov8m.pt",
1886
+ cache_dir: str = "models") -> str:
1887
+ """
1888
+ Download model from Hugging Face Hub.
1889
+
1890
+ Args:
1891
+ repo_id: Hugging Face repository ID
1892
+ filename: Model filename in the repository
1893
+ cache_dir: Local directory to cache the model
1894
+
1895
+ Returns:
1896
+ Path to downloaded model file
1897
+ """
1898
+ try:
1899
+ from huggingface_hub import hf_hub_download
1900
+
1901
+ os.makedirs(cache_dir, exist_ok=True)
1902
+
1903
+ logger.info(f"📥 Downloading {filename} from {repo_id}...")
1904
+
1905
+ model_path = hf_hub_download(
1906
+ repo_id=repo_id,
1907
+ filename=filename,
1908
+ cache_dir=cache_dir,
1909
+ local_dir=cache_dir
1910
+ )
1911
+
1912
+ logger.info(f"✅ Model downloaded to: {model_path}")
1913
+ return model_path
1914
+
1915
+ except ImportError:
1916
+ logger.error("huggingface-hub package required. Install with: pip install huggingface-hub")
1917
+ return None
1918
+ except Exception as e:
1919
+ logger.error(f"Download failed: {e}")
1920
+ return None
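+ # Usage sketch (defaults come from the signature above):
+ #   path = download_model_from_huggingface(cache_dir='models')
+ #   if path:
+ #       detector = BubbleDetector()
+ #       detector.load_model(path)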
1921
+
1922
+
1923
+ def download_rtdetr_model(cache_dir: str = "models") -> bool:
1924
+ """
1925
+ Download RT-DETR model for advanced detection.
1926
+
1927
+ Args:
1928
+ cache_dir: Directory to cache the model
1929
+
1930
+ Returns:
1931
+ True if successful
1932
+ """
1933
+ if not TRANSFORMERS_AVAILABLE:
1934
+ logger.error("Transformers required. Install with: pip install transformers")
1935
+ return False
1936
+
1937
+ try:
1938
+ logger.info("📥 Downloading RT-DETR model...")
1939
+ from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
1940
+
1941
+ # This will download and cache the model
1942
+ processor = RTDetrImageProcessor.from_pretrained(
1943
+ "ogkalu/comic-text-and-bubble-detector",
1944
+ cache_dir=cache_dir
1945
+ )
1946
+ model = RTDetrForObjectDetection.from_pretrained(
1947
+ "ogkalu/comic-text-and-bubble-detector",
1948
+ cache_dir=cache_dir
1949
+ )
1950
+
1951
+ logger.info("✅ RT-DETR model downloaded successfully")
1952
+ return True
1953
+
1954
+ except Exception as e:
1955
+ logger.error(f"Download failed: {e}")
1956
+ return False
1957
+
1958
+
1959
+ # Example usage and testing
1960
+ if __name__ == "__main__":
1961
+ import sys
1962
+
1963
+ # Create detector
1964
+ detector = BubbleDetector()
1965
+
1966
+ if len(sys.argv) > 1:
1967
+ if sys.argv[1] == "download":
1968
+ # Download model from Hugging Face
1969
+ model_path = download_model_from_huggingface()
1970
+ if model_path:
1971
+ print(f"YOLOv8 model downloaded to: {model_path}")
1972
+
1973
+ # Also download RT-DETR
1974
+ if download_rtdetr_model():
1975
+ print("RT-DETR model downloaded")
1976
+
1977
+ elif sys.argv[1] == "detect" and len(sys.argv) > 3:
1978
+ # Detect bubbles in an image
1979
+ model_path = sys.argv[2]
1980
+ image_path = sys.argv[3]
1981
+
1982
+ # Load appropriate model
1983
+ if 'rtdetr' in model_path.lower():
1984
+ if detector.load_rtdetr_model():
1985
+ # Use RT-DETR
1986
+ results = detector.detect_with_rtdetr(image_path)
1987
+ print(f"RT-DETR Detection:")
1988
+ print(f" Empty bubbles: {len(results['bubbles'])}")
1989
+ print(f" Text bubbles: {len(results['text_bubbles'])}")
1990
+ print(f" Free text: {len(results['text_free'])}")
1991
+ else:
1992
+ if detector.load_model(model_path):
1993
+ bubbles = detector.detect_bubbles(image_path, confidence=0.5)
1994
+ print(f"YOLOv8 detected {len(bubbles)} bubbles:")
1995
+ for i, (x, y, w, h) in enumerate(bubbles):
1996
+ print(f" Bubble {i+1}: position=({x},{y}) size=({w}x{h})")
1997
+
1998
+ # Optionally visualize
1999
+ if len(sys.argv) > 4:
2000
+ output_path = sys.argv[4]
2001
+ detector.visualize_detections(image_path, output_path=output_path,
2002
+ use_rtdetr='rtdetr' in model_path.lower())
2003
+
2004
+ elif sys.argv[1] == "test-both" and len(sys.argv) > 2:
2005
+ # Test both models
2006
+ image_path = sys.argv[2]
2007
+
2008
+ # Load YOLOv8
2009
+ yolo_path = "models/comic-speech-bubble-detector-yolov8m.pt"
2010
+ if os.path.exists(yolo_path):
2011
+ detector.load_model(yolo_path)
2012
+ yolo_bubbles = detector.detect_bubbles(image_path, use_rtdetr=False)
2013
+ print(f"YOLOv8: {len(yolo_bubbles)} bubbles")
2014
+
2015
+ # Load RT-DETR
2016
+ if detector.load_rtdetr_model():
2017
+ rtdetr_bubbles = detector.detect_bubbles(image_path, use_rtdetr=True)
2018
+ print(f"RT-DETR: {len(rtdetr_bubbles)} bubbles")
2019
+
2020
+ else:
2021
+ print("Usage:")
2022
+ print(" python bubble_detector.py download")
2023
+ print(" python bubble_detector.py detect <model_path> <image_path> [output_path]")
2024
+ print(" python bubble_detector.py test-both <image_path>")
2025
+
2026
+ else:
2027
+ print("Bubble Detector Module (YOLOv8 + RT-DETR)")
2028
+ print("Usage:")
2029
+ print(" python bubble_detector.py download")
2030
+ print(" python bubble_detector.py detect <model_path> <image_path> [output_path]")
2031
+ print(" python bubble_detector.py test-both <image_path>")
local_inpainter.py ADDED
The diff for this file is too large to render. See raw diff
 
manga_settings_dialog.py ADDED
The diff for this file is too large to render. See raw diff
 
manga_translator.py ADDED
The diff for this file is too large to render. See raw diff
 
ocr_manager.py ADDED
@@ -0,0 +1,1970 @@
1
+ # ocr_manager.py
2
+ """
3
+ OCR Manager for handling multiple OCR providers
4
+ Handles installation, model downloading, and OCR processing
5
+ Updated with HuggingFace donut model and proper bubble detection integration
6
+ """
7
+ import os
8
+ import sys
9
+ import cv2
10
+ import json
11
+ import subprocess
12
+ import threading
13
+ import traceback
14
+ from typing import List, Dict, Optional, Tuple, Any
15
+ import numpy as np
16
+ from dataclasses import dataclass
17
+ from PIL import Image
18
+ import logging
19
+ import time
20
+ import random
21
+ import base64
22
+ import io
23
+ import requests
24
+
25
+ try:
26
+ import gptqmodel
27
+ HAS_GPTQ = True
28
+ except ImportError:
29
+ try:
30
+ import auto_gptq
31
+ HAS_GPTQ = True
32
+ except ImportError:
33
+ HAS_GPTQ = False
34
+
35
+ try:
36
+ import optimum
37
+ HAS_OPTIMUM = True
38
+ except ImportError:
39
+ HAS_OPTIMUM = False
40
+
41
+ try:
42
+ import accelerate
43
+ HAS_ACCELERATE = True
44
+ except ImportError:
45
+ HAS_ACCELERATE = False
46
+
47
+ logger = logging.getLogger(__name__)
48
+
49
+ @dataclass
50
+ class OCRResult:
51
+ """Unified OCR result format with built-in sanitization to prevent data corruption."""
52
+ text: str
53
+ bbox: Tuple[int, int, int, int] # x, y, w, h
54
+ confidence: float
55
+ vertices: Optional[List[Tuple[int, int]]] = None
56
+
57
+ def __post_init__(self):
58
+ """
59
+ This special method is called automatically after the object is created.
60
+ It acts as a final safeguard to ensure the 'text' attribute is ALWAYS a clean string.
61
+ """
62
+ # --- THIS IS THE DEFINITIVE FIX ---
63
+ # If the text we received is a tuple, we extract the first element.
64
+ # This makes it impossible for a tuple to exist in a finished object.
65
+ if isinstance(self.text, tuple):
66
+ # Log that we are fixing a critical data error.
67
+ print(f"CRITICAL WARNING: Corrupted tuple detected in OCRResult. Sanitizing '{self.text}' to '{self.text[0]}'.")
68
+ self.text = self.text[0]
69
+
70
+ # Ensure the final result is always a stripped string.
71
+ self.text = str(self.text).strip()
72
+
73
+ class OCRProvider:
74
+ """Base class for OCR providers"""
75
+
76
+ def __init__(self, log_callback=None):
77
+ # Set thread limits early if environment indicates single-threaded mode
78
+ try:
79
+ if os.environ.get('OMP_NUM_THREADS') == '1':
80
+ # Already in single-threaded mode, ensure it's applied to this process
81
+ try:
82
+ import sys
83
+ if 'torch' in sys.modules:
84
+ import torch
85
+ torch.set_num_threads(1)
86
+ except (ImportError, RuntimeError, AttributeError):
87
+ pass
88
+ try:
89
+ import cv2
90
+ cv2.setNumThreads(1)
91
+ except (ImportError, AttributeError):
92
+ pass
93
+ except Exception:
94
+ pass
95
+
96
+ self.log_callback = log_callback
97
+ self.is_installed = False
98
+ self.is_loaded = False
99
+ self.model = None
100
+ self.stop_flag = None
101
+ self._stopped = False
102
+
103
+ def _log(self, message: str, level: str = "info"):
104
+ """Log message with stop suppression"""
105
+ # Suppress logs when stopped (allow only essential stop confirmation messages)
106
+ if self._check_stop():
107
+ essential_stop_keywords = [
108
+ "⏹️ Translation stopped by user",
109
+ "⏹️ OCR processing stopped",
110
+ "cleanup", "🧹"
111
+ ]
112
+ if not any(keyword in message for keyword in essential_stop_keywords):
113
+ return
114
+
115
+ if self.log_callback:
116
+ self.log_callback(message, level)
117
+ else:
118
+ print(f"[{level.upper()}] {message}")
119
+
120
+ def set_stop_flag(self, stop_flag):
121
+ """Set the stop flag for checking interruptions"""
122
+ self.stop_flag = stop_flag
123
+ self._stopped = False
124
+
125
+ def _check_stop(self) -> bool:
126
+ """Check if stop has been requested"""
127
+ if self._stopped:
128
+ return True
129
+ if self.stop_flag and self.stop_flag.is_set():
130
+ self._stopped = True
131
+ return True
132
+ # Check global manga translator cancellation
133
+ try:
134
+ from manga_translator import MangaTranslator
135
+ if MangaTranslator.is_globally_cancelled():
136
+ self._stopped = True
137
+ return True
138
+ except Exception:
139
+ pass
140
+ return False
141
+
142
+ def reset_stop_flags(self):
143
+ """Reset stop flags when starting new processing"""
144
+ self._stopped = False
145
+
146
+ def check_installation(self) -> bool:
147
+ """Check if provider is installed"""
148
+ raise NotImplementedError
149
+
150
+ def install(self, progress_callback=None) -> bool:
151
+ """Install the provider"""
152
+ raise NotImplementedError
153
+
154
+ def load_model(self, **kwargs) -> bool:
155
+ """Load the OCR model"""
156
+ raise NotImplementedError
157
+
158
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
159
+ """Detect text in image"""
160
+ raise NotImplementedError
161
+
162
+ class CustomAPIProvider(OCRProvider):
163
+ """Custom API OCR provider that uses existing GUI variables"""
164
+
165
+ def __init__(self, log_callback=None):
166
+ super().__init__(log_callback)
167
+
168
+ # Use EXISTING environment variables from TranslatorGUI
169
+ self.api_url = os.environ.get('OPENAI_CUSTOM_BASE_URL', '')
170
+ self.api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '')
171
+ self.model_name = os.environ.get('MODEL', 'gpt-4o-mini')
172
+
173
+ # OCR prompt - use system prompt or a dedicated OCR prompt variable
174
+ self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT',
175
+ os.environ.get('SYSTEM_PROMPT',
176
+ "YOU ARE A TEXT EXTRACTION MACHINE. EXTRACT EXACTLY WHAT YOU SEE.\n\n"
177
+ "ABSOLUTE RULES:\n"
178
+ "1. OUTPUT ONLY THE VISIBLE TEXT/SYMBOLS - NOTHING ELSE\n"
179
+ "2. NEVER TRANSLATE OR MODIFY\n"
180
+ "3. NEVER EXPLAIN, DESCRIBE, OR COMMENT\n"
181
+ "4. NEVER SAY \"I can't\" or \"I cannot\" or \"no text\" or \"blank image\"\n"
182
+ "5. IF YOU SEE DOTS, OUTPUT THE DOTS: .\n"
183
+ "6. IF YOU SEE PUNCTUATION, OUTPUT THE PUNCTUATION\n"
184
+ "7. IF YOU SEE A SINGLE CHARACTER, OUTPUT THAT CHARACTER\n"
185
+ "8. IF YOU SEE NOTHING, OUTPUT NOTHING (empty response)\n\n"
186
+ "LANGUAGE PRESERVATION:\n"
187
+ "- Korean text → Output in Korean\n"
188
+ "- Japanese text → Output in Japanese\n"
189
+ "- Chinese text → Output in Chinese\n"
190
+ "- English text → Output in English\n"
191
+ "- CJK quotation marks (「」『』【】《》〈〉) → Preserve exactly as shown\n\n"
192
+ "FORMATTING:\n"
193
+ "- OUTPUT ALL TEXT ON A SINGLE LINE WITH NO LINE BREAKS\n"
194
+ "- NEVER use \\n or line breaks in your output\n\n"
195
+ "FORBIDDEN RESPONSES:\n"
196
+ "- \"I can see this appears to be...\"\n"
197
+ "- \"I cannot make out any clear text...\"\n"
198
+ "- \"This appears to be blank...\"\n"
199
+ "- \"If there is text present...\"\n"
200
+ "- ANY explanatory text\n\n"
201
+ "YOUR ONLY OUTPUT: The exact visible text. Nothing more. Nothing less.\n"
202
+ "If image has a dot → Output: .\n"
203
+ "If image has two dots → Output: . .\n"
204
+ "If image has text → Output: [that text]\n"
205
+ "If image is truly blank → Output: [empty/no response]"
206
+ ))
207
+
208
+ # Use existing temperature and token settings
209
+ self.temperature = float(os.environ.get('TRANSLATION_TEMPERATURE', '0.01'))
210
+ # NOTE: max_tokens is NOT cached here - it's read fresh from environment in detect_text()
211
+ # to ensure we always get the latest value from the GUI
212
+
213
+ # Image settings from existing compression variables
214
+ self.image_format = 'jpeg' if os.environ.get('IMAGE_COMPRESSION_FORMAT', 'auto') != 'png' else 'png'
215
+ self.image_quality = int(os.environ.get('JPEG_QUALITY', '100'))
216
+
217
+ # Simple defaults
218
+ self.api_format = 'openai' # Most custom endpoints are OpenAI-compatible
219
+ self.timeout = int(os.environ.get('CHUNK_TIMEOUT', '30'))
220
+ self.api_headers = {} # Additional custom headers
221
+
222
+ # Retry configuration for Custom API OCR calls
223
+ self.max_retries = int(os.environ.get('CUSTOM_OCR_MAX_RETRIES', '3'))
224
+ self.retry_initial_delay = float(os.environ.get('CUSTOM_OCR_RETRY_INITIAL_DELAY', '0.8'))
225
+ self.retry_backoff = float(os.environ.get('CUSTOM_OCR_RETRY_BACKOFF', '1.8'))
226
+ self.retry_jitter = float(os.environ.get('CUSTOM_OCR_RETRY_JITTER', '0.4'))
227
+ self.retry_on_empty = os.environ.get('CUSTOM_OCR_RETRY_ON_EMPTY', '1') == '1'
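+ # With these defaults the wait between attempts follows
+ # delay = retry_initial_delay * retry_backoff**(attempt - 1) + uniform(0, retry_jitter),
+ # i.e. roughly 0.8s, then ~1.44s (plus jitter) across the three attempts.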
228
+
229
+ def check_installation(self) -> bool:
230
+ """Always installed - uses UnifiedClient"""
231
+ self.is_installed = True
232
+ return True
233
+
234
+ def install(self, progress_callback=None) -> bool:
235
+ """No installation needed for API-based provider"""
236
+ return self.check_installation()
237
+
238
+ def load_model(self, **kwargs) -> bool:
239
+ """Initialize UnifiedClient with current settings"""
240
+ try:
241
+ from unified_api_client import UnifiedClient
242
+
243
+ # Support passing API key from GUI if available
244
+ if 'api_key' in kwargs:
245
+ api_key = kwargs['api_key']
246
+ else:
247
+ api_key = os.environ.get('API_KEY', '') or os.environ.get('OPENAI_API_KEY', '')
248
+
249
+ if 'model' in kwargs:
250
+ model = kwargs['model']
251
+ else:
252
+ model = os.environ.get('MODEL', 'gpt-4o-mini')
253
+
254
+ if not api_key:
255
+ self._log("❌ No API key configured", "error")
256
+ return False
257
+
258
+ # Create UnifiedClient just like translations do
259
+ self.client = UnifiedClient(model=model, api_key=api_key)
260
+
261
+ #self._log(f"✅ Using {model} for OCR via UnifiedClient")
262
+ self.is_loaded = True
263
+ return True
264
+
265
+ except Exception as e:
266
+ self._log(f"❌ Failed to initialize UnifiedClient: {str(e)}", "error")
267
+ return False
268
+
269
+ def _test_connection(self) -> bool:
270
+ """Test API connection with a simple request"""
271
+ try:
272
+ # Create a small test image
273
+ test_image = np.ones((100, 100, 3), dtype=np.uint8) * 255
274
+ cv2.putText(test_image, "TEST", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
275
+
276
+ # Encode image
277
+ image_base64 = self._encode_image(test_image)
278
+
279
+ # Prepare test request based on API format
280
+ if self.api_format == 'openai':
281
+ test_payload = {
282
+ "model": self.model_name,
283
+ "messages": [
284
+ {
285
+ "role": "user",
286
+ "content": [
287
+ {"type": "text", "text": "What text do you see?"},
288
+ {"type": "image_url", "image_url": {"url": f"data:image/{self.image_format};base64,{image_base64}"}}
289
+ ]
290
+ }
291
+ ],
292
+ "max_tokens": 50
293
+ }
294
+ else:
295
+ # For other formats, just try a basic health check
296
+ return True
297
+
298
+ headers = self._prepare_headers()
299
+ response = requests.post(
300
+ self.api_url,
301
+ headers=headers,
302
+ json=test_payload,
303
+ timeout=10
304
+ )
305
+
306
+ return response.status_code == 200
307
+
308
+ except Exception:
309
+ return False
310
+
311
+ def _encode_image(self, image: np.ndarray) -> str:
312
+ """Encode numpy array to base64 string"""
313
+ # Convert BGR to RGB if needed
314
+ if len(image.shape) == 3 and image.shape[2] == 3:
315
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
316
+ else:
317
+ image_rgb = image
318
+
319
+ # Convert to PIL Image
320
+ pil_image = Image.fromarray(image_rgb)
321
+
322
+ # Save to bytes buffer
323
+ buffer = io.BytesIO()
324
+ if self.image_format.lower() == 'png':
325
+ pil_image.save(buffer, format='PNG')
326
+ else:
327
+ pil_image.save(buffer, format='JPEG', quality=self.image_quality)
328
+
329
+ # Encode to base64
330
+ buffer.seek(0)
331
+ image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
332
+
333
+ return image_base64
334
+
335
+ def _prepare_headers(self) -> dict:
336
+ """Prepare request headers"""
337
+ headers = {
338
+ "Content-Type": "application/json"
339
+ }
340
+
341
+ # Add API key if configured
342
+ if self.api_key:
343
+ if self.api_format == 'anthropic':
344
+ headers["x-api-key"] = self.api_key
345
+ else:
346
+ headers["Authorization"] = f"Bearer {self.api_key}"
347
+
348
+ # Add any custom headers
349
+ headers.update(self.api_headers)
350
+
351
+ return headers
352
+
353
+ def _prepare_request_payload(self, image_base64: str) -> dict:
354
+ """Prepare request payload based on API format"""
355
+ if self.api_format == 'openai':
356
+ return {
357
+ "model": self.model_name,
358
+ "messages": [
359
+ {
360
+ "role": "user",
361
+ "content": [
362
+ {"type": "text", "text": self.ocr_prompt},
363
+ {
364
+ "type": "image_url",
365
+ "image_url": {
366
+ "url": f"data:image/{self.image_format};base64,{image_base64}"
367
+ }
368
+ }
369
+ ]
370
+ }
371
+ ],
372
+ "max_tokens": self.max_tokens,
373
+ "temperature": self.temperature
374
+ }
375
+
376
+ elif self.api_format == 'anthropic':
377
+ return {
378
+ "model": self.model_name,
379
+ "max_tokens": self.max_tokens,
380
+ "temperature": self.temperature,
381
+ "messages": [
382
+ {
383
+ "role": "user",
384
+ "content": [
385
+ {
386
+ "type": "text",
387
+ "text": self.ocr_prompt
388
+ },
389
+ {
390
+ "type": "image",
391
+ "source": {
392
+ "type": "base64",
393
+ "media_type": f"image/{self.image_format}",
394
+ "data": image_base64
395
+ }
396
+ }
397
+ ]
398
+ }
399
+ ]
400
+ }
401
+
402
+ else:
403
+ # Custom format - use environment variable for template
404
+ template = os.environ.get('CUSTOM_OCR_REQUEST_TEMPLATE', '{}')
405
+ payload = json.loads(template)
406
+
407
+ # Replace placeholders
408
+ payload_str = json.dumps(payload)
409
+ payload_str = payload_str.replace('{{IMAGE_BASE64}}', image_base64)
410
+ payload_str = payload_str.replace('{{PROMPT}}', self.ocr_prompt)
411
+ payload_str = payload_str.replace('{{MODEL}}', self.model_name)
412
+ payload_str = payload_str.replace('{{MAX_TOKENS}}', str(max_tokens))
413
+ payload_str = payload_str.replace('{{TEMPERATURE}}', str(self.temperature))
414
+
415
+ return json.loads(payload_str)
416
+
417
+ def _extract_text_from_response(self, response_data: dict) -> str:
418
+ """Extract text from API response based on format"""
419
+ try:
420
+ if self.api_format == 'openai':
421
+ # OpenAI format: response.choices[0].message.content
422
+ return response_data.get('choices', [{}])[0].get('message', {}).get('content', '')
423
+
424
+ elif self.api_format == 'anthropic':
425
+ # Anthropic format: response.content[0].text
426
+ content = response_data.get('content', [])
427
+ if content and isinstance(content, list):
428
+ return content[0].get('text', '')
429
+ return ''
430
+
431
+ else:
432
+ # Custom format - use environment variable for path
433
+ response_path = os.environ.get('CUSTOM_OCR_RESPONSE_PATH', 'text')
434
+
435
+ # Navigate through the response using the path
436
+ result = response_data
437
+ for key in response_path.split('.'):
438
+ if isinstance(result, dict):
439
+ result = result.get(key, '')
440
+ elif isinstance(result, list) and key.isdigit():
441
+ idx = int(key)
442
+ result = result[idx] if idx < len(result) else ''
443
+ else:
444
+ result = ''
445
+ break
446
+
447
+ return str(result)
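+ # Example (illustrative path): CUSTOM_OCR_RESPONSE_PATH="choices.0.message.content" walks
+ # response_data['choices'][0]['message']['content'] for an OpenAI-style reply.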
448
+
449
+ except Exception as e:
450
+ self._log(f"Failed to extract text from response: {e}", "error")
451
+ return ''
452
+
453
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
454
+ """Process image using UnifiedClient.send_image()"""
455
+ results = []
456
+
457
+ try:
458
+ # CRITICAL: Reload OCR prompt from environment before each detection
459
+ # This ensures we use the latest prompt set by manga_integration.py
460
+ self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT', self.ocr_prompt)
461
+
462
+ # Get fresh max_tokens from environment - GUI will have set this
463
+ max_tokens = int(os.environ.get('MAX_OUTPUT_TOKENS', '8192'))
464
+ if not self.is_loaded:
465
+ if not self.load_model():
466
+ return results
467
+
468
+ import cv2
469
+ from PIL import Image
470
+ import base64
471
+ import io
472
+
473
+ # Validate and resize image if too small (consistent with Google/Azure logic)
474
+ h, w = image.shape[:2]
475
+ MIN_SIZE = 50 # Minimum dimension for good OCR quality
476
+ MIN_AREA = 2500 # Minimum area (50x50)
477
+
478
+ # Skip completely invalid/corrupted images (0 or negative dimensions)
479
+ if h <= 0 or w <= 0:
480
+ self._log(f"⚠️ Invalid image dimensions ({w}x{h}px), skipping", "warning")
481
+ return results
482
+
483
+ if h < MIN_SIZE or w < MIN_SIZE or h * w < MIN_AREA:
484
+ # Image too small - resize it
485
+ scale_w = MIN_SIZE / w if w < MIN_SIZE else 1.0
486
+ scale_h = MIN_SIZE / h if h < MIN_SIZE else 1.0
487
+ scale = max(scale_w, scale_h)
488
+
489
+ if scale > 1.0:
490
+ new_w = int(w * scale)
491
+ new_h = int(h * scale)
492
+ image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
493
+ self._log(f"🔍 Image resized from {w}x{h}px to {new_w}x{new_h}px for Custom API OCR", "debug")
494
+ h, w = new_h, new_w
495
+
496
+ # Convert numpy array to PIL Image
497
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
498
+ pil_image = Image.fromarray(image_rgb)
499
+
500
+ # Convert PIL Image to base64 string
501
+ buffer = io.BytesIO()
502
+
503
+ # Use the image format from settings
504
+ if self.image_format.lower() == 'png':
505
+ pil_image.save(buffer, format='PNG')
506
+ else:
507
+ pil_image.save(buffer, format='JPEG', quality=self.image_quality)
508
+
509
+ buffer.seek(0)
510
+ image_base64 = base64.b64encode(buffer.read()).decode('utf-8')
511
+
512
+ # For OpenAI vision models, we need BOTH:
513
+ # 1. System prompt with instructions
514
+ # 2. User message that includes the image
515
+ messages = [
516
+ {
517
+ "role": "system",
518
+ "content": self.ocr_prompt # The OCR instruction as system prompt
519
+ },
520
+ {
521
+ "role": "user",
522
+ "content": [
523
+ {
524
+ "type": "text",
525
+ "text": "Image:" # Minimal text, just to have something
526
+ },
527
+ {
528
+ "type": "image_url",
529
+ "image_url": {
530
+ "url": f"data:image/jpeg;base64,{image_base64}"
531
+ }
532
+ }
533
+ ]
534
+ }
535
+ ]
536
+
537
+ # Now send this properly formatted message
538
+ # The UnifiedClient should handle this correctly
539
+ # But we're NOT using send_image, we're using regular send
540
+
541
+ # Retry-aware call
542
+ from unified_api_client import UnifiedClientError # local import to avoid hard dependency at module import time
543
+ max_attempts = max(1, self.max_retries)
544
+ attempt = 0
545
+ last_error = None
546
+
547
+ # Common refusal/error phrases that indicate a non-OCR response (expanded list)
548
+ refusal_phrases = [
549
+ "I can't extract", "I cannot extract",
550
+ "I'm sorry", "I am sorry",
551
+ "I'm unable", "I am unable",
552
+ "cannot process images",
553
+ "I can't help with that",
554
+ "cannot view images",
555
+ "no text in the image",
556
+ "I can see this appears",
557
+ "I cannot make out",
558
+ "appears to be blank",
559
+ "appears to be a mostly blank",
560
+ "mostly blank or white image",
561
+ "If there is text present",
562
+ "too small, faint, or unclear",
563
+ "cannot accurately extract",
564
+ "may be too",
565
+ "However, I cannot",
566
+ "I don't see any",
567
+ "no clear text",
568
+ "no visible text",
569
+ "does not contain",
570
+ "doesn't contain",
571
+ "I do not see"
572
+ ]
573
+
574
+ while attempt < max_attempts:
575
+ # Check for stop before each attempt
576
+ if self._check_stop():
577
+ self._log("⏹️ OCR processing stopped by user", "warning")
578
+ return results
579
+
580
+ try:
581
+ response = self.client.send(
582
+ messages=messages,
583
+ temperature=self.temperature,
584
+ max_tokens=max_tokens
585
+ )
586
+
587
+ # Unpack content and finish_reason from the response tuple
588
+ content, finish_reason = response
589
+
590
+ # Validate content
591
+ has_content = bool(content and str(content).strip())
592
+ refused = False
593
+ if has_content:
594
+ # Filter out explicit failure markers
595
+ if "[" in content and "FAILED]" in content:
596
+ refused = True
597
+ elif any(phrase.lower() in content.lower() for phrase in refusal_phrases):
598
+ refused = True
599
+
600
+ # Decide success or retry
601
+ if has_content and not refused:
602
+ text = str(content).strip()
603
+ results.append(OCRResult(
604
+ text=text,
605
+ bbox=(0, 0, w, h),
606
+ confidence=kwargs.get('confidence', 0.85),
607
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
608
+ ))
609
+ self._log(f"✅ Detected: {text[:50]}...")
610
+ break # success
611
+ else:
612
+ reason = "empty result" if not has_content else "refusal/non-OCR response"
613
+ last_error = f"{reason} (finish_reason: {finish_reason})"
614
+ # Check if we should retry on empty or refusal
615
+ should_retry = (not has_content and self.retry_on_empty) or refused
616
+ attempt += 1
617
+ if attempt >= max_attempts or not should_retry:
618
+ # No more retries or shouldn't retry
619
+ if not has_content:
620
+ self._log(f"⚠️ No text detected (finish_reason: {finish_reason})")
621
+ else:
622
+ self._log(f"❌ Model returned non-OCR response: {str(content)[:120]}", "warning")
623
+ break
624
+ # Backoff before retrying
625
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
626
+ self._log(f"🔄 Retry {attempt}/{max_attempts - 1} after {delay:.1f}s due to {reason}...", "warning")
627
+ time.sleep(delay)
628
+ time.sleep(0.1) # Brief pause for stability
629
+ self._log("💤 OCR retry pausing briefly for stability", "debug")
630
+ continue
631
+
632
+ except UnifiedClientError as ue:
633
+ msg = str(ue)
634
+ last_error = msg
635
+ # Do not retry on explicit user cancellation
636
+ if 'cancelled' in msg.lower() or 'stopped by user' in msg.lower():
637
+ self._log(f"❌ OCR cancelled: {msg}", "error")
638
+ break
639
+ attempt += 1
640
+ if attempt >= max_attempts:
641
+ self._log(f"❌ OCR failed after {attempt} attempts: {msg}", "error")
642
+ break
643
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
644
+ self._log(f"🔄 API error, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {msg}", "warning")
645
+ time.sleep(delay)
646
+ time.sleep(0.1) # Brief pause for stability
647
+ self._log("💤 OCR API error retry pausing briefly for stability", "debug")
648
+ continue
649
+ except Exception as e_inner:
650
+ last_error = str(e_inner)
651
+ attempt += 1
652
+ if attempt >= max_attempts:
653
+ self._log(f"❌ OCR exception after {attempt} attempts: {last_error}", "error")
654
+ break
655
+ delay = self.retry_initial_delay * (self.retry_backoff ** (attempt - 1)) + random.uniform(0, self.retry_jitter)
656
+ self._log(f"🔄 Exception, retry {attempt}/{max_attempts - 1} after {delay:.1f}s: {last_error}", "warning")
657
+ time.sleep(delay)
658
+ time.sleep(0.1) # Brief pause for stability
659
+ self._log("💤 OCR exception retry pausing briefly for stability", "debug")
660
+ continue
661
+
662
+ except Exception as e:
663
+ self._log(f"❌ Error: {str(e)}", "error")
664
+ import traceback
665
+ self._log(traceback.format_exc(), "debug")
666
+
667
+ return results
668
+
669
+ class MangaOCRProvider(OCRProvider):
670
+ """Manga OCR provider using HuggingFace model directly"""
671
+
672
+ def __init__(self, log_callback=None):
673
+ super().__init__(log_callback)
674
+ self.processor = None
675
+ self.model = None
676
+ self.tokenizer = None
677
+
678
+ def check_installation(self) -> bool:
679
+ """Check if transformers is installed"""
680
+ try:
681
+ import transformers
682
+ import torch
683
+ self.is_installed = True
684
+ return True
685
+ except ImportError:
686
+ return False
687
+
688
+ def install(self, progress_callback=None) -> bool:
689
+ """Install transformers and torch"""
690
+ # Installation is handled by the application's dependency setup; just report current status
+ return self.check_installation()
691
+
692
+ def _is_valid_local_model_dir(self, path: str) -> bool:
693
+ """Check that a local HF model directory has required files."""
694
+ try:
695
+ if not path or not os.path.isdir(path):
696
+ return False
697
+ needed_any_weights = any(
698
+ os.path.exists(os.path.join(path, name)) for name in (
699
+ 'pytorch_model.bin',
700
+ 'model.safetensors'
701
+ )
702
+ )
703
+ has_config = os.path.exists(os.path.join(path, 'config.json'))
704
+ has_processor = (
705
+ os.path.exists(os.path.join(path, 'preprocessor_config.json')) or
706
+ os.path.exists(os.path.join(path, 'processor_config.json'))
707
+ )
708
+ has_tokenizer = (
709
+ os.path.exists(os.path.join(path, 'tokenizer.json')) or
710
+ os.path.exists(os.path.join(path, 'tokenizer_config.json'))
711
+ )
712
+ return has_config and needed_any_weights and has_processor and has_tokenizer
713
+ except Exception:
714
+ return False
715
+
716
+ def load_model(self, **kwargs) -> bool:
717
+ """Load the manga-ocr model, preferring a local directory to avoid re-downloading"""
718
+ print("\n>>> MangaOCRProvider.load_model() called")
719
+ try:
720
+ if not self.is_installed and not self.check_installation():
721
+ print("ERROR: Transformers not installed")
722
+ self._log("❌ Transformers not installed", "error")
723
+ return False
724
+
725
+ # Always disable progress bars to avoid tqdm issues in some environments
726
+ import os
727
+ os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
728
+
729
+ from transformers import VisionEncoderDecoderModel, AutoTokenizer, AutoImageProcessor
730
+ import torch
731
+
732
+ # Prefer a local model directory if present to avoid any Hub access
733
+ candidates = []
734
+ env_local = os.environ.get("MANGA_OCR_LOCAL_DIR")
735
+ if env_local:
736
+ candidates.append(env_local)
737
+
738
+ # Project root one level up from this file
739
+ root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
740
+ candidates.append(os.path.join(root_dir, 'models', 'manga-ocr-base'))
741
+ candidates.append(os.path.join(root_dir, 'models', 'kha-white', 'manga-ocr-base'))
742
+
743
+ model_source = None
744
+ local_only = False
745
+ # Find a valid local dir
746
+ for cand in candidates:
747
+ if self._is_valid_local_model_dir(cand):
748
+ model_source = cand
749
+ local_only = True
750
+ break
751
+
752
+ # If no valid local dir, use Hub
753
+ if not model_source:
754
+ model_source = "kha-white/manga-ocr-base"
755
+ # Make sure we are not forcing offline mode
756
+ if os.environ.get("HF_HUB_OFFLINE") == "1":
757
+ try:
758
+ del os.environ["HF_HUB_OFFLINE"]
759
+ except Exception:
760
+ pass
761
+ self._log("🔥 Loading manga-ocr model from Hugging Face Hub")
762
+ self._log(f" Repo: {model_source}")
763
+ else:
764
+ # Only set offline when local dir is fully valid
765
+ os.environ.setdefault("HF_HUB_OFFLINE", "1")
766
+ self._log("🔥 Loading manga-ocr model from local directory")
767
+ self._log(f" Local path: {model_source}")
768
+
769
+ # Decide target device once; we will move after full CPU load to avoid meta tensors
770
+ use_cuda = torch.cuda.is_available()
771
+
772
+ # Try loading components, falling back to Hub if local-only fails
773
+ def _load_components(source: str, local_flag: bool):
774
+ self._log(" Loading tokenizer...")
775
+ tok = AutoTokenizer.from_pretrained(source, local_files_only=local_flag)
776
+
777
+ self._log(" Loading image processor...")
778
+ try:
779
+ from transformers import AutoProcessor
780
+ except Exception:
781
+ AutoProcessor = None
782
+ try:
783
+ proc = AutoImageProcessor.from_pretrained(source, local_files_only=local_flag)
784
+ except Exception as e_proc:
785
+ if AutoProcessor is not None:
786
+ self._log(f" ⚠️ AutoImageProcessor failed: {e_proc}. Trying AutoProcessor...", "warning")
787
+ proc = AutoProcessor.from_pretrained(source, local_files_only=local_flag)
788
+ else:
789
+ raise
790
+
791
+ self._log(" Loading model...")
792
+ # Prevent meta tensors by forcing full materialization on CPU at load time
793
+ os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
794
+ mdl = VisionEncoderDecoderModel.from_pretrained(
795
+ source,
796
+ local_files_only=local_flag,
797
+ low_cpu_mem_usage=False,
798
+ device_map=None,
799
+ torch_dtype=torch.float32 # Use torch_dtype instead of dtype
800
+ )
801
+ return tok, proc, mdl
802
+
803
+ try:
804
+ self.tokenizer, self.processor, self.model = _load_components(model_source, local_only)
805
+ except Exception as e_local:
806
+ if local_only:
807
+ # Fallback to Hub once if local fails
808
+ self._log(f" ⚠️ Local model load failed: {e_local}", "warning")
809
+ try:
810
+ if os.environ.get("HF_HUB_OFFLINE") == "1":
811
+ del os.environ["HF_HUB_OFFLINE"]
812
+ except Exception:
813
+ pass
814
+ model_source = "kha-white/manga-ocr-base"
815
+ local_only = False
816
+ self._log(" Retrying from Hugging Face Hub...")
817
+ self.tokenizer, self.processor, self.model = _load_components(model_source, local_only)
818
+ else:
819
+ raise
820
+
821
+ # Move to CUDA only after full CPU materialization
822
+ target_device = 'cpu'
823
+ if use_cuda:
824
+ try:
825
+ self.model = self.model.to('cuda')
826
+ target_device = 'cuda'
827
+ except Exception as move_err:
828
+ self._log(f" ⚠️ Could not move model to CUDA: {move_err}", "warning")
829
+ target_device = 'cpu'
830
+
831
+ # Finalize eval mode
832
+ self.model.eval()
833
+
834
+ # Sanity-check: ensure no parameter remains on 'meta' device
835
+ try:
836
+ for n, p in self.model.named_parameters():
837
+ dev = getattr(p, 'device', None)
838
+ if dev is not None and getattr(dev, 'type', '') == 'meta':
839
+ raise RuntimeError(f"Parameter {n} is on 'meta' after load")
840
+ except Exception as sanity_err:
841
+ self._log(f"❌ Manga-OCR model load sanity check failed: {sanity_err}", "error")
842
+ return False
843
+
844
+ print(f"SUCCESS: Model loaded on {target_device.upper()}")
845
+ self._log(f" ✅ Model loaded on {target_device.upper()}")
846
+ self.is_loaded = True
847
+ self._log("✅ Manga OCR model ready")
848
+ print(">>> Returning True from load_model()")
849
+ return True
850
+
851
+ except Exception as e:
852
+ print(f"\nEXCEPTION in load_model: {e}")
853
+ import traceback
854
+ print(traceback.format_exc())
855
+ self._log(f"❌ Failed to load manga-ocr model: {str(e)}", "error")
856
+ self._log(traceback.format_exc(), "error")
857
+ try:
858
+ if 'local_only' in locals() and local_only:
859
+ self._log("Hint: Local load failed. Ensure your models/manga-ocr-base contains required files (config.json, preprocessor_config.json, tokenizer.json or tokenizer_config.json, and model weights).", "warning")
860
+ except Exception:
861
+ pass
862
+ return False
863
+
864
+ def _run_ocr(self, pil_image):
865
+ """Run OCR on a PIL image using the HuggingFace model"""
866
+ import torch
867
+
868
+ # Process image (keyword arg for broader compatibility across transformers versions)
869
+ inputs = self.processor(images=pil_image, return_tensors="pt")
870
+ pixel_values = inputs["pixel_values"]
871
+
872
+ # Move to same device as model
873
+ try:
874
+ model_device = next(self.model.parameters()).device
875
+ except StopIteration:
876
+ model_device = torch.device('cpu')
877
+ pixel_values = pixel_values.to(model_device)
878
+
879
+ # Generate text
880
+ with torch.no_grad():
881
+ generated_ids = self.model.generate(pixel_values)
882
+
883
+ # Decode
884
+ generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
885
+
886
+ return generated_text
887
+
888
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
889
+ """
890
+ Process the image region passed to it.
891
+ This could be a bubble region or the full image.
892
+ """
893
+ results = []
894
+
895
+ # Check for stop at start
896
+ if self._check_stop():
897
+ self._log("⏹️ Manga-OCR processing stopped by user", "warning")
898
+ return results
899
+
900
+ try:
901
+ if not self.is_loaded:
902
+ if not self.load_model():
903
+ return results
904
+
905
+ import cv2
906
+ from PIL import Image
907
+
908
+ # Get confidence from kwargs
909
+ confidence = kwargs.get('confidence', 0.7)
910
+
911
+ # Convert numpy array to PIL
912
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
913
+ pil_image = Image.fromarray(image_rgb)
914
+ h, w = image.shape[:2]
915
+
916
+ self._log("🔍 Processing region with manga-ocr...")
917
+
918
+ # Check for stop before inference
919
+ if self._check_stop():
920
+ self._log("⏹️ Manga-OCR inference stopped by user", "warning")
921
+ return results
922
+
923
+ # Run OCR on the image region
924
+ text = self._run_ocr(pil_image)
925
+
926
+ if text and text.strip():
927
+ # Return result for this region with its actual bbox
928
+ results.append(OCRResult(
929
+ text=text.strip(),
930
+ bbox=(0, 0, w, h), # Relative to the region passed in
931
+ confidence=confidence,
932
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
933
+ ))
934
+ self._log(f"✅ Detected text: {text[:50]}...")
935
+
936
+ except Exception as e:
937
+ self._log(f"❌ Error in manga-ocr: {str(e)}", "error")
938
+
939
+ return results
940
+
941
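A minimal usage sketch for the MangaOCRProvider above, assuming this module's classes are importable and cv2 is available; the image path and bubble rectangle are placeholders:

# Hypothetical usage: crop one speech-bubble region and OCR it.
import cv2

provider = MangaOCRProvider(log_callback=print)
page = cv2.imread("page_001.png")            # BGR image, as detect_text expects
x, y, w, h = 120, 80, 240, 160               # placeholder bubble rectangle
bubble = page[y:y + h, x:x + w]

for result in provider.detect_text(bubble, confidence=0.7):
    # bbox is relative to the cropped region, so offset it back to page coordinates
    bx, by, bw, bh = result.bbox
    print(result.text, (x + bx, y + by, bw, bh), result.confidence)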
+ class Qwen2VL(OCRProvider):
942
+ """OCR using Qwen2-VL - Vision Language Model that can read Korean text"""
943
+
944
+ def __init__(self, log_callback=None):
945
+ super().__init__(log_callback)
946
+ self.processor = None
947
+ self.model = None
948
+ self.tokenizer = None
949
+
950
+ # Get OCR prompt from environment or use default (UPDATED: Improved prompt)
951
+ self.ocr_prompt = os.environ.get('OCR_SYSTEM_PROMPT',
952
+ "YOU ARE A TEXT EXTRACTION MACHINE. EXTRACT EXACTLY WHAT YOU SEE.\n\n"
953
+ "ABSOLUTE RULES:\n"
954
+ "1. OUTPUT ONLY THE VISIBLE TEXT/SYMBOLS - NOTHING ELSE\n"
955
+ "2. NEVER TRANSLATE OR MODIFY\n"
956
+ "3. NEVER EXPLAIN, DESCRIBE, OR COMMENT\n"
957
+ "4. NEVER SAY \"I can't\" or \"I cannot\" or \"no text\" or \"blank image\"\n"
958
+ "5. IF YOU SEE DOTS, OUTPUT THE DOTS: .\n"
959
+ "6. IF YOU SEE PUNCTUATION, OUTPUT THE PUNCTUATION\n"
960
+ "7. IF YOU SEE A SINGLE CHARACTER, OUTPUT THAT CHARACTER\n"
961
+ "8. IF YOU SEE NOTHING, OUTPUT NOTHING (empty response)\n\n"
962
+ "LANGUAGE PRESERVATION:\n"
963
+ "- Korean text → Output in Korean\n"
964
+ "- Japanese text → Output in Japanese\n"
965
+ "- Chinese text → Output in Chinese\n"
966
+ "- English text → Output in English\n"
967
+ "- CJK quotation marks (「」『』【】《》〈〉) → Preserve exactly as shown\n\n"
968
+ "FORMATTING:\n"
969
+ "- OUTPUT ALL TEXT ON A SINGLE LINE WITH NO LINE BREAKS\n"
970
+ "- NEVER use \\n or line breaks in your output\n\n"
971
+ "FORBIDDEN RESPONSES:\n"
972
+ "- \"I can see this appears to be...\"\n"
973
+ "- \"I cannot make out any clear text...\"\n"
974
+ "- \"This appears to be blank...\"\n"
975
+ "- \"If there is text present...\"\n"
976
+ "- ANY explanatory text\n\n"
977
+ "YOUR ONLY OUTPUT: The exact visible text. Nothing more. Nothing less.\n"
978
+ "If image has a dot → Output: .\n"
979
+ "If image has two dots → Output: . .\n"
980
+ "If image has text → Output: [that text]\n"
981
+ "If image is truly blank → Output: [empty/no response]"
982
+ )
983
+
984
+ def set_ocr_prompt(self, prompt: str):
985
+ """Allow setting the OCR prompt dynamically"""
986
+ self.ocr_prompt = prompt
987
+
988
+ def check_installation(self) -> bool:
989
+ """Check if required packages are installed"""
990
+ try:
991
+ import transformers
992
+ import torch
993
+ self.is_installed = True
994
+ return True
995
+ except ImportError:
996
+ return False
997
+
998
+ def install(self, progress_callback=None) -> bool:
999
+ """Install requirements for Qwen2-VL"""
1000
+ pass
1001
+
1002
+ def load_model(self, model_size=None, **kwargs) -> bool:
1003
+ """Load Qwen2-VL model with size selection"""
1004
+ self._log(f"DEBUG: load_model called with model_size={model_size}")
1005
+
1006
+ try:
1007
+ if not self.is_installed and not self.check_installation():
1008
+ self._log("❌ Not installed", "error")
1009
+ return False
1010
+
1011
+ self._log("🔥 Loading Qwen2-VL for Advanced OCR...")
1012
+
1015
+ from transformers import AutoProcessor, AutoTokenizer
1016
+ import torch
1017
+
1018
+ # Model options
1019
+ model_options = {
1020
+ "1": "Qwen/Qwen2-VL-2B-Instruct",
1021
+ "2": "Qwen/Qwen2-VL-7B-Instruct",
1022
+ "3": "Qwen/Qwen2-VL-72B-Instruct",
1023
+ "4": "custom"
1024
+ }
1025
+ # Default to the 2B model unless a saved preference overrides it
1026
+ # Check for saved preference first
1027
+ if model_size is None:
1028
+ # Try to get from environment or config
1029
+ import os
1030
+ model_size = os.environ.get('QWEN2VL_MODEL_SIZE', '1')
1031
+
1032
+ # Determine which model to load
1033
+ if model_size and str(model_size).startswith("custom:"):
1034
+ # Custom model passed with ID
1035
+ model_id = str(model_size).replace("custom:", "")
1036
+ self.loaded_model_size = "Custom"
1037
+ self.model_id = model_id
1038
+ self._log(f"Loading custom model: {model_id}")
1039
+ elif model_size == "4":
1040
+ # Custom option selected but no ID - shouldn't happen
1041
+ self._log("❌ Custom model selected but no ID provided", "error")
1042
+ return False
1043
+ elif model_size and str(model_size) in model_options:
1044
+ # Standard model option
1045
+ option = model_options[str(model_size)]
1046
+ if option == "custom":
1047
+ self._log("❌ Custom model needs an ID", "error")
1048
+ return False
1049
+ model_id = option
1050
+ # Set loaded_model_size for status display
1051
+ if model_size == "1":
1052
+ self.loaded_model_size = "2B"
1053
+ elif model_size == "2":
1054
+ self.loaded_model_size = "7B"
1055
+ elif model_size == "3":
1056
+ self.loaded_model_size = "72B"
1057
+ else:
1058
+ # Fall back to the default 2B model (option "1") when no valid size is given
+ model_id = model_options["1"]
+ self.loaded_model_size = "2B"
+ self._log("No model size specified, defaulting to 2B")
1062
+
1063
+ self._log(f"Loading model: {model_id}")
1064
+
1065
+ # Load processor and tokenizer
1066
+ self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
1067
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
1068
+
1069
+ # Load the model - let it figure out the class dynamically
1070
+ if torch.cuda.is_available():
1071
+ self._log(f"GPU: {torch.cuda.get_device_name(0)}")
1072
+ # Use auto model class
1073
+ from transformers import AutoModelForVision2Seq
1074
+ self.model = AutoModelForVision2Seq.from_pretrained(
1075
+ model_id,
1076
+ dtype=torch.float16,
1077
+ device_map="auto",
1078
+ trust_remote_code=True
1079
+ )
1080
+ self._log("✅ Model loaded on GPU")
1081
+ else:
1082
+ self._log("Loading on CPU...")
1083
+ from transformers import AutoModelForVision2Seq
1084
+ self.model = AutoModelForVision2Seq.from_pretrained(
1085
+ model_id,
1086
+ dtype=torch.float32,
1087
+ trust_remote_code=True
1088
+ )
1089
+ self._log("✅ Model loaded on CPU")
1090
+
1091
+ self.model.eval()
1092
+ self.is_loaded = True
1093
+ self._log("✅ Qwen2-VL ready for Advanced OCR!")
1094
+ return True
1095
+
1096
+ except Exception as e:
1097
+ self._log(f"❌ Failed to load: {str(e)}", "error")
1098
+ import traceback
1099
+ self._log(traceback.format_exc(), "debug")
1100
+ return False
1101
+
1102
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
1103
+ """Process image with Qwen2-VL for Korean text extraction"""
1104
+ results = []
1105
+ if hasattr(self, 'model_id'):
1106
+ self._log(f"DEBUG: Using model: {self.model_id}", "debug")
1107
+
1108
+ # Check if OCR prompt was passed in kwargs (for dynamic updates)
1109
+ if 'ocr_prompt' in kwargs:
1110
+ self.ocr_prompt = kwargs['ocr_prompt']
1111
+
1112
+ try:
1113
+ if not self.is_loaded:
1114
+ if not self.load_model():
1115
+ return results
1116
+
1117
+ import cv2
1118
+ from PIL import Image
1119
+ import torch
1120
+
1121
+ # Convert to PIL
1122
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
1123
+ pil_image = Image.fromarray(image_rgb)
1124
+ h, w = image.shape[:2]
1125
+
1126
+ self._log(f"🔍 Processing with Qwen2-VL ({w}x{h} pixels)...")
1127
+
1128
+ # Use the configurable OCR prompt
1129
+ messages = [
1130
+ {
1131
+ "role": "user",
1132
+ "content": [
1133
+ {
1134
+ "type": "image",
1135
+ "image": pil_image,
1136
+ },
1137
+ {
1138
+ "type": "text",
1139
+ "text": self.ocr_prompt # Use the configurable prompt
1140
+ }
1141
+ ]
1142
+ }
1143
+ ]
1144
+
1145
+ # Alternative simpler prompt if the above still causes issues:
1146
+ # "text": "OCR: Extract text as-is"
1147
+
1148
+ # Process with Qwen2-VL
1149
+ text = self.processor.apply_chat_template(
1150
+ messages,
1151
+ tokenize=False,
1152
+ add_generation_prompt=True
1153
+ )
1154
+
1155
+ inputs = self.processor(
1156
+ text=[text],
1157
+ images=[pil_image],
1158
+ padding=True,
1159
+ return_tensors="pt"
1160
+ )
1161
+
1162
+ # Get the device and dtype the model is currently on
1163
+ model_device = next(self.model.parameters()).device
1164
+ model_dtype = next(self.model.parameters()).dtype
1165
+
1166
+ # Move inputs to the same device as the model and cast float tensors to model dtype
1167
+ try:
1168
+ # Move first
1169
+ inputs = inputs.to(model_device)
1170
+ # Then align dtypes only for floating tensors (e.g., pixel_values)
1171
+ for k, v in inputs.items():
1172
+ if isinstance(v, torch.Tensor) and torch.is_floating_point(v):
1173
+ inputs[k] = v.to(model_dtype)
1174
+ except Exception:
1175
+ # Fallback: ensure at least pixel_values is correct if present
1176
+ try:
1177
+ if isinstance(inputs, dict) and "pixel_values" in inputs:
1178
+ pv = inputs["pixel_values"].to(model_device)
1179
+ if torch.is_floating_point(pv):
1180
+ inputs["pixel_values"] = pv.to(model_dtype)
1181
+ except Exception:
1182
+ pass
1183
+
1184
+ # Ensure pixel_values explicitly matches model dtype if present
1185
+ try:
1186
+ if isinstance(inputs, dict) and "pixel_values" in inputs:
1187
+ inputs["pixel_values"] = inputs["pixel_values"].to(device=model_device, dtype=model_dtype)
1188
+ except Exception:
1189
+ pass
1190
+
1191
+ # Generate text with stricter parameters to avoid creative responses
1192
+ use_amp = (hasattr(torch, 'cuda') and model_device.type == 'cuda' and model_dtype in (torch.float16, torch.bfloat16))
1193
+ autocast_dev = 'cuda' if model_device.type == 'cuda' else 'cpu'
1194
+ autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None
1195
+
1196
+ with torch.no_grad():
1197
+ if use_amp and autocast_dtype is not None:
1198
+ with torch.autocast(autocast_dev, dtype=autocast_dtype):
1199
+ generated_ids = self.model.generate(
1200
+ **inputs,
1201
+ max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short
1202
+ do_sample=False, # Keep deterministic
1203
+ temperature=0.01, # Keep your very low temperature
1204
+ top_p=1.0, # Keep no nucleus sampling
1205
+ repetition_penalty=1.0, # Keep no repetition penalty
1206
+ num_beams=1, # Ensure greedy decoding (faster than beam search)
1207
+ use_cache=True, # Enable KV cache for speed
1208
+ early_stopping=True, # Stop at EOS token
1209
+ pad_token_id=self.tokenizer.pad_token_id, # Proper padding
1210
+ eos_token_id=self.tokenizer.eos_token_id, # Proper stopping
1211
+ )
1212
+ else:
1213
+ generated_ids = self.model.generate(
1214
+ **inputs,
1215
+ max_new_tokens=128, # Reduced from 512 - manga bubbles are typically short
1216
+ do_sample=False, # Keep deterministic
1217
+ temperature=0.01, # Keep your very low temperature
1218
+ top_p=1.0, # Keep no nucleus sampling
1219
+ repetition_penalty=1.0, # Keep no repetition penalty
1220
+ num_beams=1, # Ensure greedy decoding (faster than beam search)
1221
+ use_cache=True, # Enable KV cache for speed
1222
+ early_stopping=True, # Stop at EOS token
1223
+ pad_token_id=self.tokenizer.pad_token_id, # Proper padding
1224
+ eos_token_id=self.tokenizer.eos_token_id, # Proper stopping
1225
+ )
1226
+
1227
+ # Decode the output
1228
+ generated_ids_trimmed = [
1229
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
1230
+ ]
1231
+ output_text = self.processor.batch_decode(
1232
+ generated_ids_trimmed,
1233
+ skip_special_tokens=True,
1234
+ clean_up_tokenization_spaces=False
1235
+ )[0]
1236
+
1237
+ if output_text and output_text.strip():
1238
+ text = output_text.strip()
1239
+
1240
+ # ADDED: Filter out any response that looks like an explanation or apology
1241
+ # Common patterns that indicate the model is being "helpful" instead of just extracting
1242
+ unwanted_patterns = [
1243
+ "죄송합니다", # "I apologize"
1244
+ "sorry",
1245
+ "apologize",
1246
+ "이미지에는", # "in this image"
1247
+ "텍스트가 없습니다", # "there is no text"
1248
+ "I cannot",
1249
+ "I don't see",
1250
+ "There is no",
1251
+ "질문이 있으시면", # "if you have questions"
1252
+ ]
1253
+
1254
+ # Check if response contains unwanted patterns
1255
+ text_lower = text.lower()
1256
+ is_explanation = any(pattern.lower() in text_lower for pattern in unwanted_patterns)
1257
+
1258
+ # Also check if the response is suspiciously long for a bubble
1259
+ # Most manga bubbles are short; over 100 chars with sentence punctuation likely means an explanation
1260
+ is_too_long = len(text) > 100 and ('.' in text or ',' in text or '!' in text)
1261
+
1262
+ if is_explanation or is_too_long:
1263
+ self._log(f"⚠️ Model returned explanation instead of text, ignoring", "warning")
1264
+ # Return empty result or just skip this region
1265
+ return results
1266
+
1267
+ # Check language
1268
+ has_korean = any('\uAC00' <= c <= '\uD7AF' for c in text)
1269
+ has_japanese = any('\u3040' <= c <= '\u309F' or '\u30A0' <= c <= '\u30FF' for c in text)
1270
+ has_chinese = any('\u4E00' <= c <= '\u9FFF' for c in text)
1271
+
1272
+ if has_korean:
1273
+ self._log(f"✅ Korean detected: {text[:50]}...")
1274
+ elif has_japanese:
1275
+ self._log(f"✅ Japanese detected: {text[:50]}...")
1276
+ elif has_chinese:
1277
+ self._log(f"✅ Chinese detected: {text[:50]}...")
1278
+ else:
1279
+ self._log(f"✅ Text: {text[:50]}...")
1280
+
1281
+ results.append(OCRResult(
1282
+ text=text,
1283
+ bbox=(0, 0, w, h),
1284
+ confidence=0.9,
1285
+ vertices=[(0, 0), (w, 0), (w, h), (0, h)]
1286
+ ))
1287
+ else:
1288
+ self._log("⚠️ No text detected", "warning")
1289
+
1290
+ except Exception as e:
1291
+ self._log(f"❌ Error: {str(e)}", "error")
1292
+ import traceback
1293
+ self._log(traceback.format_exc(), "debug")
1294
+
1295
+ return results
1296
+
1297
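A sketch of driving the Qwen2VL provider above; the QWEN2VL_MODEL_SIZE codes and the ocr_prompt kwarg come from load_model and detect_text, while the file name is a placeholder:

# Hypothetical usage: select the 7B checkpoint and pass a simpler OCR prompt per call.
import os
import cv2

os.environ["QWEN2VL_MODEL_SIZE"] = "2"       # "1"=2B, "2"=7B, "3"=72B, or pass "custom:<repo id>" to load_model

ocr = Qwen2VL(log_callback=print)
if ocr.load_model():
    region = cv2.imread("bubble_crop.png")   # BGR crop of a single bubble
    for r in ocr.detect_text(region, ocr_prompt="OCR: Extract text as-is"):
        print(r.text, r.confidence)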
+ class EasyOCRProvider(OCRProvider):
1298
+ """EasyOCR provider for multiple languages"""
1299
+
1300
+ def __init__(self, log_callback=None, languages=None):
1301
+ super().__init__(log_callback)
1302
+ # Default to safe language combination
1303
+ self.languages = languages or ['ja', 'en'] # Safe default
1304
+ self._validate_language_combination()
1305
+
1306
+ def _validate_language_combination(self):
1307
+ """Validate and fix EasyOCR language combinations"""
1308
+ # EasyOCR language compatibility rules
1309
+ incompatible_pairs = [
1310
+ (['ja', 'ko'], 'Japanese and Korean cannot be used together'),
1311
+ (['ja', 'zh'], 'Japanese and Chinese cannot be used together'),
1312
+ (['ko', 'zh'], 'Korean and Chinese cannot be used together')
1313
+ ]
1314
+
1315
+ for incompatible, reason in incompatible_pairs:
1316
+ if all(lang in self.languages for lang in incompatible):
1317
+ self._log(f"⚠️ EasyOCR: {reason}", "warning")
1318
+ # Keep first language + English
1319
+ self.languages = [self.languages[0], 'en']
1320
+ self._log(f"🔧 Auto-adjusted to: {self.languages}", "info")
1321
+ break
1322
+
1323
+ def check_installation(self) -> bool:
1324
+ """Check if easyocr is installed"""
1325
+ try:
1326
+ import easyocr
1327
+ self.is_installed = True
1328
+ return True
1329
+ except ImportError:
1330
+ return False
1331
+
1332
+ def install(self, progress_callback=None) -> bool:
1333
+ """Install easyocr"""
1334
+ pass
1335
+
1336
+ def load_model(self, **kwargs) -> bool:
1337
+ """Load easyocr model"""
1338
+ try:
1339
+ if not self.is_installed and not self.check_installation():
1340
+ self._log("❌ easyocr not installed", "error")
1341
+ return False
1342
+
1343
+ self._log(f"🔥 Loading easyocr model for languages: {self.languages}...")
1344
+ import easyocr
1345
+
1346
+ # This will download models on first run
1347
+ self.model = easyocr.Reader(self.languages, gpu=True)
1348
+ self.is_loaded = True
1349
+
1350
+ self._log("✅ easyocr model loaded successfully")
1351
+ return True
1352
+
1353
+ except Exception as e:
1354
+ self._log(f"❌ Failed to load easyocr: {str(e)}", "error")
1355
+ # Try CPU mode if GPU fails
1356
+ try:
1357
+ import easyocr
1358
+ self.model = easyocr.Reader(self.languages, gpu=False)
1359
+ self.is_loaded = True
1360
+ self._log("✅ easyocr loaded in CPU mode")
1361
+ return True
1362
+ except:
1363
+ return False
1364
+
1365
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
1366
+ """Detect text using easyocr"""
1367
+ results = []
1368
+
1369
+ try:
1370
+ if not self.is_loaded:
1371
+ if not self.load_model():
1372
+ return results
1373
+
1374
+ # EasyOCR can work directly with numpy arrays
1375
+ ocr_results = self.model.readtext(image, detail=1)
1376
+
1377
+ # Parse results
1378
+ for (bbox, text, confidence) in ocr_results:
1379
+ # bbox is a list of 4 points
1380
+ xs = [point[0] for point in bbox]
1381
+ ys = [point[1] for point in bbox]
1382
+ x_min, x_max = min(xs), max(xs)
1383
+ y_min, y_max = min(ys), max(ys)
1384
+
1385
+ results.append(OCRResult(
1386
+ text=text,
1387
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
1388
+ confidence=confidence,
1389
+ vertices=[(int(p[0]), int(p[1])) for p in bbox]
1390
+ ))
1391
+
1392
+ self._log(f"✅ Detected {len(results)} text regions")
1393
+
1394
+ except Exception as e:
1395
+ self._log(f"❌ Error in easyocr detection: {str(e)}", "error")
1396
+
1397
+ return results
1398
+
1399
+
1400
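A sketch showing how the language auto-adjustment above behaves; the language codes are EasyOCR's own, and the image path is a placeholder:

# Hypothetical usage: an incompatible pair like ['ja', 'ko'] is trimmed to ['ja', 'en'].
import cv2

reader = EasyOCRProvider(log_callback=print, languages=["ja", "ko"])
print(reader.languages)                      # -> ['ja', 'en'] after _validate_language_combination

page = cv2.imread("page_001.png")
for r in reader.detect_text(page):
    print(r.text, r.bbox, round(r.confidence, 2))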
+ class PaddleOCRProvider(OCRProvider):
1401
+ """PaddleOCR provider with memory safety measures"""
1402
+
1403
+ def check_installation(self) -> bool:
1404
+ """Check if paddleocr is installed"""
1405
+ try:
1406
+ from paddleocr import PaddleOCR
1407
+ self.is_installed = True
1408
+ return True
1409
+ except ImportError:
1410
+ return False
1411
+
1412
+ def install(self, progress_callback=None) -> bool:
1413
+ """Install paddleocr"""
1414
+ pass
1415
+
1416
+ def load_model(self, **kwargs) -> bool:
1417
+ """Load paddleocr model with memory-safe configurations"""
1418
+ try:
1419
+ if not self.is_installed and not self.check_installation():
1420
+ self._log("❌ paddleocr not installed", "error")
1421
+ return False
1422
+
1423
+ self._log("🔥 Loading PaddleOCR model...")
1424
+
1425
+ # Set memory-safe environment variables BEFORE importing
1426
+ import os
1427
+ os.environ['OMP_NUM_THREADS'] = '1' # Prevent OpenMP conflicts
1428
+ os.environ['MKL_NUM_THREADS'] = '1' # Prevent MKL conflicts
1429
+ os.environ['OPENBLAS_NUM_THREADS'] = '1' # Prevent OpenBLAS conflicts
1430
+ os.environ['FLAGS_use_mkldnn'] = '0' # Disable MKL-DNN
1431
+
1432
+ from paddleocr import PaddleOCR
1433
+
1434
+ # Try memory-safe configurations
1435
+ configs_to_try = [
1436
+ # Config 1: Most memory-safe configuration
1437
+ {
1438
+ 'use_angle_cls': False, # Disable angle to save memory
1439
+ 'lang': 'ch',
1440
+ 'rec_batch_num': 1, # Process one at a time
1441
+ 'max_text_length': 100, # Limit text length
1442
+ 'drop_score': 0.5, # Higher threshold to reduce detections
1443
+ 'cpu_threads': 1, # Single thread to avoid conflicts
1444
+ },
1445
+ # Config 2: Minimal memory footprint
1446
+ {
1447
+ 'lang': 'ch',
1448
+ 'rec_batch_num': 1,
1449
+ 'cpu_threads': 1,
1450
+ },
1451
+ # Config 3: Absolute minimal
1452
+ {
1453
+ 'lang': 'ch'
1454
+ },
1455
+ # Config 4: Empty config
1456
+ {}
1457
+ ]
1458
+
1459
+ for i, config in enumerate(configs_to_try):
1460
+ try:
1461
+ self._log(f" Trying configuration {i+1}/{len(configs_to_try)}: {config}")
1462
+
1463
+ # Force garbage collection before loading
1464
+ import gc
1465
+ gc.collect()
1466
+
1467
+ self.model = PaddleOCR(**config)
1468
+ self.is_loaded = True
1469
+ self.current_config = config
1470
+ self._log(f"✅ PaddleOCR loaded successfully with config: {config}")
1471
+ return True
1472
+ except Exception as e:
1473
+ error_str = str(e)
1474
+ self._log(f" Config {i+1} failed: {error_str}", "debug")
1475
+
1476
+ # Clean up on failure
1477
+ if hasattr(self, 'model'):
1478
+ del self.model
1479
+ gc.collect()
1480
+ continue
1481
+
1482
+ self._log(f"❌ PaddleOCR failed to load with any configuration", "error")
1483
+ return False
1484
+
1485
+ except Exception as e:
1486
+ self._log(f"❌ Failed to load paddleocr: {str(e)}", "error")
1487
+ import traceback
1488
+ self._log(traceback.format_exc(), "debug")
1489
+ return False
1490
+
1491
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
1492
+ """Detect text with memory safety measures"""
1493
+ results = []
1494
+
1495
+ try:
1496
+ if not self.is_loaded:
1497
+ if not self.load_model():
1498
+ return results
1499
+
1500
+ import cv2
1501
+ import numpy as np
1502
+ import gc
1503
+
1504
+ # Memory safety: Ensure image isn't too large
1505
+ h, w = image.shape[:2] if len(image.shape) >= 2 else (0, 0)
1506
+
1507
+ # Limit image size to prevent memory issues
1508
+ MAX_DIMENSION = 1500
1509
+ if h > MAX_DIMENSION or w > MAX_DIMENSION:
1510
+ scale = min(MAX_DIMENSION/h, MAX_DIMENSION/w)
1511
+ new_h, new_w = int(h*scale), int(w*scale)
1512
+ self._log(f"⚠️ Resizing large image from {w}x{h} to {new_w}x{new_h} for memory safety", "warning")
1513
+ image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
1514
+ scale_factor = 1/scale
1515
+ else:
1516
+ scale_factor = 1.0
1517
+
1518
+ # Ensure correct format
1519
+ if len(image.shape) == 2: # Grayscale
1520
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
1521
+ elif len(image.shape) == 4: # Batch
1522
+ image = image[0]
1523
+
1524
+ # Ensure uint8 type
1525
+ if image.dtype != np.uint8:
1526
+ if image.max() <= 1.0:
1527
+ image = (image * 255).astype(np.uint8)
1528
+ else:
1529
+ image = image.astype(np.uint8)
1530
+
1531
+ # Make a copy to avoid memory corruption
1532
+ image_copy = image.copy()
1533
+
1534
+ # Force garbage collection before OCR
1535
+ gc.collect()
1536
+
1537
+ # Process with timeout protection (run OCR in a worker thread and join with a timeout)
+ import threading
1540
+
1541
+ ocr_results = None
1542
+ ocr_error = None
1543
+
1544
+ def run_ocr():
1545
+ nonlocal ocr_results, ocr_error
1546
+ try:
1547
+ ocr_results = self.model.ocr(image_copy)
1548
+ except Exception as e:
1549
+ ocr_error = e
1550
+
1551
+ # Run OCR in a separate thread with timeout
1552
+ ocr_thread = threading.Thread(target=run_ocr)
1553
+ ocr_thread.daemon = True
1554
+ ocr_thread.start()
1555
+ ocr_thread.join(timeout=30) # 30 second timeout
1556
+
1557
+ if ocr_thread.is_alive():
1558
+ self._log("❌ PaddleOCR timeout - taking too long", "error")
1559
+ return results
1560
+
1561
+ if ocr_error:
1562
+ raise ocr_error
1563
+
1564
+ # Parse results
1565
+ results = self._parse_ocr_results(ocr_results)
1566
+
1567
+ # Scale coordinates back if image was resized
1568
+ if scale_factor != 1.0 and results:
1569
+ for r in results:
1570
+ x, y, width, height = r.bbox
1571
+ r.bbox = (int(x*scale_factor), int(y*scale_factor),
1572
+ int(width*scale_factor), int(height*scale_factor))
1573
+ r.vertices = [(int(v[0]*scale_factor), int(v[1]*scale_factor))
1574
+ for v in r.vertices]
1575
+
1576
+ if results:
1577
+ self._log(f"✅ Detected {len(results)} text regions", "info")
1578
+ else:
1579
+ self._log("No text regions found", "debug")
1580
+
1581
+ # Clean up
1582
+ del image_copy
1583
+ gc.collect()
1584
+
1585
+ except Exception as e:
1586
+ error_msg = str(e) if str(e) else type(e).__name__
1587
+
1588
+ if "memory" in error_msg.lower() or "0x" in error_msg:
1589
+ self._log("❌ Memory access violation in PaddleOCR", "error")
1590
+ self._log(" This is a known Windows issue with PaddleOCR", "info")
1591
+ self._log(" Please switch to EasyOCR or manga-ocr instead", "warning")
1592
+ elif "trace_order.size()" in error_msg:
1593
+ self._log("❌ PaddleOCR internal error", "error")
1594
+ self._log(" Please switch to EasyOCR or manga-ocr", "warning")
1595
+ else:
1596
+ self._log(f"❌ Error in paddleocr detection: {error_msg}", "error")
1597
+
1598
+ import traceback
1599
+ self._log(traceback.format_exc(), "debug")
1600
+
1601
+ return results
1602
+
1603
+ def _parse_ocr_results(self, ocr_results) -> List[OCRResult]:
1604
+ """Parse OCR results safely"""
1605
+ results = []
1606
+
1607
+ if isinstance(ocr_results, bool) and ocr_results == False:
1608
+ return results
1609
+
1610
+ if ocr_results is None or not isinstance(ocr_results, list):
1611
+ return results
1612
+
1613
+ if len(ocr_results) == 0:
1614
+ return results
1615
+
1616
+ # Handle batch format
1617
+ if isinstance(ocr_results[0], list) and len(ocr_results[0]) > 0:
1618
+ first_item = ocr_results[0][0]
1619
+ if isinstance(first_item, list) and len(first_item) > 0:
1620
+ if isinstance(first_item[0], (list, tuple)) and len(first_item[0]) == 2:
1621
+ ocr_results = ocr_results[0]
1622
+
1623
+ # Parse detections
1624
+ for detection in ocr_results:
1625
+ if not detection or isinstance(detection, bool):
1626
+ continue
1627
+
1628
+ if not isinstance(detection, (list, tuple)) or len(detection) < 2:
1629
+ continue
1630
+
1631
+ try:
1632
+ bbox_points = detection[0]
1633
+ text_data = detection[1]
1634
+
1635
+ if not isinstance(bbox_points, (list, tuple)) or len(bbox_points) != 4:
1636
+ continue
1637
+
1638
+ if not isinstance(text_data, (tuple, list)) or len(text_data) < 2:
1639
+ continue
1640
+
1641
+ text = str(text_data[0]).strip()
1642
+ confidence = float(text_data[1])
1643
+
1644
+ if not text or confidence < 0.3:
1645
+ continue
1646
+
1647
+ xs = [float(p[0]) for p in bbox_points]
1648
+ ys = [float(p[1]) for p in bbox_points]
1649
+ x_min, x_max = min(xs), max(xs)
1650
+ y_min, y_max = min(ys), max(ys)
1651
+
1652
+ if (x_max - x_min) < 5 or (y_max - y_min) < 5:
1653
+ continue
1654
+
1655
+ results.append(OCRResult(
1656
+ text=text,
1657
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
1658
+ confidence=confidence,
1659
+ vertices=[(int(p[0]), int(p[1])) for p in bbox_points]
1660
+ ))
1661
+
1662
+ except Exception:
1663
+ continue
1664
+
1665
+ return results
1666
+
1667
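The detect_text method above wraps the PaddleOCR call in a worker thread joined with a timeout; a generic sketch of that pattern, with the function and timeout as placeholders rather than part of this module:

# Hypothetical helper mirroring the timeout guard used in PaddleOCRProvider.detect_text.
import threading

def run_with_timeout(fn, timeout_sec, *args, **kwargs):
    """Run fn(*args, **kwargs) in a daemon thread; return (result, error, timed_out)."""
    result, error = None, None

    def _target():
        nonlocal result, error
        try:
            result = fn(*args, **kwargs)
        except Exception as e:
            error = e

    worker = threading.Thread(target=_target, daemon=True)
    worker.start()
    worker.join(timeout=timeout_sec)
    return result, error, worker.is_alive()

# e.g. res, err, timed_out = run_with_timeout(model.ocr, 30, image_copy)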
+ class DocTROCRProvider(OCRProvider):
1668
+ """DocTR OCR provider"""
1669
+
1670
+ def check_installation(self) -> bool:
1671
+ """Check if doctr is installed"""
1672
+ try:
1673
+ from doctr.models import ocr_predictor
1674
+ self.is_installed = True
1675
+ return True
1676
+ except ImportError:
1677
+ return False
1678
+
1679
+ def install(self, progress_callback=None) -> bool:
1680
+ """Install doctr"""
1681
+ pass
1682
+
1683
+ def load_model(self, **kwargs) -> bool:
1684
+ """Load doctr model"""
1685
+ try:
1686
+ if not self.is_installed and not self.check_installation():
1687
+ self._log("❌ doctr not installed", "error")
1688
+ return False
1689
+
1690
+ self._log("🔥 Loading DocTR model...")
1691
+ from doctr.models import ocr_predictor
1692
+
1693
+ # Load pretrained model
1694
+ self.model = ocr_predictor(pretrained=True)
1695
+ self.is_loaded = True
1696
+
1697
+ self._log("✅ DocTR model loaded successfully")
1698
+ return True
1699
+
1700
+ except Exception as e:
1701
+ self._log(f"❌ Failed to load doctr: {str(e)}", "error")
1702
+ return False
1703
+
1704
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
1705
+ """Detect text using doctr"""
1706
+ results = []
1707
+
1708
+ try:
1709
+ if not self.is_loaded:
1710
+ if not self.load_model():
1711
+ return results
1712
+
1713
+ from doctr.io import DocumentFile
1714
+
1715
+ # DocTR expects document format
1716
+ # Convert numpy array to PIL and save temporarily
1717
+ import tempfile
1718
+ import cv2
1719
+
1720
+ with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
1721
+ cv2.imwrite(tmp.name, image)
1722
+ doc = DocumentFile.from_images(tmp.name)
1723
+
1724
+ # Run OCR
1725
+ result = self.model(doc)
1726
+
1727
+ # Parse results
1728
+ h, w = image.shape[:2]
1729
+ for page in result.pages:
1730
+ for block in page.blocks:
1731
+ for line in block.lines:
1732
+ for word in line.words:
1733
+ # Handle different geometry formats
1734
+ geometry = word.geometry
1735
+
1736
+ if len(geometry) == 4:
1737
+ # Standard format: (x1, y1, x2, y2)
1738
+ x1, y1, x2, y2 = geometry
1739
+ elif len(geometry) == 2:
1740
+ # Alternative format: ((x1, y1), (x2, y2))
1741
+ (x1, y1), (x2, y2) = geometry
1742
+ else:
1743
+ self._log(f"Unexpected geometry format: {geometry}", "warning")
1744
+ continue
1745
+
1746
+ # Convert relative coordinates to absolute
1747
+ x1, x2 = int(x1 * w), int(x2 * w)
1748
+ y1, y2 = int(y1 * h), int(y2 * h)
1749
+
1750
+ results.append(OCRResult(
1751
+ text=word.value,
1752
+ bbox=(x1, y1, x2 - x1, y2 - y1),
1753
+ confidence=word.confidence,
1754
+ vertices=[(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
1755
+ ))
1756
+
1757
+ # Clean up temp file
1758
+ try:
1759
+ os.unlink(tmp.name)
1760
+ except:
1761
+ pass
1762
+
1763
+ self._log(f"DocTR detected {len(results)} text regions")
1764
+
1765
+ except Exception as e:
1766
+ self._log(f"Error in doctr detection: {str(e)}", "error")
1767
+ import traceback
1768
+ self._log(traceback.format_exc(), "error")
1769
+
1770
+ return results
1771
+
1772
+
1773
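DocTR reports word geometry as relative coordinates in [0, 1]; a small worked example of the conversion performed above, with the geometry values and page size made up for illustration:

# Hypothetical word box ((0.10, 0.20), (0.35, 0.28)) on a 1000x1500 (w x h) page.
w, h = 1000, 1500
(x1_rel, y1_rel), (x2_rel, y2_rel) = ((0.10, 0.20), (0.35, 0.28))

x1, x2 = int(x1_rel * w), int(x2_rel * w)    # 100, 350
y1, y2 = int(y1_rel * h), int(y2_rel * h)    # 300, 420

bbox = (x1, y1, x2 - x1, y2 - y1)            # (100, 300, 250, 120) as (x, y, width, height)
print(bbox)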
+ class RapidOCRProvider(OCRProvider):
1774
+ """RapidOCR provider for fast local OCR"""
1775
+
1776
+ def check_installation(self) -> bool:
1777
+ """Check if rapidocr is installed"""
1778
+ try:
1779
+ import rapidocr_onnxruntime
1780
+ self.is_installed = True
1781
+ return True
1782
+ except ImportError:
1783
+ return False
1784
+
1785
+ def install(self, progress_callback=None) -> bool:
1786
+ """Install rapidocr (requires manual pip install)"""
1787
+ # RapidOCR requires manual installation
1788
+ if progress_callback:
1789
+ progress_callback("RapidOCR requires manual pip installation")
1790
+ self._log("Run: pip install rapidocr-onnxruntime", "info")
1791
+ return False # Always return False since we can't auto-install
1792
+
1793
+ def load_model(self, **kwargs) -> bool:
1794
+ """Load RapidOCR model"""
1795
+ try:
1796
+ if not self.is_installed and not self.check_installation():
1797
+ self._log("RapidOCR not installed", "error")
1798
+ return False
1799
+
1800
+ self._log("Loading RapidOCR...")
1801
+ from rapidocr_onnxruntime import RapidOCR
1802
+
1803
+ self.model = RapidOCR()
1804
+ self.is_loaded = True
1805
+
1806
+ self._log("RapidOCR model loaded successfully")
1807
+ return True
1808
+
1809
+ except Exception as e:
1810
+ self._log(f"Failed to load RapidOCR: {str(e)}", "error")
1811
+ return False
1812
+
1813
+ def detect_text(self, image: np.ndarray, **kwargs) -> List[OCRResult]:
1814
+ """Detect text using RapidOCR"""
1815
+ if not self.is_loaded:
1816
+ self._log("RapidOCR model not loaded", "error")
1817
+ return []
1818
+
1819
+ results = []
1820
+
1821
+ try:
+ import cv2
+
+ # Convert BGR numpy array to RGB for RapidOCR
1823
+ if len(image.shape) == 3:
1824
+ # BGR to RGB
1825
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
1826
+ else:
1827
+ image_rgb = image
1828
+
1829
+ # RapidOCR expects PIL Image or numpy array
1830
+ ocr_results, _ = self.model(image_rgb)
1831
+
1832
+ if ocr_results:
1833
+ for result in ocr_results:
1834
+ # RapidOCR returns [bbox, text, confidence]
1835
+ bbox_points = result[0] # 4 corner points
1836
+ text = result[1]
1837
+ confidence = float(result[2])
1838
+
1839
+ if not text or not text.strip():
1840
+ continue
1841
+
1842
+ # Convert 4-point bbox to x,y,w,h format
1843
+ xs = [point[0] for point in bbox_points]
1844
+ ys = [point[1] for point in bbox_points]
1845
+ x_min, x_max = min(xs), max(xs)
1846
+ y_min, y_max = min(ys), max(ys)
1847
+
1848
+ results.append(OCRResult(
1849
+ text=text.strip(),
1850
+ bbox=(int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
1851
+ confidence=confidence,
1852
+ vertices=[(int(p[0]), int(p[1])) for p in bbox_points]
1853
+ ))
1854
+
1855
+ self._log(f"Detected {len(results)} text regions")
1856
+
1857
+ except Exception as e:
1858
+ self._log(f"Error in RapidOCR detection: {str(e)}", "error")
1859
+
1860
+ return results
1861
+
1862
+ class OCRManager:
1863
+ """Manager for multiple OCR providers"""
1864
+
1865
+ def __init__(self, log_callback=None):
1866
+ self.log_callback = log_callback
1867
+ self.providers = {
1868
+ 'custom-api': CustomAPIProvider(log_callback),
1869
+ 'manga-ocr': MangaOCRProvider(log_callback),
1870
+ 'easyocr': EasyOCRProvider(log_callback),
1871
+ 'paddleocr': PaddleOCRProvider(log_callback),
1872
+ 'doctr': DocTROCRProvider(log_callback),
1873
+ 'rapidocr': RapidOCRProvider(log_callback),
1874
+ 'Qwen2-VL': Qwen2VL(log_callback)
1875
+ }
1876
+ self.current_provider = None
1877
+ self.stop_flag = None
1878
+
1879
+ def get_provider(self, name: str) -> Optional[OCRProvider]:
1880
+ """Get OCR provider by name"""
1881
+ return self.providers.get(name)
1882
+
1883
+ def set_current_provider(self, name: str):
1884
+ """Set current active provider"""
1885
+ if name in self.providers:
1886
+ self.current_provider = name
1887
+ return True
1888
+ return False
1889
+
1890
+ def check_provider_status(self, name: str) -> Dict[str, bool]:
1891
+ """Check installation and loading status of provider"""
1892
+ provider = self.providers.get(name)
1893
+ if not provider:
1894
+ return {'installed': False, 'loaded': False}
1895
+
1896
+ result = {
1897
+ 'installed': provider.check_installation(),
1898
+ 'loaded': provider.is_loaded
1899
+ }
1900
+ if self.log_callback:
1901
+ self.log_callback(f"DEBUG: check_provider_status({name}) returning loaded={result['loaded']}", "debug")
1902
+ return result
1903
+
1904
+ def install_provider(self, name: str, progress_callback=None) -> bool:
1905
+ """Install a provider"""
1906
+ provider = self.providers.get(name)
1907
+ if not provider:
1908
+ return False
1909
+
1910
+ return provider.install(progress_callback)
1911
+
1912
+ def load_provider(self, name: str, **kwargs) -> bool:
1913
+ """Load a provider's model with optional parameters"""
1914
+ provider = self.providers.get(name)
1915
+ if not provider:
1916
+ return False
1917
+
1918
+ return provider.load_model(**kwargs) # <-- Passes model_size and any other kwargs
1919
+
1920
+ def shutdown(self):
1921
+ """Release models/processors/tokenizers for all providers and clear caches."""
1922
+ try:
1923
+ import gc
1924
+ for name, provider in list(self.providers.items()):
1925
+ try:
1926
+ if hasattr(provider, 'model'):
1927
+ provider.model = None
1928
+ if hasattr(provider, 'processor'):
1929
+ provider.processor = None
1930
+ if hasattr(provider, 'tokenizer'):
1931
+ provider.tokenizer = None
1932
+ if hasattr(provider, 'reader'):
1933
+ provider.reader = None
1934
+ if hasattr(provider, 'is_loaded'):
1935
+ provider.is_loaded = False
1936
+ except Exception:
1937
+ pass
1938
+ gc.collect()
1939
+ try:
1940
+ import torch
1941
+ torch.cuda.empty_cache()
1942
+ except Exception:
1943
+ pass
1944
+ except Exception:
1945
+ pass
1946
+
1947
+ def detect_text(self, image: np.ndarray, provider_name: str = None, **kwargs) -> List[OCRResult]:
1948
+ """Detect text using specified or current provider"""
1949
+ provider_name = provider_name or self.current_provider
1950
+ if not provider_name:
1951
+ return []
1952
+
1953
+ provider = self.providers.get(provider_name)
1954
+ if not provider:
1955
+ return []
1956
+
1957
+ return provider.detect_text(image, **kwargs)
1958
+
1959
+ def set_stop_flag(self, stop_flag):
1960
+ """Set stop flag for all providers"""
1961
+ self.stop_flag = stop_flag
1962
+ for provider in self.providers.values():
1963
+ if hasattr(provider, 'set_stop_flag'):
1964
+ provider.set_stop_flag(stop_flag)
1965
+
1966
+ def reset_stop_flags(self):
1967
+ """Reset stop flags for all providers"""
1968
+ for provider in self.providers.values():
1969
+ if hasattr(provider, 'reset_stop_flags'):
1970
+ provider.reset_stop_flags()
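An end-to-end sketch of the OCRManager above; the provider key matches the providers dict, while the image path and the threading.Event stop flag are assumptions for illustration:

# Hypothetical pipeline: pick a provider, load it, OCR a page, then release everything.
import threading
import cv2

manager = OCRManager(log_callback=print)
manager.set_current_provider("manga-ocr")    # any key from manager.providers
manager.set_stop_flag(threading.Event())     # assumed flag type; providers only need something checkable

if manager.load_provider("manga-ocr"):
    page = cv2.imread("page_001.png")
    for r in manager.detect_text(page, confidence=0.7):
        print(r.text, r.bbox)

manager.shutdown()                           # drop models/processors and clear the CUDA cache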
translator_gui.py ADDED
The diff for this file is too large to render. See raw diff
 
unified_api_client.py ADDED
The diff for this file is too large to render. See raw diff