Spaces:

Shirochi
/

Glossarion

Running

App Files Community

Shirochi committed on Oct 5

Commit

0bf8c19

verified ·

1 Parent(s): 463ae26

Delete bubble_detector.py

Browse files

Files changed (1) hide show

bubble_detector.py +0 -1920

bubble_detector.py DELETED Viewed

@@ -1,1920 +0,0 @@
-"""
-bubble_detector.py - Modified version that works in frozen PyInstaller executables
-Replace your bubble_detector.py with this version
-"""
-import os
-import sys
-import json
-import numpy as np
-import cv2
-from typing import List, Tuple, Optional, Dict, Any
-import logging
-import traceback
-import hashlib
-from pathlib import Path
-import threading
-import time
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Check if we're running in a frozen (bundled executable) environment
-IS_FROZEN = getattr(sys, 'frozen', False)
-if IS_FROZEN:
-    # PyInstaller exposes its unpacked bundle directory as sys._MEIPASS. Other
-    # freezers set sys.frozen without that attribute, so fall back to the
-    # executable's directory instead of raising AttributeError at import time.
-    MEIPASS = getattr(sys, '_MEIPASS', None) or os.path.dirname(os.path.abspath(sys.executable))
-    # Point the ML libraries' cache/home directories inside the bundle directory
-    os.environ['TORCH_HOME'] = MEIPASS
-    os.environ['TRANSFORMERS_CACHE'] = os.path.join(MEIPASS, 'transformers')
-    os.environ['HF_HOME'] = os.path.join(MEIPASS, 'huggingface')
-    logger.info(f"Running in frozen environment: {MEIPASS}")
-# Optional-dependency flags. Everything starts out "unavailable"; the probes
-# below flip a flag only after the corresponding import actually succeeded.
-YOLO_AVAILABLE = False
-YOLO = None
-torch = None
-TORCH_AVAILABLE = False
-ONNX_AVAILABLE = False
-TRANSFORMERS_AVAILABLE = False
-RTDetrForObjectDetection = None
-RTDetrImageProcessor = None
-PIL_AVAILABLE = False
-# Try to import YOLO dependencies with better error handling
-if IS_FROZEN:
-    # In frozen environment, try harder to import
-    try:
-        # First try to import torch components individually
-        import torch
-        import torch.nn
-        import torch.cuda
-        TORCH_AVAILABLE = True
-        logger.info("✓ PyTorch loaded in frozen environment")
-    except Exception as e:
-        logger.warning(f"PyTorch not available in frozen environment: {e}")
-        TORCH_AVAILABLE = False
-        torch = None
-    # Try ultralytics after torch (only attempted when torch imported)
-    if TORCH_AVAILABLE:
-        try:
-            from ultralytics import YOLO
-            YOLO_AVAILABLE = True
-            logger.info("✓ Ultralytics YOLO loaded in frozen environment")
-        except Exception as e:
-            logger.warning(f"Ultralytics not available in frozen environment: {e}")
-            YOLO_AVAILABLE = False
-    # Try transformers
-    try:
-        import transformers
-        # Try specific imports
-        try:
-            from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
-            TRANSFORMERS_AVAILABLE = True
-            logger.info("✓ Transformers RT-DETR loaded in frozen environment")
-        except ImportError:
-            # Try alternative import
-            # NOTE(review): AutoModel/AutoImageProcessor stand in for the RT-DETR
-            # classes here - confirm the Auto classes resolve to a detection model.
-            try:
-                from transformers import AutoModel, AutoImageProcessor
-                RTDetrForObjectDetection = AutoModel
-                RTDetrImageProcessor = AutoImageProcessor
-                TRANSFORMERS_AVAILABLE = True
-                logger.info("✓ Transformers loaded with AutoModel fallback")
-            except Exception:
-                # 'Exception' rather than a bare except: a failed optional import
-                # must not swallow KeyboardInterrupt / SystemExit.
-                TRANSFORMERS_AVAILABLE = False
-                logger.warning("Transformers RT-DETR not available in frozen environment")
-    except Exception as e:
-        logger.warning(f"Transformers not available in frozen environment: {e}")
-        TRANSFORMERS_AVAILABLE = False
-else:
-    # Normal environment - original import logic
-    try:
-        from ultralytics import YOLO
-        YOLO_AVAILABLE = True
-    except Exception:
-        YOLO_AVAILABLE = False
-        logger.warning("Ultralytics YOLO not available")
-    try:
-        import torch
-        # Test if cuda attribute exists
-        _ = torch.cuda
-        TORCH_AVAILABLE = True
-    except Exception:
-        # Catch everything, like the frozen branch does: a broken install can
-        # fail with OSError (native library load) rather than ImportError, and
-        # that must disable torch instead of aborting the import of this module.
-        TORCH_AVAILABLE = False
-        torch = None
-        logger.warning("PyTorch not available or incomplete")
-    try:
-        from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
-        # Prefer the V2 model class when the installed transformers provides it
-        try:
-            from transformers import RTDetrV2ForObjectDetection
-            RTDetrForObjectDetection = RTDetrV2ForObjectDetection
-        except ImportError:
-            pass
-        TRANSFORMERS_AVAILABLE = True
-    except Exception:
-        TRANSFORMERS_AVAILABLE = False
-        logger.info("Transformers not available for RT-DETR")
-# ORT memory behavior has to be configured before onnxruntime is imported;
-# a value already present in the environment is left untouched.
-try:
-    if 'ORT_DISABLE_MEMORY_ARENA' not in os.environ:
-        os.environ['ORT_DISABLE_MEMORY_ARENA'] = '1'
-except Exception:
-    pass
-# ONNX Runtime probe - works well in frozen environments
-try:
-    import onnxruntime as ort
-except ImportError:
-    ONNX_AVAILABLE = False
-    logger.warning("ONNX Runtime not available")
-else:
-    ONNX_AVAILABLE = True
-    logger.info("✓ ONNX Runtime available")
-# PIL probe
-try:
-    from PIL import Image
-except ImportError:
-    PIL_AVAILABLE = False
-    logger.info("PIL not available")
-else:
-    PIL_AVAILABLE = True
-class BubbleDetector:
-    """
-    Combined YOLOv8 and RT-DETR speech bubble detector for comics and manga.
-    Supports multiple model formats and provides configurable detection.
-    Backward compatible with existing code while adding RT-DETR support.
-    """
-    # Process-wide shared RT-DETR to avoid concurrent meta-device loads.
-    # These are class attributes: one copy shared by every BubbleDetector instance.
-    _rtdetr_init_lock = threading.Lock()  # held for the whole load in load_rtdetr_model()
-    _rtdetr_shared_model = None  # published by load_rtdetr_model() after a successful load
-    _rtdetr_shared_processor = None  # image processor published together with the model
-    _rtdetr_loaded = False  # True once the shared model/processor pair is usable
-    _rtdetr_repo_id = 'ogkalu/comic-text-and-bubble-detector'  # overwritten with the repo actually loaded
-    # Shared RT-DETR (ONNX) across process to avoid device/context storms
-    _rtdetr_onnx_init_lock = threading.Lock()
-    _rtdetr_onnx_shared_session = None
-    _rtdetr_onnx_loaded = False
-    _rtdetr_onnx_providers = None
-    _rtdetr_onnx_model_path = None
-    # Limit concurrent runs to avoid device hangs. Defaults to 2 for better parallelism.
-    # Can be overridden via env DML_MAX_CONCURRENT or config rtdetr_max_concurrency
-    # (the config value is applied in __init__). Evaluated once, at class definition.
-    try:
-        _rtdetr_onnx_max_concurrent = int(os.environ.get('DML_MAX_CONCURRENT', '2'))
-    except Exception:
-        # Non-numeric env value: fall back to the default
-        _rtdetr_onnx_max_concurrent = 2
-    # max(1, ...) keeps the semaphore usable even if the env value is 0 or negative
-    _rtdetr_onnx_sema = threading.Semaphore(max(1, _rtdetr_onnx_max_concurrent))
-    # Set to True by __init__ once it has sized the semaphore
-    _rtdetr_onnx_sema_initialized = False
-    def __init__(self, config_path: str = "config.json"):
-        """
-        Initialize the bubble detector.
-        Loads the JSON config, resets every model handle, reads the detection,
-        concurrency and quantization settings from the 'manga_settings' section
-        and creates the cache directory. No model is loaded here.
-        Args:
-            config_path: Path to configuration file
-        """
-        # Set thread limits early if environment indicates single-threaded mode
-        try:
-            if os.environ.get('OMP_NUM_THREADS') == '1':
-                # Already in single-threaded mode, ensure it's applied to this process
-                # Check if torch is available at module level before trying to use it
-                if TORCH_AVAILABLE and torch is not None:
-                    try:
-                        torch.set_num_threads(1)
-                    except (RuntimeError, AttributeError):
-                        pass
-                try:
-                    import cv2
-                    cv2.setNumThreads(1)
-                except (ImportError, AttributeError):
-                    pass
-        except Exception:
-            pass
-        self.config_path = config_path
-        self.config = self._load_config()
-        # Resolve the config sections once. Any level may be missing or hold a
-        # non-dict value (e.g. "manga_settings": null); treat those as empty so
-        # a malformed config cannot crash the constructor.
-        manga_cfg = self.config.get('manga_settings') if isinstance(self.config, dict) else None
-        if not isinstance(manga_cfg, dict):
-            manga_cfg = {}
-        ocr_cfg = manga_cfg.get('ocr')
-        if not isinstance(ocr_cfg, dict):
-            ocr_cfg = {}
-        adv_cfg = manga_cfg.get('advanced')
-        if not isinstance(adv_cfg, dict):
-            adv_cfg = {}
-        # YOLOv8 components (original)
-        self.model = None
-        self.model_loaded = False
-        self.model_type = None  # 'yolo', 'onnx', or 'torch'
-        self.onnx_session = None
-        # RT-DETR components (new)
-        self.rtdetr_model = None
-        self.rtdetr_processor = None
-        self.rtdetr_loaded = False
-        self.rtdetr_repo = 'ogkalu/comic-text-and-bubble-detector'
-        # RT-DETR (ONNX) backend components
-        self.rtdetr_onnx_session = None
-        self.rtdetr_onnx_loaded = False
-        self.rtdetr_onnx_repo = 'ogkalu/comic-text-and-bubble-detector'
-        # RT-DETR class definitions
-        self.CLASS_BUBBLE = 0      # Empty speech bubble
-        self.CLASS_TEXT_BUBBLE = 1 # Bubble with text
-        self.CLASS_TEXT_FREE = 2   # Text without bubble
-        # Detection settings
-        self.default_confidence = 0.3
-        self.default_iou_threshold = 0.45
-        # Allow override from settings; non-numeric values fall back to 100
-        try:
-            self.default_max_detections = int(ocr_cfg.get('bubble_max_detections', 100))
-            self.max_det_yolo = int(ocr_cfg.get('bubble_max_detections_yolo', self.default_max_detections))
-            self.max_det_rtdetr = int(ocr_cfg.get('bubble_max_detections_rtdetr', self.default_max_detections))
-        except Exception:
-            self.default_max_detections = 100
-            self.max_det_yolo = 100
-            self.max_det_rtdetr = 100
-        # Cache directory for ONNX conversions
-        self.cache_dir = os.environ.get('BUBBLE_CACHE_DIR', 'models')
-        os.makedirs(self.cache_dir, exist_ok=True)
-        # RT-DETR concurrency setting from config
-        try:
-            # Default to the current class-level value (seeded from DML_MAX_CONCURRENT)
-            # so the env override survives when the config has no explicit setting.
-            # Clamp before comparing: otherwise a configured value <= 0 never equals
-            # the stored (clamped) value and the shared semaphore is rebuilt on
-            # every construction.
-            rtdetr_max_conc = max(1, int(ocr_cfg.get('rtdetr_max_concurrency', BubbleDetector._rtdetr_onnx_max_concurrent)))
-            # Update class-level semaphore if not yet initialized or if value changed
-            if not BubbleDetector._rtdetr_onnx_sema_initialized or rtdetr_max_conc != BubbleDetector._rtdetr_onnx_max_concurrent:
-                BubbleDetector._rtdetr_onnx_max_concurrent = rtdetr_max_conc
-                BubbleDetector._rtdetr_onnx_sema = threading.Semaphore(rtdetr_max_conc)
-                BubbleDetector._rtdetr_onnx_sema_initialized = True
-                logger.info(f"RT-DETR concurrency set to: {BubbleDetector._rtdetr_onnx_max_concurrent}")
-        except Exception as e:
-            logger.warning(f"Failed to set RT-DETR concurrency: {e}")
-        # GPU availability
-        self.use_gpu = TORCH_AVAILABLE and torch.cuda.is_available()
-        self.device = 'cuda' if self.use_gpu else 'cpu'
-        # Quantization/precision settings: enabled by env MODEL_QUANTIZE or either config switch
-        env_quant = os.environ.get('MODEL_QUANTIZE', 'false').lower() == 'true'
-        self.quantize_enabled = bool(env_quant or adv_cfg.get('quantize_models', False) or ocr_cfg.get('quantize_bubble_detector', False))
-        self.quantize_dtype = str(adv_cfg.get('torch_precision', os.environ.get('TORCH_PRECISION', 'auto'))).lower()
-        # Prefer advanced.onnx_quantize; fall back to env or global quantize
-        self.onnx_quantize_enabled = bool(adv_cfg.get('onnx_quantize', os.environ.get('ONNX_QUANTIZE', 'false').lower() == 'true' or self.quantize_enabled))
-        # Stop flag support
-        self.stop_flag = None
-        self._stopped = False
-        self.log_callback = None
-        logger.info("🗨️ BubbleDetector initialized")
-        logger.info(f"   GPU: {'Available' if self.use_gpu else 'Not available'}")
-        logger.info(f"   YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}")
-        logger.info(f"   ONNX: {'Available' if ONNX_AVAILABLE else 'Not installed'}")
-        logger.info(f"   RT-DETR: {'Available' if TRANSFORMERS_AVAILABLE else 'Not installed'}")
-        logger.info(f"   Quantization: {'ENABLED' if self.quantize_enabled else 'disabled'} (torch_precision={self.quantize_dtype}, onnx_quantize={'on' if self.onnx_quantize_enabled else 'off'})" )
-    def _load_config(self) -> Dict[str, Any]:
-        """
-        Load configuration from self.config_path.
-        Returns:
-            The parsed JSON object, or an empty dict when the file is missing,
-            cannot be read or parsed, or its top level is not a JSON object.
-        """
-        if not os.path.exists(self.config_path):
-            return {}
-        try:
-            with open(self.config_path, 'r', encoding='utf-8') as f:
-                loaded = json.load(f)
-        except Exception as e:
-            logger.warning(f"Failed to load config: {e}")
-            return {}
-        # The rest of the class uses the config like a dict (.get, item
-        # assignment), so a top-level list/string/number is unusable.
-        if not isinstance(loaded, dict):
-            logger.warning(f"Failed to load config: expected a JSON object, got {type(loaded).__name__}")
-            return {}
-        return loaded
-    def _save_config(self):
-        """
-        Save configuration to self.config_path as indented JSON.
-        Failures are logged and never raised to the caller.
-        """
-        try:
-            # Serialize before opening the file: mode 'w' truncates on open, so a
-            # non-serializable value has to fail here, while the previously saved
-            # config is still intact on disk.
-            payload = json.dumps(self.config, indent=2)
-            with open(self.config_path, 'w', encoding='utf-8') as f:
-                f.write(payload)
-        except Exception as e:
-            logger.error(f"Failed to save config: {e}")
-    def set_stop_flag(self, stop_flag):
-        """Attach the stop flag polled by _check_stop() and clear any latched stop state"""
-        self._stopped = False
-        self.stop_flag = stop_flag
-    def set_log_callback(self, log_callback):
-        """
-        Register the GUI log sink.
-        Args:
-            log_callback: Callable invoked by _log() as log_callback(message, level);
-                a falsy value makes _log() use the module logger instead
-        """
-        self.log_callback = log_callback
-    def _check_stop(self) -> bool:
-        """
-        Report whether processing should stop.
-        A stop is latched in self._stopped once seen, either from the attached
-        stop flag or from MangaTranslator's global cancellation.
-        Returns:
-            True if a stop has been requested, False otherwise
-        """
-        if self._stopped:
-            return True
-        requested = bool(self.stop_flag and self.stop_flag.is_set())
-        if not requested:
-            # Global manga translator cancellation; any failure here (module not
-            # importable, helper missing) simply counts as "not cancelled"
-            try:
-                from manga_translator import MangaTranslator
-                requested = bool(MangaTranslator.is_globally_cancelled())
-            except Exception:
-                requested = False
-        if requested:
-            self._stopped = True
-        return requested
-    def _log(self, message: str, level: str = "info"):
-        """
-        Emit a log message through the GUI callback, or the module logger when no
-        callback is set. After a stop only essential stop/cleanup messages pass.
-        Args:
-            message: Text to log
-            level: Logger method name ('info', 'warning', ...); unknown names use info
-        """
-        if self._check_stop():
-            # Markers of the messages that are still let through once stopped
-            allowed_when_stopped = (
-                "⏹️ Translation stopped by user",
-                "⏹️ Bubble detection stopped",
-                "cleanup", "🧹",
-            )
-            if all(marker not in message for marker in allowed_when_stopped):
-                return
-        if not self.log_callback:
-            emit = logger.info if level == 'info' else getattr(logger, level, logger.info)
-            emit(message)
-            return
-        self.log_callback(message, level)
-    def reset_stop_flags(self):
-        """
-        Clear the latched stop state before a new processing run.
-        Only self._stopped is reset; the attached stop flag object is left as is.
-        """
-        self._stopped = False
-    def load_model(self, model_path: str, force_reload: bool = False) -> bool:
-        """
-        Load a YOLOv8 model for bubble detection.
-        The backend is chosen by file extension: .pt/.pth via ultralytics YOLO,
-        .onnx via ONNX Runtime, .torchscript via torch.jit. On success the path and
-        model type are written to the config file.
-        Args:
-            model_path: Path to model file (.pt, .onnx, or .torchscript), or a
-                Hugging Face repo ID whose 'detector.onnx' is downloaded first
-            force_reload: Force reload even if model is already loaded
-        Returns:
-            True if model loaded successfully, False otherwise
-        """
-        try:
-            # If given a Hugging Face repo ID (e.g., 'owner/name'), fetch detector.onnx into models/
-            # Heuristic: any value containing '/' that does not exist on disk is treated as a repo ID.
-            if model_path and (('/' in model_path) and not os.path.exists(model_path)):
-                try:
-                    from huggingface_hub import hf_hub_download
-                    os.makedirs(self.cache_dir, exist_ok=True)
-                    logger.info(f"📥 Resolving repo '{model_path}' to detector.onnx in {self.cache_dir}...")
-                    resolved = hf_hub_download(repo_id=model_path, filename='detector.onnx', cache_dir=self.cache_dir, local_dir=self.cache_dir, local_dir_use_symlinks=False)
-                    if resolved and os.path.exists(resolved):
-                        model_path = resolved
-                        logger.info(f"✅ Downloaded detector.onnx to: {model_path}")
-                except Exception as repo_err:
-                    # Download failure is only logged; the existence check below then returns False
-                    logger.error(f"Failed to download from repo '{model_path}': {repo_err}")
-            if not os.path.exists(model_path):
-                logger.error(f"Model file not found: {model_path}")
-                return False
-            # Check if it's the same model already loaded
-            if self.model_loaded and not force_reload:
-                last_path = self.config.get('last_model_path', '')
-                if last_path == model_path:
-                    logger.info("Model already loaded (same path)")
-                    return True
-                else:
-                    logger.info(f"Model path changed from {last_path} to {model_path}, reloading...")
-                    force_reload = True
-            # Clear previous model if force reload
-            if force_reload:
-                logger.info("Force reloading model...")
-                self.model = None
-                self.onnx_session = None
-                self.model_loaded = False
-                self.model_type = None
-            logger.info(f"📥 Loading bubble detection model: {model_path}")
-            # Determine model type by extension
-            ext = Path(model_path).suffix.lower()
-            if ext in ['.pt', '.pth']:
-                if not YOLO_AVAILABLE:
-                    logger.warning("Ultralytics package not available in this build")
-                    logger.info("Bubble detection will be disabled - this is normal for lightweight builds")
-                    # No fallback is attempted here: without ultralytics a .pt/.pth model cannot be loaded
-                    self.model_loaded = False
-                    return False
-                # Load YOLOv8 model
-                try:
-                    self.model = YOLO(model_path)
-                    self.model_type = 'yolo'
-                    # Set to eval mode
-                    if hasattr(self.model, 'model'):
-                        self.model.model.eval()
-                    # Move to GPU if available
-                    if self.use_gpu and TORCH_AVAILABLE:
-                        try:
-                            self.model.to('cuda')
-                        except Exception as gpu_error:
-                            # Non-fatal: the load continues without the GPU move
-                            logger.warning(f"Could not move model to GPU: {gpu_error}")
-                    logger.info("✅ YOLOv8 model loaded successfully")
-                    # Apply optional FP16 precision to reduce VRAM if enabled
-                    if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
-                        try:
-                            m = self.model.model if hasattr(self.model, 'model') else self.model
-                            m.half()
-                            logger.info("🔻 Applied FP16 precision to YOLO model (GPU)")
-                        except Exception as _e:
-                            logger.warning(f"Could not switch YOLO model to FP16: {_e}")
-                except Exception as yolo_error:
-                    logger.error(f"Failed to load YOLO model: {yolo_error}")
-                    return False
-            elif ext == '.onnx':
-                if not ONNX_AVAILABLE:
-                    logger.warning("ONNX Runtime not available in this build")
-                    logger.info("ONNX model support disabled - this is normal for lightweight builds")
-                    return False
-                try:
-                    # Load ONNX model
-                    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.use_gpu else ['CPUExecutionProvider']
-                    session_path = model_path
-                    # NOTE(review): this branch is gated on quantize_enabled, while the RT-DETR
-                    # ONNX export path uses onnx_quantize_enabled - confirm which flag is intended.
-                    if self.quantize_enabled:
-                        try:
-                            from onnxruntime.quantization import quantize_dynamic, QuantType
-                            # Quantized copy lives next to the source model as <name>.int8.onnx
-                            quant_path = os.path.splitext(model_path)[0] + ".int8.onnx"
-                            # Reuse an existing quantized file unless FORCE_ONNX_REBUILD=true
-                            if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
-                                logger.info("🔻 Quantizing ONNX model weights to INT8 (dynamic)...")
-                                quantize_dynamic(model_input=model_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
-                            session_path = quant_path
-                            self.config['last_onnx_quantized_path'] = quant_path
-                            self._save_config()
-                            logger.info(f"✅ Using quantized ONNX model: {quant_path}")
-                        except Exception as qe:
-                            # Quantization is best-effort; session_path keeps whatever was set before the failure
-                            logger.warning(f"ONNX quantization not applied: {qe}")
-                    # Use conservative ORT memory options to reduce RAM growth
-                    so = ort.SessionOptions()
-                    try:
-                        so.enable_mem_pattern = False
-                        so.enable_cpu_mem_arena = False
-                    except Exception:
-                        pass
-                    self.onnx_session = ort.InferenceSession(session_path, sess_options=so, providers=providers)
-                    self.model_type = 'onnx'
-                    logger.info("✅ ONNX model loaded successfully")
-                except Exception as onnx_error:
-                    logger.error(f"Failed to load ONNX model: {onnx_error}")
-                    return False
-            elif ext == '.torchscript':
-                if not TORCH_AVAILABLE:
-                    logger.warning("PyTorch not available in this build")
-                    logger.info("TorchScript model support disabled - this is normal for lightweight builds")
-                    return False
-                try:
-                    # Add safety check for torch being None
-                    if torch is None:
-                        logger.error("PyTorch module is None - cannot load TorchScript model")
-                        return False
-                    # Load TorchScript model (onto CPU first; moved to CUDA below when available)
-                    self.model = torch.jit.load(model_path, map_location='cpu')
-                    self.model.eval()
-                    self.model_type = 'torch'
-                    if self.use_gpu:
-                        try:
-                            self.model = self.model.cuda()
-                        except Exception as gpu_error:
-                            logger.warning(f"Could not move TorchScript model to GPU: {gpu_error}")
-                    logger.info("✅ TorchScript model loaded successfully")
-                    # Optional FP16 precision on GPU
-                    if self.quantize_enabled and self.use_gpu and TORCH_AVAILABLE:
-                        try:
-                            self.model = self.model.half()
-                            logger.info("🔻 Applied FP16 precision to TorchScript model (GPU)")
-                        except Exception as _e:
-                            logger.warning(f"Could not switch TorchScript model to FP16: {_e}")
-                except Exception as torch_error:
-                    logger.error(f"Failed to load TorchScript model: {torch_error}")
-                    return False
-            else:
-                logger.error(f"Unsupported model format: {ext}")
-                logger.info("Supported formats: .pt/.pth (YOLOv8), .onnx (ONNX), .torchscript (TorchScript)")
-                return False
-            # Only set loaded if we actually succeeded
-            self.model_loaded = True
-            # Persist the path: it is what the "same model already loaded" check above compares against
-            self.config['last_model_path'] = model_path
-            self.config['model_type'] = self.model_type
-            self._save_config()
-            return True
-        except Exception as e:
-            logger.error(f"Failed to load model: {e}")
-            logger.error(traceback.format_exc())
-            self.model_loaded = False
-            # Provide helpful context for .exe users
-            logger.info("Note: If running from .exe, some ML libraries may not be included")
-            logger.info("This is normal for lightweight builds - bubble detection will be disabled")
-            return False
-    def load_rtdetr_model(self, model_path: str = None, model_id: str = None, force_reload: bool = False) -> bool:
-        """
-        Load RT-DETR model for advanced bubble and text detection.
-        This implementation avoids the 'meta tensor' copy error by:
-        - Serializing the entire load under a class lock (no concurrent loads)
-        - Loading directly onto the target device (CUDA if available) via device_map='auto'
-        - Avoiding .to() on a potentially-meta model; no device migration post-load
-        A successfully loaded model/processor pair is published on the class and
-        reused by later instances unless force_reload is set.
-        Args:
-            model_path: Optional path to local model
-            model_id: Optional HuggingFace model ID (default: 'ogkalu/comic-text-and-bubble-detector')
-            force_reload: Force reload even if already loaded
-        Returns:
-            True if successful, False otherwise
-        """
-        if not TRANSFORMERS_AVAILABLE:
-            logger.error("Transformers library required for RT-DETR. Install with: pip install transformers")
-            return False
-        if not PIL_AVAILABLE:
-            logger.error("PIL required for RT-DETR. Install with: pip install pillow")
-            return False
-        if self.rtdetr_loaded and not force_reload:
-            logger.info("RT-DETR model already loaded")
-            return True
-        # Fast path: if shared already loaded and not forcing reload, attach
-        # (checked without the lock; re-checked under the lock below)
-        if BubbleDetector._rtdetr_loaded and not force_reload:
-            self.rtdetr_model = BubbleDetector._rtdetr_shared_model
-            self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
-            self.rtdetr_loaded = True
-            logger.info("RT-DETR model attached from shared cache")
-            return True
-        # Serialize the ENTIRE loading sequence to avoid concurrent init issues
-        with BubbleDetector._rtdetr_init_lock:
-            try:
-                # Re-check after acquiring lock
-                if BubbleDetector._rtdetr_loaded and not force_reload:
-                    self.rtdetr_model = BubbleDetector._rtdetr_shared_model
-                    self.rtdetr_processor = BubbleDetector._rtdetr_shared_processor
-                    self.rtdetr_loaded = True
-                    logger.info("RT-DETR model attached from shared cache (post-lock)")
-                    return True
-                # Use custom model_id if provided, otherwise use default
-                repo_id = model_id if model_id else self.rtdetr_repo
-                logger.info(f"📥 Loading RT-DETR model from {repo_id}...")
-                # Ensure TorchDynamo/compile doesn't interfere on some builds
-                try:
-                    os.environ.setdefault('TORCHDYNAMO_DISABLE', '1')
-                except Exception:
-                    pass
-                # Decide device strategy
-                gpu_available = bool(TORCH_AVAILABLE and hasattr(torch, 'cuda') and torch.cuda.is_available())
-                device_map = 'auto' if gpu_available else None
-                # Choose dtype: half precision on GPU, full precision on CPU
-                dtype = None
-                if TORCH_AVAILABLE:
-                    try:
-                        dtype = torch.float16 if gpu_available else torch.float32
-                    except Exception:
-                        dtype = None
-                low_cpu = True if gpu_available else False
-                # Load processor (once)
-                # NOTE(review): the processor is always fetched by repo_id, even when a
-                # local model_path is given - confirm this is intended for offline use.
-                self.rtdetr_processor = RTDetrImageProcessor.from_pretrained(
-                    repo_id,
-                    size={"width": 640, "height": 640},
-                    cache_dir=self.cache_dir if not model_path else None
-                )
-                # Prepare kwargs for from_pretrained
-                from_kwargs = {
-                    'cache_dir': self.cache_dir if not model_path else None,
-                    'low_cpu_mem_usage': low_cpu,
-                    'device_map': device_map,
-                }
-                # NOTE(review): the precision is passed as 'dtype' - verify the installed
-                # transformers version accepts this keyword in from_pretrained.
-                if dtype is not None:
-                    from_kwargs['dtype'] = dtype
-                # First attempt: load directly to target (CUDA if available)
-                try:
-                    self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
-                        model_path if model_path else repo_id,
-                        **from_kwargs,
-                    )
-                except Exception as primary_err:
-                    # Fallback to a simple CPU load (no device move) if CUDA path fails
-                    logger.warning(f"RT-DETR primary load failed ({primary_err}); retrying on CPU...")
-                    from_kwargs_fallback = {
-                        'cache_dir': self.cache_dir if not model_path else None,
-                        'low_cpu_mem_usage': False,
-                        'device_map': None,
-                    }
-                    if TORCH_AVAILABLE:
-                        from_kwargs_fallback['dtype'] = torch.float32
-                    self.rtdetr_model = RTDetrForObjectDetection.from_pretrained(
-                        model_path if model_path else repo_id,
-                        **from_kwargs_fallback,
-                    )
-                # Optional dynamic quantization for linear layers (CPU only)
-                if self.quantize_enabled and TORCH_AVAILABLE and (not gpu_available):
-                    try:
-                        # Prefer torch.ao.quantization; fall back to torch.quantization
-                        try:
-                            import torch.ao.quantization as tq
-                            quantize_dynamic = tq.quantize_dynamic  # type: ignore
-                        except Exception:
-                            import torch.quantization as tq  # type: ignore
-                            quantize_dynamic = tq.quantize_dynamic  # type: ignore
-                        self.rtdetr_model = quantize_dynamic(self.rtdetr_model, {torch.nn.Linear}, dtype=torch.qint8)
-                        logger.info("🔻 Applied dynamic INT8 quantization to RT-DETR linear layers (CPU)")
-                    except Exception as qe:
-                        logger.warning(f"RT-DETR dynamic quantization skipped: {qe}")
-                # Finalize
-                self.rtdetr_model.eval()
-                # Sanity check: ensure no parameter is left on 'meta' device
-                try:
-                    for n, p in self.rtdetr_model.named_parameters():
-                        dev = getattr(p, 'device', None)
-                        if dev is not None and getattr(dev, 'type', '') == 'meta':
-                            raise RuntimeError(f"Parameter {n} is on 'meta' device after load")
-                except Exception as e:
-                    # Nothing is published to the shared cache on this path;
-                    # self.rtdetr_model / self.rtdetr_processor stay assigned on the instance
-                    logger.error(f"RT-DETR load sanity check failed: {e}")
-                    self.rtdetr_loaded = False
-                    return False
-                # Publish shared cache
-                BubbleDetector._rtdetr_shared_model = self.rtdetr_model
-                BubbleDetector._rtdetr_shared_processor = self.rtdetr_processor
-                BubbleDetector._rtdetr_loaded = True
-                BubbleDetector._rtdetr_repo_id = repo_id
-                self.rtdetr_loaded = True
-                # Save the model ID that was used
-                self.config['rtdetr_loaded'] = True
-                self.config['rtdetr_model_id'] = repo_id
-                self._save_config()
-                # Reported location follows GPU availability, not the device actually used
-                # (the CPU fallback above still logs 'CUDA' when a GPU is present)
-                loc = 'CUDA' if gpu_available else 'CPU'
-                logger.info(f"✅ RT-DETR model loaded successfully ({loc})")
-                logger.info("   Classes: Empty bubbles, Text bubbles, Free text")
-                # Auto-convert to ONNX for RT-DETR only if explicitly enabled
-                if os.environ.get('AUTO_CONVERT_RTDETR_ONNX', 'false').lower() == 'true':
-                    onnx_path = os.path.join(self.cache_dir, 'rtdetr_comic.onnx')
-                    if self.convert_to_onnx('rtdetr', onnx_path):
-                        logger.info("🚀 RT-DETR converted to ONNX for faster inference")
-                        # Store ONNX path for later use
-                        self.config['rtdetr_onnx_path'] = onnx_path
-                        self._save_config()
-                        # Optionally quantize ONNX for reduced RAM
-                        if self.onnx_quantize_enabled:
-                            try:
-                                from onnxruntime.quantization import quantize_dynamic, QuantType
-                                quant_path = os.path.splitext(onnx_path)[0] + ".int8.onnx"
-                                # Reuse an existing quantized file unless FORCE_ONNX_REBUILD=true
-                                if not os.path.exists(quant_path) or os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true':
-                                    logger.info("🔻 Quantizing RT-DETR ONNX to INT8 (dynamic)...")
-                                    quantize_dynamic(model_input=onnx_path, model_output=quant_path, weight_type=QuantType.QInt8, op_types_to_quantize=['Conv', 'MatMul'])
-                                self.config['rtdetr_onnx_quantized_path'] = quant_path
-                                self._save_config()
-                                logger.info(f"✅ Quantized RT-DETR ONNX saved to: {quant_path}")
-                            except Exception as qe:
-                                logger.warning(f"ONNX quantization for RT-DETR skipped: {qe}")
-                    else:
-                        # convert_to_onnx() returned a falsy value; the torch model stays in use
-                        logger.info("ℹ️ Skipping RT-DETR ONNX export (converter not supported in current environment)")
-                return True
-            except Exception as e:
-                logger.error(f"❌ Failed to load RT-DETR: {e}")
-                self.rtdetr_loaded = False
-                return False
-    def check_rtdetr_available(self, model_id: str = None) -> bool:
-        """
-        Check whether an RT-DETR checkpoint is already present in a local HuggingFace cache.
-        A repo counts as available when ``models--{org}--{name}/snapshots`` exists under one of
-        the cache roots and contains at least one entry. Cache roots are taken from the
-        TRANSFORMERS_CACHE, HF_HUB_CACHE and HUGGINGFACE_HUB_CACHE environment variables, then
-        ``$HF_HOME/hub``, then the default ``~/.cache/huggingface/hub``. This module itself
-        overrides TRANSFORMERS_CACHE and HF_HOME when running frozen, so the default location
-        alone is not sufficient.
-        Args:
-            model_id: Optional HuggingFace model ID; falls back to ``self.rtdetr_repo``
-        Returns:
-            True if a cached snapshot was found, False otherwise (including on any error)
-        """
-        try:
-            # Use provided model_id or default
-            repo_id = model_id if model_id else self.rtdetr_repo
-            folder_name = "models--" + repo_id.replace("/", "--")
-            # Candidate cache roots, explicit overrides first
-            roots = [os.environ.get(var) for var in ('TRANSFORMERS_CACHE', 'HF_HUB_CACHE', 'HUGGINGFACE_HUB_CACHE')]
-            hf_home = os.environ.get('HF_HOME')
-            if hf_home:
-                roots.append(os.path.join(hf_home, 'hub'))
-            roots.append(str(Path.home() / ".cache" / "huggingface" / "hub"))
-            for root in roots:
-                if not root:
-                    continue
-                # Exact folder name only: a prefix glob would also accept e.g. "<name>-v2"
-                snapshots_dir = Path(root) / folder_name / "snapshots"
-                if snapshots_dir.is_dir() and any(snapshots_dir.iterdir()):
-                    return True
-            return False
-        except Exception:
-            # Best-effort probe: any filesystem problem is reported as "not cached"
-            return False
-    def detect_bubbles(self,
-                      image_path: str,
-                      confidence: float = None,
-                      iou_threshold: float = None,
-                      max_detections: int = None,
-                      use_rtdetr: bool = None) -> List[Tuple[int, int, int, int]]:
-        """
-        Detect speech bubbles in an image (backward compatible method).
-        When RT-DETR is requested (or auto-selected because it is loaded) the call is delegated
-        to the RT-DETR ONNX backend if loaded, else to the PyTorch backend. If neither is
-        loaded, detection falls through to the model selected by ``self.model_type``
-        ('yolo', 'onnx' or 'torch').
-        Args:
-            image_path: Path to image file
-            confidence: Minimum confidence threshold (0-1); None selects ``self.default_confidence``
-            iou_threshold: IOU threshold for NMS (0-1); None selects ``self.default_iou_threshold``
-            max_detections: Maximum number of detections to return; None/0 selects ``self.default_max_detections``
-            use_rtdetr: If True, use RT-DETR instead of YOLOv8 (if available); None auto-selects
-        Returns:
-            List of bubble bounding boxes as (x, y, width, height) tuples; an empty list when
-            stopped by the user, when no model is loaded, or when detection fails
-        """
-        # Check for stop at start
-        if self._check_stop():
-            self._log("⏹️ Bubble detection stopped by user", "warning")
-            return []
-        # Decide which model to use
-        if use_rtdetr is None:
-            # Auto-select: prefer RT-DETR if available
-            use_rtdetr = self.rtdetr_loaded
-        if use_rtdetr:
-            # Prefer ONNX backend if available, else PyTorch
-            if getattr(self, 'rtdetr_onnx_loaded', False):
-                return self.detect_with_rtdetr_onnx(
-                    image_path=image_path,
-                    confidence=confidence,
-                    return_all_bubbles=True
-                )
-            if self.rtdetr_loaded:
-                return self.detect_with_rtdetr(
-                    image_path=image_path,
-                    confidence=confidence,
-                    return_all_bubbles=True
-                )
-            # RT-DETR was requested but is not loaded: fall through to the YOLOv8-family model
-        # Original YOLOv8 detection
-        if not self.model_loaded:
-            logger.error("No model loaded. Call load_model() first.")
-            return []
-        # Use defaults only when a value was not supplied; an explicit 0.0 threshold is valid
-        if confidence is None:
-            confidence = self.default_confidence
-        if iou_threshold is None:
-            iou_threshold = self.default_iou_threshold
-        # A cap of 0 is meaningless, so falsy values keep selecting the default here
-        max_detections = max_detections or self.default_max_detections
-        try:
-            # Load image (also validates that the path is readable before inference)
-            image = cv2.imread(image_path)
-            if image is None:
-                logger.error(f"Failed to load image: {image_path}")
-                return []
-            h, w = image.shape[:2]
-            self._log(f"🔍 Detecting bubbles in {w}x{h} image")
-            # Check for stop before inference
-            if self._check_stop():
-                self._log("⏹️ Bubble detection inference stopped by user", "warning")
-                return []
-            if self.model_type == 'yolo':
-                # YOLOv8 inference; the per-detector cap (max_det_yolo) can only lower the limit
-                results = self.model(
-                    image_path,
-                    conf=confidence,
-                    iou=iou_threshold,
-                    max_det=min(max_detections, getattr(self, 'max_det_yolo', max_detections)),
-                    verbose=False
-                )
-                bubbles = []
-                for r in results:
-                    if r.boxes is not None:
-                        for box in r.boxes:
-                            # Get box coordinates (corner format) and convert to x, y, width, height
-                            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
-                            x, y = int(x1), int(y1)
-                            width = int(x2 - x1)
-                            height = int(y2 - y1)
-                            # Get confidence
-                            conf = float(box.conf[0])
-                            # Add to list
-                            if len(bubbles) < max_detections:
-                                bubbles.append((x, y, width, height))
-                            logger.debug(f"   Bubble: ({x},{y}) {width}x{height} conf={conf:.2f}")
-            elif self.model_type == 'onnx':
-                # ONNX inference
-                bubbles = self._detect_with_onnx(image, confidence, iou_threshold, max_detections)
-            elif self.model_type == 'torch':
-                # TorchScript inference
-                bubbles = self._detect_with_torchscript(image, confidence, iou_threshold, max_detections)
-            else:
-                logger.error(f"Unknown model type: {self.model_type}")
-                return []
-            logger.info(f"✅ Detected {len(bubbles)} speech bubbles")
-            time.sleep(0.1)  # Brief pause for stability
-            logger.debug("💤 Bubble detection pausing briefly for stability")
-            return bubbles
-        except Exception as e:
-            logger.error(f"Detection failed: {e}")
-            logger.error(traceback.format_exc())
-            return []
-    def detect_with_rtdetr(self,
-                          image_path: str = None,
-                          image: np.ndarray = None,
-                          confidence: float = None,
-                          return_all_bubbles: bool = False) -> Any:
-        """
-        Detect using RT-DETR model with 3-class detection (PyTorch backend).
-        Args:
-            image_path: Path to image file (used in preference to ``image`` when both are given)
-            image: Image array (BGR format)
-            confidence: Confidence threshold; None selects ``self.default_confidence``
-            return_all_bubbles: If True, return list of bubble boxes (for compatibility)
-                               If False, return dict with all classes
-        Returns:
-            If return_all_bubbles=True, a list of (x, y, width, height) boxes covering empty and
-            text bubbles; otherwise a dict with keys 'bubbles', 'text_bubbles' and 'text_free'.
-            The matching empty container is returned when stopped by the user, when RT-DETR is
-            not loaded, when no image could be obtained, or when detection fails.
-        """
-        # The "nothing found" value in the shape the caller asked for
-        empty_result = [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
-        # Check for stop at start
-        if self._check_stop():
-            self._log("⏹️ RT-DETR detection stopped by user", "warning")
-            return empty_result
-        if not self.rtdetr_loaded:
-            self._log("RT-DETR not loaded. Call load_rtdetr_model() first.", "warning")
-            return empty_result
-        # Only substitute the default when no threshold was given; 0.0 is a valid threshold
-        if confidence is None:
-            confidence = self.default_confidence
-        try:
-            # Load image
-            if image_path:
-                image = cv2.imread(image_path)
-                if image is None:
-                    # cv2.imread returns None instead of raising for unreadable files
-                    logger.error(f"Failed to load image: {image_path}")
-                    return empty_result
-            elif image is None:
-                logger.error("No image provided")
-                return empty_result
-            # Convert BGR to RGB for PIL
-            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            pil_image = Image.fromarray(image_rgb)
-            # Prepare image for model
-            inputs = self.rtdetr_processor(images=pil_image, return_tensors="pt")
-            # Move inputs to the same device as the model and match model dtype for floating tensors
-            model_device = next(self.rtdetr_model.parameters()).device if self.rtdetr_model is not None else (torch.device('cpu') if TORCH_AVAILABLE else 'cpu')
-            model_dtype = None
-            if TORCH_AVAILABLE and self.rtdetr_model is not None:
-                try:
-                    model_dtype = next(self.rtdetr_model.parameters()).dtype
-                except Exception:
-                    model_dtype = None
-            if TORCH_AVAILABLE:
-                new_inputs = {}
-                for k, v in inputs.items():
-                    if isinstance(v, torch.Tensor):
-                        v = v.to(model_device)
-                        if model_dtype is not None and torch.is_floating_point(v):
-                            v = v.to(model_dtype)
-                    new_inputs[k] = v
-                inputs = new_inputs
-            # Run inference with autocast when model is half/bfloat16 on CUDA
-            use_amp = TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == 'cuda' and (model_dtype in (torch.float16, torch.bfloat16))
-            autocast_dtype = model_dtype if model_dtype in (torch.float16, torch.bfloat16) else None
-            with torch.no_grad():
-                if use_amp and autocast_dtype is not None:
-                    with torch.autocast('cuda', dtype=autocast_dtype):
-                        outputs = self.rtdetr_model(**inputs)
-                else:
-                    outputs = self.rtdetr_model(**inputs)
-                # Brief pause for stability after inference
-                time.sleep(0.1)
-                logger.debug("💤 RT-DETR inference pausing briefly for stability")
-            # Post-process results; PIL size is (width, height), the processor wants (height, width)
-            target_sizes = torch.tensor([pil_image.size[::-1]]) if TORCH_AVAILABLE else None
-            if TORCH_AVAILABLE and hasattr(model_device, 'type') and model_device.type == "cuda":
-                target_sizes = target_sizes.to(model_device)
-            results = self.rtdetr_processor.post_process_object_detection(
-                outputs,
-                target_sizes=target_sizes,
-                threshold=confidence
-            )[0]
-            # Apply per-detector cap if configured
-            cap = getattr(self, 'max_det_rtdetr', self.default_max_detections)
-            if cap and len(results['boxes']) > cap:
-                # Keep top-scoring first
-                scores = results['scores']
-                top_idx = scores.topk(k=cap).indices if hasattr(scores, 'topk') else range(cap)
-                results = {
-                    'boxes': [results['boxes'][i] for i in top_idx],
-                    'scores': [results['scores'][i] for i in top_idx],
-                    'labels': [results['labels'][i] for i in top_idx]
-                }
-            logger.info(f"📊 RT-DETR found {len(results['boxes'])} detections above {confidence:.2f} confidence")
-            # Overall cap across all classes; it does not change per detection, so resolve it once
-            if isinstance(self.config, dict):
-                overall_cap = self.config.get('manga_settings', {}).get('ocr', {}).get('bubble_max_detections', self.default_max_detections)
-            else:
-                overall_cap = self.default_max_detections
-            # Organize detections by class
-            detections = {
-                'bubbles': [],       # Empty speech bubbles
-                'text_bubbles': [],  # Bubbles with text
-                'text_free': []      # Text without bubbles
-            }
-            for box, score, label in zip(results['boxes'], results['scores'], results['labels']):
-                x1, y1, x2, y2 = map(int, box.tolist())
-                width = x2 - x1
-                height = y2 - y1
-                # Store as (x, y, width, height) to match YOLOv8 format
-                bbox = (x1, y1, width, height)
-                label_id = label.item()
-                if label_id == self.CLASS_BUBBLE:
-                    detections['bubbles'].append(bbox)
-                elif label_id == self.CLASS_TEXT_BUBBLE:
-                    detections['text_bubbles'].append(bbox)
-                elif label_id == self.CLASS_TEXT_FREE:
-                    detections['text_free'].append(bbox)
-                # Stop early if we hit the configured cap across all classes
-                total_count = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
-                if total_count >= overall_cap:
-                    break
-            # Log results
-            total = len(detections['bubbles']) + len(detections['text_bubbles']) + len(detections['text_free'])
-            logger.info(f"✅ RT-DETR detected {total} objects:")
-            logger.info(f"   - Empty bubbles: {len(detections['bubbles'])}")
-            logger.info(f"   - Text bubbles: {len(detections['text_bubbles'])}")
-            logger.info(f"   - Free text: {len(detections['text_free'])}")
-            # Return format based on compatibility mode
-            if return_all_bubbles:
-                # Return all bubbles (empty + with text) for backward compatibility
-                return detections['bubbles'] + detections['text_bubbles']
-            return detections
-        except Exception as e:
-            logger.error(f"RT-DETR detection failed: {e}")
-            logger.error(traceback.format_exc())
-            return empty_result
-    def detect_all_text_regions(self, image_path: str = None, image: np.ndarray = None) -> List[Tuple[int, int, int, int]]:
-        """
-        Collect every text region reported by RT-DETR, inside a bubble or free-standing.
-        Args:
-            image_path: Path to image file, forwarded to detect_with_rtdetr
-            image: Image array, forwarded to detect_with_rtdetr
-        Returns:
-            Text-bubble boxes followed by free-text boxes; an empty list when RT-DETR is not loaded
-        """
-        if self.rtdetr_loaded:
-            per_class = self.detect_with_rtdetr(image_path=image_path, image=image, return_all_bubbles=False)
-            # Text inside bubbles first, then text outside any bubble
-            text_regions = per_class['text_bubbles'] + per_class['text_free']
-            logger.info(f"📝 Found {len(text_regions)} text regions total")
-            return text_regions
-        logger.warning("RT-DETR required for text detection")
-        return []
-    def _detect_with_onnx(self, image: np.ndarray, confidence: float,
-                         iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
-        """
-        Run detection using the ONNX session held in ``self.onnx_session``.
-        The image is stretched to 640x640 (no letterboxing), scaled to [0, 1] and fed as a
-        single-image batch. Each prediction is read as (x_center, y_center, width, height, score),
-        rescaled to the original image size, filtered by ``confidence`` and de-duplicated with NMS.
-        Args:
-            image: Image array as loaded by the caller
-            confidence: Minimum score for a prediction to be kept
-            iou_threshold: IOU threshold for NMS
-            max_detections: Maximum number of boxes to return
-        Returns:
-            List of (x, y, width, height) tuples in original-image pixels
-        """
-        # Preprocess image
-        img_size = 640  # Standard YOLOv8 input size
-        img_resized = cv2.resize(image, (img_size, img_size))
-        # NOTE(review): channel order is passed through unchanged (BGR when the caller used
-        # cv2.imread) - confirm whether the exported model expects RGB
-        img_norm = img_resized.astype(np.float32) / 255.0
-        img_transposed = np.transpose(img_norm, (2, 0, 1))
-        img_batch = np.expand_dims(img_transposed, axis=0)
-        # Run inference
-        input_name = self.onnx_session.get_inputs()[0].name
-        outputs = self.onnx_session.run(None, {input_name: img_batch})
-        # Process outputs (YOLOv8 format)
-        predictions = outputs[0][0]  # Remove batch dimension
-        # Original image size is the same for every prediction
-        img_h, img_w = image.shape[:2]
-        rects = []   # [x, y, width, height] - the rectangle layout cv2.dnn.NMSBoxes expects
-        scores = []
-        for pred in predictions.T:  # Transpose to get predictions per detection
-            if len(pred) >= 5:
-                x_center, y_center, width, height, obj_conf = pred[:5]
-                if obj_conf >= confidence:
-                    # Convert center format to top-left corner, then scale to original image size
-                    left = int((x_center - width / 2) * img_w / img_size)
-                    top = int((y_center - height / 2) * img_h / img_size)
-                    box_w = int(width * img_w / img_size)
-                    box_h = int(height * img_h / img_size)
-                    rects.append([left, top, box_w, box_h])
-                    scores.append(float(obj_conf))
-        # Apply NMS
-        bubbles = []
-        if rects:
-            indices = cv2.dnn.NMSBoxes(rects, scores, confidence, iou_threshold)
-            if len(indices) > 0:
-                # Flatten so both flat and nested index layouts are handled
-                for i in np.asarray(indices).flatten()[:max_detections]:
-                    left, top, box_w, box_h = rects[int(i)]
-                    bubbles.append((left, top, box_w, box_h))
-        return bubbles
-    def _detect_with_torchscript(self, image: np.ndarray, confidence: float,
-                                 iou_threshold: float, max_detections: int) -> List[Tuple[int, int, int, int]]:
-        """
-        Run the TorchScript model on the image (output decoding is not implemented yet).
-        The image is stretched to 640x640, scaled to [0, 1] and passed through ``self.model``,
-        but the raw outputs are discarded because their layout depends on the exported model.
-        A warning is logged so that the empty result is not mistaken for "no bubbles found".
-        Args:
-            image: Image array as loaded by the caller
-            confidence: Minimum confidence threshold (currently unused)
-            iou_threshold: IOU threshold for NMS (currently unused)
-            max_detections: Maximum number of detections (currently unused)
-        Returns:
-            Always an empty list
-        """
-        # Similar to ONNX but using PyTorch tensors
-        img_size = 640
-        img_resized = cv2.resize(image, (img_size, img_size))
-        img_norm = img_resized.astype(np.float32) / 255.0
-        # HWC -> CHW, then add the batch dimension
-        img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1).unsqueeze(0)
-        if self.use_gpu:
-            img_tensor = img_tensor.cuda()
-        with torch.no_grad():
-            self.model(img_tensor)
-        # TODO: decode the model outputs (similar to the ONNX path) once the output format is known
-        logger.warning("TorchScript output decoding is not implemented; returning no detections")
-        return []
-    def visualize_detections(self, image_path: str, bubbles: List[Tuple[int, int, int, int]] = None,
-                            output_path: str = None, use_rtdetr: bool = False) -> np.ndarray:
-        """
-        Draw labelled bounding boxes for detected bubbles on a copy of the image.
-        Args:
-            image_path: Path to original image
-            bubbles: Bubble boxes as (x, y, width, height); detected via detect_bubbles when None
-                     (ignored when the RT-DETR branch is taken)
-            output_path: Optional path to save visualization
-            use_rtdetr: If True and RT-DETR is loaded, run RT-DETR and colour boxes per class
-        Returns:
-            Image with drawn bounding boxes, or None if the image could not be read
-        """
-        source = cv2.imread(image_path)
-        if source is None:
-            logger.error(f"Failed to load image: {image_path}")
-            return None
-        canvas = source.copy()
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        def draw_box(rect, caption, bgr):
-            """Outline one box and write its caption on a filled strip above the top-left corner."""
-            left, top, box_w, box_h = rect
-            cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), bgr, 2)
-            (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)
-            cv2.rectangle(canvas, (left, top - text_h - 4), (left + text_w, top), bgr, -1)
-            cv2.putText(canvas, caption, (left, top - 2), font, 0.5, (255, 255, 255), 1)
-        if use_rtdetr and self.rtdetr_loaded:
-            # RT-DETR: one colour per class (tuples are in OpenCV's BGR order)
-            per_class = self.detect_with_rtdetr(image_path=image_path, return_all_bubbles=False)
-            palette = {
-                'bubbles': (0, 255, 0),       # Green for empty bubbles
-                'text_bubbles': (255, 0, 0),  # Blue for text bubbles
-                'text_free': (0, 0, 255)      # Red for free text
-            }
-            for class_name, rects in per_class.items():
-                bgr = palette[class_name]
-                for number, rect in enumerate(rects, start=1):
-                    draw_box(rect, f"{class_name.replace('_', ' ').title()} {number}", bgr)
-        else:
-            # Plain bubble list: detect on demand, draw everything in green
-            if bubbles is None:
-                bubbles = self.detect_bubbles(image_path)
-            for number, rect in enumerate(bubbles, start=1):
-                draw_box(rect, f"Bubble {number}", (0, 255, 0))
-        # Save if output path provided
-        if output_path:
-            cv2.imwrite(output_path, canvas)
-            logger.info(f"💾 Visualization saved to: {output_path}")
-        return canvas
-    def convert_to_onnx(self, model_path: str, output_path: str = None) -> bool:
-        """
-        Convert a YOLOv8 or RT-DETR model to ONNX format.
-        An existing file at the output path is reused unless the FORCE_ONNX_REBUILD environment
-        variable is 'true'. For RT-DETR, torch.onnx.dynamo_export is tried first and the legacy
-        torch.onnx.export is then attempted with opsets 19 down to 13. For YOLOv8 the
-        ultralytics exporter is used and the produced file is moved to the output path.
-        Args:
-            model_path: Path to model file or 'rtdetr' for loaded RT-DETR
-            output_path: Path for ONNX output (auto-generated inside self.cache_dir if None)
-        Returns:
-            True if conversion successful (or the output already exists), False otherwise
-        """
-        try:
-            logger.info(f"🔄 Converting {model_path} to ONNX...")
-            export_rtdetr = model_path == 'rtdetr' and self.rtdetr_loaded
-            # Generate output path if not provided
-            if output_path is None:
-                base_name = 'rtdetr_comic' if export_rtdetr else Path(model_path).stem
-                output_path = os.path.join(self.cache_dir, f"{base_name}.onnx")
-            # Check if already exists
-            force_rebuild = os.environ.get('FORCE_ONNX_REBUILD', 'false').lower() == 'true'
-            if os.path.exists(output_path) and not force_rebuild:
-                logger.info(f"✅ ONNX model already exists: {output_path}")
-                return True
-            # Handle RT-DETR conversion
-            if export_rtdetr:
-                if not TORCH_AVAILABLE:
-                    logger.error("PyTorch required for RT-DETR ONNX conversion")
-                    return False
-                # RT-DETR specific conversion
-                self.rtdetr_model.eval()
-                # Create dummy input (pixel values): BxCxHxW
-                dummy_input = torch.randn(1, 3, 640, 640)
-                if self.device == 'cuda':
-                    dummy_input = dummy_input.to('cuda')
-                # Wrap the model to return only tensors (logits, pred_boxes)
-                class _RTDetrExportWrapper(torch.nn.Module):
-                    def __init__(self, mdl):
-                        super().__init__()
-                        self.mdl = mdl
-                    def forward(self, images):
-                        out = self.mdl(pixel_values=images)
-                        # Handle dict/ModelOutput/tuple outputs
-                        logits = None
-                        boxes = None
-                        try:
-                            if isinstance(out, dict):
-                                logits = out.get('logits', None)
-                                boxes = out.get('pred_boxes', out.get('boxes', None))
-                            else:
-                                logits = getattr(out, 'logits', None)
-                                boxes = getattr(out, 'pred_boxes', getattr(out, 'boxes', None))
-                        except Exception:
-                            pass
-                        # Positional fallback when the named fields were not found
-                        if (logits is None or boxes is None) and isinstance(out, (tuple, list)) and len(out) >= 2:
-                            logits, boxes = out[0], out[1]
-                        return logits, boxes
-                wrapper = _RTDetrExportWrapper(self.rtdetr_model)
-                if self.device == 'cuda':
-                    wrapper = wrapper.to('cuda')
-                # Try PyTorch 2.x dynamo_export first (more tolerant of newer aten ops)
-                exported = False
-                try:
-                    from torch.onnx import dynamo_export
-                    exp = dynamo_export(wrapper, dummy_input)
-                    # exp may have save(); otherwise, it may expose model_proto
-                    try:
-                        exp.save(output_path)  # type: ignore
-                        exported = True
-                    except Exception:
-                        try:
-                            import onnx as _onnx
-                            _onnx.save(exp.model_proto, output_path)  # type: ignore
-                            exported = True
-                        except Exception as _se:
-                            logger.warning(f"dynamo_export produced model but could not save: {_se}")
-                except Exception as de:
-                    logger.warning(f"dynamo_export failed; falling back to legacy exporter: {de}")
-                if exported:
-                    logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (dynamo_export)")
-                    return True
-                # Legacy exporter with opset fallback (newest opset first)
-                last_err = None
-                for opset in [19, 18, 17, 16, 15, 14, 13]:
-                    try:
-                        torch.onnx.export(
-                            wrapper,
-                            dummy_input,
-                            output_path,
-                            export_params=True,
-                            opset_version=opset,
-                            do_constant_folding=True,
-                            input_names=['pixel_values'],
-                            output_names=['logits', 'boxes'],
-                            dynamic_axes={
-                                'pixel_values': {0: 'batch', 2: 'height', 3: 'width'},
-                                'logits': {0: 'batch'},
-                                'boxes': {0: 'batch'}
-                            }
-                        )
-                        logger.info(f"✅ RT-DETR ONNX saved to: {output_path} (opset {opset})")
-                        return True
-                    except Exception as _e:
-                        last_err = _e
-                        try:
-                            msg = str(_e)
-                        except Exception:
-                            msg = ''
-                        logger.warning(f"RT-DETR ONNX export failed at opset {opset}: {msg}")
-                logger.error(f"All RT-DETR ONNX export attempts failed. Last error: {last_err}")
-                return False
-            # Handle YOLOv8 conversion
-            if YOLO_AVAILABLE and os.path.exists(model_path):
-                logger.info(f"Loading YOLOv8 model from: {model_path}")
-                # Load model
-                model = YOLO(model_path)
-                # Export to ONNX - this returns the path to the exported model
-                logger.info("Exporting to ONNX format...")
-                exported_path = model.export(format='onnx', imgsz=640, simplify=True)
-                # exported_path could be a string or Path object
-                produced = str(exported_path) if exported_path else None
-                if not (produced and os.path.exists(produced)):
-                    # Fallback: look for "<weights name>.onnx" next to the weights file. Only the
-                    # extension is swapped, so other ".pt" occurrences in the path are untouched
-                    produced = os.path.splitext(model_path)[0] + '.onnx'
-                    if not os.path.exists(produced):
-                        logger.error("ONNX export failed - no output file found")
-                        return False
-                # Move to desired location if different
-                if produced != output_path:
-                    import shutil
-                    logger.info(f"Moving ONNX from {produced} to {output_path}")
-                    shutil.move(produced, output_path)
-                logger.info(f"✅ YOLOv8 ONNX saved to: {output_path}")
-                return True
-            logger.error(f"Cannot convert {model_path}: Model not found or dependencies missing")
-            return False
-        except Exception as e:
-            logger.error(f"Conversion failed: {e}")
-            # Avoid noisy full stack trace in production logs; return False gracefully
-            return False
-    def batch_detect(self, image_paths: List[str], **kwargs) -> Dict[str, List[Tuple[int, int, int, int]]]:
-        """
-        Run detect_bubbles over several images, one after another.
-        Args:
-            image_paths: List of image paths
-            **kwargs: Forwarded unchanged to detect_bubbles (confidence, iou_threshold, max_detections, use_rtdetr)
-        Returns:
-            Dictionary mapping each image path to the bubble list detect_bubbles returned for it
-        """
-        detections_by_path = {}
-        for position, path in enumerate(image_paths, start=1):
-            logger.info(f"Processing image {position}/{len(image_paths)}: {os.path.basename(path)}")
-            detections_by_path[path] = self.detect_bubbles(path, **kwargs)
-        return detections_by_path
-    def unload(self, release_shared: bool = False):
-        """Drop the models and sessions this detector instance holds (best-effort).
-        Every step is guarded separately so that a failure in one never prevents the others.
-        Args:
-            release_shared: If True, also clear class-level shared RT-DETR caches.
-        """
-        try:
-            # ONNX sessions are cleared only when currently populated
-            for session_attr in ('onnx_session', 'rtdetr_onnx_session'):
-                try:
-                    if getattr(self, session_attr, None) is not None:
-                        setattr(self, session_attr, None)
-                except Exception:
-                    pass
-            # Model handles become None and loaded-flags become False, for existing attributes only
-            resets = [(name, None) for name in ('model', 'rtdetr_model', 'rtdetr_processor')]
-            resets += [(name, False) for name in ('model_loaded', 'rtdetr_loaded', 'rtdetr_onnx_loaded')]
-            for name, cleared_value in resets:
-                try:
-                    if hasattr(self, name):
-                        setattr(self, name, cleared_value)
-                except Exception:
-                    pass
-            # Optional: release the class-level RT-DETR cache as well
-            if release_shared:
-                try:
-                    BubbleDetector._rtdetr_shared_model = None
-                    BubbleDetector._rtdetr_shared_processor = None
-                    BubbleDetector._rtdetr_loaded = False
-                except Exception:
-                    pass
-            # Hand cached CUDA memory back, when torch and a GPU are present
-            try:
-                if TORCH_AVAILABLE and torch is not None and torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-            except Exception:
-                pass
-            # Finally trigger a garbage-collection pass
-            try:
-                import gc
-                gc.collect()
-            except Exception:
-                pass
-        except Exception:
-            # Best-effort only
-            pass
-    def get_bubble_masks(self, image_path: str, bubbles: List[Tuple[int, int, int, int]]) -> np.ndarray:
-        """
-        Build a single-channel mask that marks every bubble rectangle.
-        Args:
-            image_path: Path of the source image; it is read only to obtain the mask size
-            bubbles: Bounding boxes given as (x, y, width, height)
-        Returns:
-            uint8 mask with the image's height and width where bubble rectangles are
-            filled with 255, or None when the image cannot be read
-        """
-        source = cv2.imread(image_path)
-        if source is None:
-            return None
-        height, width = source.shape[:2]
-        bubble_mask = np.zeros((height, width), dtype=np.uint8)
-        for left, top, box_w, box_h in bubbles:
-            top_left = (left, top)
-            bottom_right = (left + box_w, top + box_h)
-            # Thickness -1 makes OpenCV draw a filled rectangle
-            cv2.rectangle(bubble_mask, top_left, bottom_right, 255, -1)
-        return bubble_mask
-    def filter_bubbles_by_size(self, bubbles: List[Tuple[int, int, int, int]],
-                              min_area: int = 100,
-                              max_area: int = None) -> List[Tuple[int, int, int, int]]:
-        """
-        Keep only the bubbles whose box area lies within the given bounds.
-        Args:
-            bubbles: Bounding boxes given as (x, y, width, height)
-            min_area: Smallest accepted area (width * height), inclusive
-            max_area: Largest accepted area, inclusive; None disables the upper bound
-        Returns:
-            New list of (x, y, width, height) tuples in their original order
-        """
-        def _area_accepted(area):
-            # Both bounds are inclusive; the upper bound is optional
-            return area >= min_area and (max_area is None or area <= max_area)
-        return [(x, y, w, h) for x, y, w, h in bubbles if _area_accepted(w * h)]
-    def merge_overlapping_bubbles(self, bubbles: List[Tuple[int, int, int, int]],
-                                 overlap_threshold: float = 0.1) -> List[Tuple[int, int, int, int]]:
-        """
-        Merge overlapping bubble detections.
-        Two boxes are replaced by their common bounding box when their intersection
-        area divided by the smaller box's area reaches overlap_threshold. Merge passes
-        are repeated until a pass merges nothing, so a box that only starts to overlap
-        a neighbour after growing is merged too (a single pass leaves such boxes
-        overlapping, depending on input order).
-        Args:
-            bubbles: List of bubble bounding boxes as (x, y, width, height)
-            overlap_threshold: Minimum overlap ratio to merge
-        Returns:
-            Merged list of bubbles as (x, y, width, height) tuples of ints
-        """
-        if not bubbles:
-            return []
-        def _merge_pass(corner_boxes):
-            # One greedy pass: each unused box absorbs every later box it overlaps enough
-            merged_boxes = []
-            absorbed = set()
-            for i, (x1, y1, x2, y2) in enumerate(corner_boxes):
-                if i in absorbed:
-                    continue
-                for j in range(i + 1, len(corner_boxes)):
-                    if j in absorbed:
-                        continue
-                    ox1, oy1, ox2, oy2 = corner_boxes[j]
-                    inter_w = min(x2, ox2) - max(x1, ox1)
-                    inter_h = min(y2, oy2) - max(y1, oy1)
-                    if inter_w <= 0 or inter_h <= 0:
-                        continue
-                    # A positive intersection implies both boxes have positive area,
-                    # so the division below cannot hit zero
-                    smaller_area = min((x2 - x1) * (y2 - y1), (ox2 - ox1) * (oy2 - oy1))
-                    if (inter_w * inter_h) / smaller_area >= overlap_threshold:
-                        # Grow the current box to the union of both boxes
-                        x1, y1 = min(x1, ox1), min(y1, oy1)
-                        x2, y2 = max(x2, ox2), max(y2, oy2)
-                        absorbed.add(j)
-                merged_boxes.append((x1, y1, x2, y2))
-            return merged_boxes
-        # Work in corner form (x1, y1, x2, y2); convert back to (x, y, w, h) at the end
-        boxes = [(x, y, x + w, y + h) for x, y, w, h in bubbles]
-        while True:
-            merged = _merge_pass(boxes)
-            # Each merge removes one box, so an unchanged count means the result is stable
-            if len(merged) == len(boxes):
-                break
-            boxes = merged
-        return [(int(x1), int(y1), int(x2 - x1), int(y2 - y1)) for x1, y1, x2, y2 in merged]
-    # ============================
-    # RT-DETR (ONNX) BACKEND
-    # ============================
-    def load_rtdetr_onnx_model(self, model_id: str = None, force_reload: bool = False) -> bool:
-        """
-        Load RT-DETR ONNX model using onnxruntime. Downloads detector.onnx and config.json
-        from the provided Hugging Face repo if not already cached.
-        Args:
-            model_id: Hugging Face repo id; self.rtdetr_onnx_repo is used when it is empty
-            force_reload: If True, build a new session even when a shared one is already loaded
-        Returns:
-            True when a session is attached to this instance; False when ONNX Runtime or
-            huggingface-hub is unavailable, or when the download or session creation fails
-        """
-        if not ONNX_AVAILABLE:
-            logger.error("ONNX Runtime not available for RT-DETR ONNX backend")
-            return False
-        try:
-            # If singleton mode and already loaded, just attach shared session.
-            # Singleton mode comes from config['manga_settings']['advanced']['use_singleton_models']
-            # and defaults to True when the key is missing or the config cannot be read.
-            try:
-                adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
-                singleton = bool(adv.get('use_singleton_models', True))
-            except Exception:
-                singleton = True
-            if singleton and BubbleDetector._rtdetr_onnx_loaded and not force_reload and BubbleDetector._rtdetr_onnx_shared_session is not None:
-                self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
-                self.rtdetr_onnx_loaded = True
-                return True
-            repo = model_id or self.rtdetr_onnx_repo
-            # Imported lazily so a missing huggingface-hub only disables this backend
-            try:
-                from huggingface_hub import hf_hub_download
-            except Exception as e:
-                logger.error(f"huggingface-hub required to fetch RT-DETR ONNX: {e}")
-                return False
-            # Ensure local models dir (use configured cache_dir directly: e.g., 'models')
-            cache_dir = self.cache_dir
-            os.makedirs(cache_dir, exist_ok=True)
-            # Download files into models/ and avoid symlinks so the file is visible there.
-            # config.json is optional here: a failed download is ignored, whereas a failed
-            # detector.onnx download falls through to the outer except and returns False.
-            try:
-                _ = hf_hub_download(repo_id=repo, filename='config.json', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
-            except Exception:
-                pass
-            onnx_fp = hf_hub_download(repo_id=repo, filename='detector.onnx', cache_dir=cache_dir, local_dir=cache_dir, local_dir_use_symlinks=False)
-            # Remember the model path at class level; detect_with_rtdetr_onnx uses it to rebuild a CPU session
-            BubbleDetector._rtdetr_onnx_model_path = onnx_fp
-            # Pick providers: prefer CUDA if available; otherwise CPU. Do NOT use DML.
-            providers = ['CPUExecutionProvider']
-            try:
-                avail = ort.get_available_providers() if ONNX_AVAILABLE else []
-                if 'CUDAExecutionProvider' in avail:
-                    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
-            except Exception:
-                pass
-            # Session options with reduced memory arena and optional thread limiting in singleton mode
-            so = ort.SessionOptions()
-            try:
-                so.enable_mem_pattern = False
-                so.enable_cpu_mem_arena = False
-            except Exception:
-                pass
-            # If singleton models mode is enabled in config, limit ORT threading to reduce CPU spikes
-            # (the config is re-read here; a failure simply leaves the default threading in place)
-            try:
-                adv = (self.config or {}).get('manga_settings', {}).get('advanced', {}) if isinstance(self.config, dict) else {}
-                if bool(adv.get('use_singleton_models', True)):
-                    so.intra_op_num_threads = 1
-                    so.inter_op_num_threads = 1
-                    try:
-                        so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
-                    except Exception:
-                        pass
-                    try:
-                        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
-                    except Exception:
-                        pass
-            except Exception:
-                pass
-            # Create session (serialize creation in singleton mode to avoid device storms)
-            if singleton:
-                with BubbleDetector._rtdetr_onnx_init_lock:
-                    # Re-check after acquiring lock: another caller may have finished loading meanwhile
-                    if BubbleDetector._rtdetr_onnx_loaded and BubbleDetector._rtdetr_onnx_shared_session is not None and not force_reload:
-                        self.rtdetr_onnx_session = BubbleDetector._rtdetr_onnx_shared_session
-                        self.rtdetr_onnx_loaded = True
-                        return True
-                    sess = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
-                    # Publish the session and its providers at class level, then attach it to this instance
-                    BubbleDetector._rtdetr_onnx_shared_session = sess
-                    BubbleDetector._rtdetr_onnx_loaded = True
-                    BubbleDetector._rtdetr_onnx_providers = providers
-                    self.rtdetr_onnx_session = sess
-                    self.rtdetr_onnx_loaded = True
-            else:
-                # Per-instance session; the class-level shared session and providers are left untouched
-                self.rtdetr_onnx_session = ort.InferenceSession(onnx_fp, providers=providers, sess_options=so)
-                self.rtdetr_onnx_loaded = True
-            logger.info("✅ RT-DETR (ONNX) model ready")
-            return True
-        except Exception as e:
-            # Any failure leaves this instance in a clean "not loaded" state
-            logger.error(f"Failed to load RT-DETR ONNX: {e}")
-            self.rtdetr_onnx_session = None
-            self.rtdetr_onnx_loaded = False
-            return False
-    def detect_with_rtdetr_onnx(self,
-                                image_path: str = None,
-                                image: np.ndarray = None,
-                                confidence: float = 0.3,
-                                return_all_bubbles: bool = False) -> Any:
-        """Detect using RT-DETR ONNX backend.
-        Returns bubbles list if return_all_bubbles else dict by classes similar to PyTorch path.
-        Args:
-            image_path: Image file read with OpenCV; takes precedence over image when given
-            image: Image array, used only when image_path is None (treated as BGR)
-            confidence: Detections scoring below this value are dropped
-            return_all_bubbles: If True return a flat list holding the empty-bubble and
-                text-bubble boxes (free text excluded); otherwise a dict with the keys
-                'bubbles', 'text_bubbles' and 'text_free'
-        Returns:
-            Boxes as (x, y, width, height). When the model is not loaded or anything fails,
-            the matching empty result is returned ([] or the dict with empty lists).
-        """
-        if not self.rtdetr_onnx_loaded or self.rtdetr_onnx_session is None:
-            logger.warning("RT-DETR ONNX not loaded")
-            return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
-        try:
-            # Acquire image (image_path wins when both image_path and image are given)
-            if image_path is not None:
-                import cv2
-                image = cv2.imread(image_path)
-                if image is None:
-                    raise RuntimeError(f"Failed to read image: {image_path}")
-                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            else:
-                if image is None:
-                    raise RuntimeError("No image provided")
-                # Assume image is BGR np.ndarray if from OpenCV;
-                # if the colour conversion fails the array is used unchanged
-                try:
-                    import cv2
-                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-                except Exception:
-                    image_rgb = image
-            # To PIL then resize 640x640 as in reference
-            from PIL import Image as _PILImage
-            pil_image = _PILImage.fromarray(image_rgb)
-            im_resized = pil_image.resize((640, 640))
-            # Scale pixel values to [0, 1] as float32, move channels first and add a batch axis
-            # (assumes a 3-channel image - a 2-D grayscale array would fail at the transpose)
-            arr = np.asarray(im_resized, dtype=np.float32) / 255.0
-            arr = np.transpose(arr, (2, 0, 1))  # (3,H,W)
-            im_data = arr[np.newaxis, ...]
-            # PIL reports size as (width, height); the pre-resize size is passed to the model
-            w, h = pil_image.size
-            orig_size = np.array([[w, h]], dtype=np.int64)
-            # Run with a concurrency guard to prevent device hangs and limit memory usage
-            # Apply semaphore for ALL providers (not just DML) to control concurrency
-            # Providers are the ones recorded at class level by the loader (empty list if none recorded)
-            providers = BubbleDetector._rtdetr_onnx_providers or []
-            def _do_run(session):
-                # Feed the resized image together with the original (width, height)
-                return session.run(None, {
-                    'images': im_data,
-                    'orig_target_sizes': orig_size
-                })
-            # Always use semaphore to limit concurrent RT-DETR calls
-            acquired = False
-            try:
-                BubbleDetector._rtdetr_onnx_sema.acquire()
-                acquired = True
-                # Special DML error handling
-                if 'DmlExecutionProvider' in providers:
-                    try:
-                        outputs = _do_run(self.rtdetr_onnx_session)
-                    except Exception as dml_err:
-                        msg = str(dml_err)
-                        # NOTE(review): 887A0005 / 887A0006 look like DXGI device-removed / device-hung
-                        # error codes - confirm against the DirectX documentation
-                        if '887A0005' in msg or '887A0006' in msg or 'Dml' in msg:
-                            # Rebuild CPU session and retry once
-                            try:
-                                base_path = BubbleDetector._rtdetr_onnx_model_path
-                                if base_path:
-                                    so = ort.SessionOptions()
-                                    so.enable_mem_pattern = False
-                                    so.enable_cpu_mem_arena = False
-                                    cpu_providers = ['CPUExecutionProvider']
-                                    # Serialize rebuild; the CPU session replaces the shared one for later calls
-                                    with BubbleDetector._rtdetr_onnx_init_lock:
-                                        sess = ort.InferenceSession(base_path, providers=cpu_providers, sess_options=so)
-                                        BubbleDetector._rtdetr_onnx_shared_session = sess
-                                        BubbleDetector._rtdetr_onnx_providers = cpu_providers
-                                        self.rtdetr_onnx_session = sess
-                                    outputs = _do_run(self.rtdetr_onnx_session)
-                                else:
-                                    # No model path recorded, so a rebuild is impossible: re-raise the DML error
-                                    raise
-                            except Exception:
-                                raise
-                        else:
-                            raise
-                else:
-                    # Non-DML providers - just run directly
-                    outputs = _do_run(self.rtdetr_onnx_session)
-            finally:
-                # Release only if the acquire above succeeded
-                if acquired:
-                    try:
-                        BubbleDetector._rtdetr_onnx_sema.release()
-                    except Exception:
-                        pass
-            # outputs expected: labels, boxes, scores
-            labels, boxes, scores = outputs[:3]
-            # Drop a leading batch axis of size 1 from each output
-            if labels.ndim == 2 and labels.shape[0] == 1:
-                labels = labels[0]
-            if scores.ndim == 2 and scores.shape[0] == 1:
-                scores = scores[0]
-            if boxes.ndim == 3 and boxes.shape[0] == 1:
-                boxes = boxes[0]
-            detections = {'bubbles': [], 'text_bubbles': [], 'text_free': []}
-            bubbles_all = []
-            for lab, box, scr in zip(labels, boxes, scores):
-                if float(scr) < float(confidence):
-                    continue
-                # Box is read in corner form (x1, y1, x2, y2), truncated to ints and stored as (x, y, w, h)
-                # presumably already scaled to the original image via orig_target_sizes - TODO confirm
-                x1, y1, x2, y2 = map(int, box)
-                bbox = (x1, y1, x2 - x1, y2 - y1)
-                label_id = int(lab)
-                if label_id == self.CLASS_BUBBLE:
-                    detections['bubbles'].append(bbox)
-                    bubbles_all.append(bbox)
-                elif label_id == self.CLASS_TEXT_BUBBLE:
-                    detections['text_bubbles'].append(bbox)
-                    bubbles_all.append(bbox)
-                elif label_id == self.CLASS_TEXT_FREE:
-                    # Free text is reported in the dict only; it is not part of bubbles_all
-                    detections['text_free'].append(bbox)
-            return bubbles_all if return_all_bubbles else detections
-        except Exception as e:
-            # Best-effort: log the failure and hand back an empty result of the requested shape
-            logger.error(f"RT-DETR ONNX detection failed: {e}")
-            return [] if return_all_bubbles else {'bubbles': [], 'text_bubbles': [], 'text_free': []}
-# Standalone utility functions
-def download_model_from_huggingface(repo_id: str = "ogkalu/comic-speech-bubble-detector-yolov8m",
-                                   filename: str = "comic-speech-bubble-detector-yolov8m.pt",
-                                   cache_dir: str = "models") -> str:
-    """
-    Fetch a model file from the Hugging Face Hub into a local directory.
-    Args:
-        repo_id: Repository on the Hub that hosts the file
-        filename: Name of the file inside that repository
-        cache_dir: Directory used both as hub cache and as download target; created if missing
-    Returns:
-        Local path of the downloaded file, or None when huggingface-hub is not
-        installed or the download fails
-    """
-    try:
-        from huggingface_hub import hf_hub_download
-        os.makedirs(cache_dir, exist_ok=True)
-        logger.info(f"📥 Downloading {filename} from {repo_id}...")
-        download_kwargs = {
-            'repo_id': repo_id,
-            'filename': filename,
-            'cache_dir': cache_dir,
-            'local_dir': cache_dir,
-        }
-        local_path = hf_hub_download(**download_kwargs)
-        logger.info(f"✅ Model downloaded to: {local_path}")
-    except ImportError:
-        logger.error("huggingface-hub package required. Install with: pip install huggingface-hub")
-        return None
-    except Exception as download_error:
-        logger.error(f"Download failed: {download_error}")
-        return None
-    return local_path
-def download_rtdetr_model(cache_dir: str = "models") -> bool:
-    """
-    Pre-fetch the RT-DETR processor and model so they are cached locally.
-    Args:
-        cache_dir: Directory handed to from_pretrained as its cache location
-    Returns:
-        True when both downloads succeed; False when transformers is unavailable
-        or any download step raises
-    """
-    if not TRANSFORMERS_AVAILABLE:
-        logger.error("Transformers required. Install with: pip install transformers")
-        return False
-    repo_name = "ogkalu/comic-text-and-bubble-detector"
-    try:
-        logger.info("📥 Downloading RT-DETR model...")
-        from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
-        # from_pretrained downloads and caches as a side effect; the loaded
-        # objects themselves are not needed here (processor first, then model)
-        for component in (RTDetrImageProcessor, RTDetrForObjectDetection):
-            component.from_pretrained(repo_name, cache_dir=cache_dir)
-        logger.info("✅ RT-DETR model downloaded successfully")
-    except Exception as download_error:
-        logger.error(f"Download failed: {download_error}")
-        return False
-    return True
-# Example usage and testing
-if __name__ == "__main__":
-    import sys
-    # Help text shared by the "no arguments" and "unknown command" paths
-    usage_lines = (
-        "Usage:",
-        "  python bubble_detector.py download",
-        "  python bubble_detector.py detect <model_path> <image_path> [output_path]",
-        "  python bubble_detector.py test-both <image_path>",
-    )
-    # The detector is created up front, whatever command follows
-    detector = BubbleDetector()
-    cli_args = sys.argv[1:]
-    command = cli_args[0] if cli_args else None
-    if command is None:
-        print("Bubble Detector Module (YOLOv8 + RT-DETR)")
-        for usage_line in usage_lines:
-            print(usage_line)
-    elif command == "download":
-        # Fetch the YOLOv8 weights first, then the RT-DETR model
-        weights_path = download_model_from_huggingface()
-        if weights_path:
-            print(f"YOLOv8 model downloaded to: {weights_path}")
-        if download_rtdetr_model():
-            print("RT-DETR model downloaded")
-    elif command == "detect" and len(cli_args) > 2:
-        model_path, image_path = cli_args[1], cli_args[2]
-        # The backend is chosen from the model path: anything containing "rtdetr" uses RT-DETR
-        wants_rtdetr = 'rtdetr' in model_path.lower()
-        if wants_rtdetr:
-            if detector.load_rtdetr_model():
-                results = detector.detect_with_rtdetr(image_path)
-                print(f"RT-DETR Detection:")
-                print(f"  Empty bubbles: {len(results['bubbles'])}")
-                print(f"  Text bubbles: {len(results['text_bubbles'])}")
-                print(f"  Free text: {len(results['text_free'])}")
-        elif detector.load_model(model_path):
-            bubbles = detector.detect_bubbles(image_path, confidence=0.5)
-            print(f"YOLOv8 detected {len(bubbles)} bubbles:")
-            for number, (x, y, w, h) in enumerate(bubbles, start=1):
-                print(f"  Bubble {number}: position=({x},{y}) size=({w}x{h})")
-        # Optional fourth argument: write a visualisation to that path
-        if len(cli_args) > 3:
-            detector.visualize_detections(image_path, output_path=cli_args[3],
-                                         use_rtdetr=wants_rtdetr)
-    elif command == "test-both" and len(cli_args) > 1:
-        image_path = cli_args[1]
-        # YOLOv8 is only exercised when its weights already exist locally
-        yolo_path = "models/comic-speech-bubble-detector-yolov8m.pt"
-        if os.path.exists(yolo_path):
-            detector.load_model(yolo_path)
-            yolo_bubbles = detector.detect_bubbles(image_path, use_rtdetr=False)
-            print(f"YOLOv8: {len(yolo_bubbles)} bubbles")
-        if detector.load_rtdetr_model():
-            rtdetr_bubbles = detector.detect_bubbles(image_path, use_rtdetr=True)
-            print(f"RT-DETR: {len(rtdetr_bubbles)} bubbles")
-    else:
-        for usage_line in usage_lines:
-            print(usage_line)