Spaces:

Camais03
/

camie-tagger-v2-app

Running

App Files Files Community

Camais03 committed on Sep 1

Commit

e7d3e33

verified ·

1 Parent(s): ac1737a

Upload 6 files

Browse files

Files changed (6) hide show

utils/__init__.py +1 -0
utils/file_utils.py +127 -0
utils/image_processing.py +511 -0
utils/model_loader.py +379 -0
utils/onnx_processing.py +729 -0
utils/ui_components.py +137 -0

utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Make utils a proper Python package

utils/file_utils.py ADDED Viewed

	@@ -0,0 +1,127 @@

+"""
+File utilities for Image Tagger application.
+"""
+import os
+import time
def save_tags_to_file(image_path, all_tags, original_filename=None, custom_dir=None, overwrite=False):
    """
    Save tags to a text file in a dedicated 'saved_tags' folder or custom directory.

    Args:
        image_path: Path to the original image
        all_tags: List of all tags to save (an empty list produces an empty file)
        original_filename: Original filename if uploaded through Streamlit.
            Only its final path component is used, so a name that carries
            directory parts cannot place the file outside the save directory.
        custom_dir: Custom directory to save tags to. It is used only when it
            already exists as a directory; otherwise the 'saved_tags' folder
            next to the app is used.
        overwrite: If True, replace an existing tag file with the same name.
            If False, an existing file is kept and the new one gets a
            timestamp (plus a counter if needed) appended to its name.

    Returns:
        Path to the saved file
    """
    # Determine the save directory
    if custom_dir and os.path.isdir(custom_dir):
        save_dir = custom_dir
    else:
        # Dedicated folder for saved tags in the app's root directory
        app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        save_dir = os.path.join(app_dir, "saved_tags")
    os.makedirs(save_dir, exist_ok=True)

    # Determine the base filename (without extension)
    if original_filename:
        # Upload names are user-controlled: normalise Windows separators and
        # keep only the last component so the output stays inside save_dir.
        upload_name = os.path.basename(str(original_filename).replace("\\", "/"))
        base_name = os.path.splitext(upload_name)[0]
    else:
        base_name = os.path.splitext(os.path.basename(image_path))[0]

    output_path = os.path.join(save_dir, f"{base_name}.txt")

    # Without overwrite, pick a name that does not exist yet
    if not overwrite and os.path.exists(output_path):
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        stem = f"{base_name}_{timestamp}"
        output_path = os.path.join(save_dir, f"{stem}.txt")
        # The timestamp has one-second resolution; add a counter so two saves
        # within the same second do not clobber each other.
        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(save_dir, f"{stem}_{counter}.txt")
            counter += 1

    # Write the tags to file
    with open(output_path, 'w', encoding='utf-8') as f:
        if all_tags:
            # A comma follows every tag, including the last one
            f.write(", ".join(all_tags) + ",")
    return output_path
def get_default_save_locations():
    """
    Build the list of default places where tag files can be saved.

    The list holds the app's own 'saved_tags' folder followed by the user's
    Desktop, Downloads and Documents folders. Every folder in the list is
    created if it does not exist yet.

    Returns:
        List of default save locations
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    locations = [os.path.join(project_root, "saved_tags")]
    locations.extend(
        os.path.expanduser("~/" + folder_name)
        for folder_name in ("Desktop", "Downloads", "Documents")
    )
    # Make sure each location is present on disk before offering it
    for location in locations:
        os.makedirs(location, exist_ok=True)
    return locations
def apply_category_limits(result, category_limits):
    """
    Trim the per-category tag lists of a result according to category limits.

    The result dictionary is modified in place and also returned.

    Args:
        result: Result dictionary containing 'success', 'tags' and 'all_tags'
        category_limits: Dictionary mapping categories to their tag limits
                         (0 = exclude category, -1 = no limit/include all)
    Returns:
        The same result dictionary with limits applied
    """
    if not category_limits or not result['success']:
        return result

    tags_by_category = result['tags']
    # Walk over a snapshot of the keys because categories may be removed
    for name in list(tags_by_category):
        cap = category_limits.get(name, -1)  # categories without a limit are kept whole
        entries = tags_by_category[name]
        if cap == 0:
            del tags_by_category[name]
        elif cap > 0 and len(entries) > cap:
            tags_by_category[name] = entries[:cap]

    # Rebuild the flat tag list from what is left
    result['tags'] = tags_by_category
    result['all_tags'] = [tag for entries in tags_by_category.values() for tag, _ in entries]
    return result

utils/image_processing.py ADDED Viewed

	@@ -0,0 +1,511 @@

+"""
+Image processing functions for the Image Tagger application.
+"""
+import os
+import traceback
+import glob
def process_image(image_path, model, thresholds, metadata, threshold_profile, active_threshold, active_category_thresholds, min_confidence=0.1):
    """
    Tag a single image with the model and organise the predictions by category.

    Args:
        image_path: Path to the image
        model: The image tagger model; must offer predict() and dataset.get_tag_info()
        thresholds: Thresholds dictionary (not read by this function)
        metadata: Metadata dictionary (not read by this function)
        threshold_profile: Selected threshold profile
        active_threshold: Overall threshold value
        active_category_thresholds: Category-specific thresholds
        min_confidence: Minimum confidence to include in results
    Returns:
        Dictionary with 'tags' (per-category tags above threshold),
        'all_probs' (per-category tags above min_confidence), 'all_tags'
        (flat list of tag names above threshold) and 'success'. On failure
        the collections are empty, 'success' is False and 'error' holds the
        exception text.
    """
    per_category_profiles = ("Category-specific", "High Precision", "High Recall")
    try:
        # These profiles hand the per-category thresholds to the model;
        # every other profile uses the single overall threshold.
        if threshold_profile in per_category_profiles:
            prediction = model.predict(
                image_path=image_path,
                category_thresholds=active_category_thresholds
            )
        else:
            prediction = model.predict(
                image_path=image_path,
                threshold=active_threshold
            )

        scores = prediction['refined_probabilities'][0]  # first entry of the batch

        # Group every tag that reaches min_confidence under its category
        all_probs = {}
        for tag_index in range(len(scores)):
            confidence = scores[tag_index].item()
            if not confidence >= min_confidence:
                continue
            tag, category = model.dataset.get_tag_info(tag_index)
            all_probs.setdefault(category, []).append((tag, confidence))

        # Highest confidence first inside each category
        for ranked in all_probs.values():
            ranked.sort(key=lambda pair: pair[1], reverse=True)

        # Keep only the tags that reach the applicable threshold
        tags = {}
        for category, ranked in all_probs.items():
            if active_category_thresholds:
                cutoff = active_category_thresholds.get(category, active_threshold)
            else:
                cutoff = active_threshold
            tags[category] = [(tag, prob) for tag, prob in ranked if prob >= cutoff]

        all_tags = [tag for kept in tags.values() for tag, _ in kept]
        return {
            'tags': tags,
            'all_probs': all_probs,
            'all_tags': all_tags,
            'success': True
        }
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        traceback.print_exc()
        return {
            'tags': {},
            'all_probs': {},
            'all_tags': [],
            'success': False,
            'error': str(e)
        }
def apply_category_limits(result, category_limits):
    """
    Restrict how many tags each category of a result may keep.

    The passed-in result dictionary is updated in place and returned.

    Args:
        result: Result dictionary containing 'success', 'tags' and 'all_tags'
        category_limits: Dictionary mapping categories to their tag limits
                         (0 = exclude category, -1 = no limit/include all)
    Returns:
        Updated result dictionary with limits applied
    """
    limits_active = bool(category_limits) and bool(result['success'])
    if limits_active:
        per_category = result['tags']
        # Iterate over a frozen copy of the keys since entries can be dropped
        for category in tuple(per_category.keys()):
            entries = per_category[category]
            limit = category_limits.get(category, -1)  # default: no limit
            if limit == 0:
                # Category is excluded entirely
                per_category.pop(category)
            elif limit > 0 and len(entries) > limit:
                # Keep the first `limit` entries of this category
                per_category[category] = entries[:limit]

        # Recompute the flat list so it matches the trimmed categories
        flattened = []
        for entries in per_category.values():
            flattened.extend(tag for tag, _ in entries)

        result['tags'] = per_category
        result['all_tags'] = flattened
    return result
def _finalize_image_result(image_path, result, category_limits, save_dir, save_tags):
    """Apply category limits to one image's result and write its tag file on success."""
    if category_limits and result['success']:
        result = apply_category_limits(result, category_limits)
    if result['success']:
        output_path = save_tags(
            image_path=image_path,
            all_tags=result['all_tags'],
            custom_dir=save_dir,
            overwrite=True
        )
        result['output_path'] = str(output_path)
    return result


def batch_process_images(folder_path, model, thresholds, metadata, threshold_profile, active_threshold,
                        active_category_thresholds, save_dir=None, progress_callback=None,
                        min_confidence=0.1, batch_size=1, category_limits=None):
    """
    Process all images in a folder with optional batching for improved performance.

    Images with a .jpg, .jpeg or .png extension (lower or upper case) directly
    inside folder_path are tagged, and one tag file per successful image is
    written to save_dir (existing files are overwritten).

    Args:
        folder_path: Path to folder containing images
        model: The image tagger model
        thresholds: Thresholds dictionary
        metadata: Metadata dictionary
        threshold_profile: Selected threshold profile
        active_threshold: Overall threshold value
        active_category_thresholds: Category-specific thresholds
        save_dir: Directory to save tag files (if None uses the app's 'saved_tags' folder)
        progress_callback: Optional callback called as
            callback(index, total, image_path) before each image and once
            more as callback(total, total, None) at the end
        min_confidence: Minimum confidence threshold
        batch_size: Number of images to process at once (default: 1).
            Values below 1 are treated as 1.
        category_limits: Dictionary mapping categories to their tag limits
            (0 = exclude category, -1 = no limit/include all)
    Returns:
        Dictionary with 'success', 'total', 'processed', 'results' (per image
        path), 'save_dir' and 'time_elapsed'; or, when no images are found,
        a dictionary with 'success' False, 'error' and empty 'results'.
    """
    from .file_utils import save_tags_to_file  # Import here to avoid circular imports
    import time

    # A step of 0 would crash range() and a negative one would skip every image
    batch_size = max(1, int(batch_size))

    print(f"Starting batch processing on {folder_path} with batch size {batch_size}")
    start_time = time.time()

    # Find all image files in the folder
    image_extensions = ['*.jpg', '*.jpeg', '*.png']
    image_files = []
    for ext in image_extensions:
        image_files.extend(glob.glob(os.path.join(folder_path, ext)))
        image_files.extend(glob.glob(os.path.join(folder_path, ext.upper())))

    # On Windows both the lower- and upper-case pattern match the same file,
    # so drop duplicates by comparing lower-cased normalised paths.
    if os.name == 'nt':
        seen_paths = set()
        unique_files = []
        for file_path in image_files:
            normalized_path = os.path.normpath(file_path).lower()
            if normalized_path not in seen_paths:
                seen_paths.add(normalized_path)
                unique_files.append(file_path)
        image_files = unique_files

    # Sort files for consistent processing order
    image_files.sort()
    if not image_files:
        return {
            'success': False,
            'error': f"No images found in {folder_path}",
            'results': {}
        }
    print(f"Found {len(image_files)} images to process")

    # Use the provided save directory or create a default one
    if save_dir is None:
        app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        save_dir = os.path.join(app_dir, "saved_tags")
    os.makedirs(save_dir, exist_ok=True)

    results = {}
    total_images = len(image_files)
    total_batches = (total_images + batch_size - 1) // batch_size
    processed = 0

    for i in range(0, total_images, batch_size):
        batch_start = time.time()
        batch_files = image_files[i:i+batch_size]
        batch_size_actual = len(batch_files)
        print(f"Processing batch {i//batch_size + 1}/{total_batches}: {batch_size_actual} images")

        # Try one forward pass for the whole batch; batch_results stays None
        # whenever the images must be processed one by one instead.
        batch_results = None
        if batch_size > 1:
            try:
                batch_results = process_image_batch(
                    image_paths=batch_files,
                    model=model,
                    thresholds=thresholds,
                    metadata=metadata,
                    threshold_profile=threshold_profile,
                    active_threshold=active_threshold,
                    active_category_thresholds=active_category_thresholds,
                    min_confidence=min_confidence
                )
            except Exception as e:
                print(f"Batch processing error: {str(e)}")
                traceback.print_exc()
                batch_results = None
            if batch_results is not None and len(batch_results) != batch_size_actual:
                # Results are matched to files by position. With a different
                # count that mapping cannot be trusted (tags could be saved
                # under the wrong image), so redo this batch image by image.
                print(f"Batch processing error: expected {batch_size_actual} results, got {len(batch_results)}")
                batch_results = None

        for j, image_path in enumerate(batch_files):
            if progress_callback:
                progress_callback(processed + j, total_images, image_path)
            if batch_results is not None:
                result = batch_results[j]
            else:
                result = process_image(
                    image_path=image_path,
                    model=model,
                    thresholds=thresholds,
                    metadata=metadata,
                    threshold_profile=threshold_profile,
                    active_threshold=active_threshold,
                    active_category_thresholds=active_category_thresholds,
                    min_confidence=min_confidence
                )
            results[image_path] = _finalize_image_result(
                image_path, result, category_limits, save_dir, save_tags_to_file
            )

        processed += batch_size_actual
        batch_time = time.time() - batch_start
        print(f"Batch processed in {batch_time:.2f} seconds ({batch_time/batch_size_actual:.2f} seconds per image)")

    # Final progress update
    if progress_callback:
        progress_callback(total_images, total_images, None)
    end_time = time.time()
    total_time = end_time - start_time
    print(f"Batch processing finished. Total time: {total_time:.2f} seconds, Average: {total_time/total_images:.2f} seconds per image")
    return {
        'success': True,
        'total': total_images,
        'processed': len(results),
        'results': results,
        'save_dir': save_dir,
        'time_elapsed': end_time - start_time
    }
+def process_image_batch(image_paths, model, thresholds, metadata, threshold_profile, active_threshold, active_category_thresholds, min_confidence=0.1):
+    """
+    Process a batch of images at once.
+    Args:
+        image_paths: List of paths to the images
+        model: The image tagger model
+        thresholds: Thresholds dictionary
+        metadata: Metadata dictionary
+        threshold_profile: Selected threshold profile
+        active_threshold: Overall threshold value
+        active_category_thresholds: Category-specific thresholds
+        min_confidence: Minimum confidence to include in results
+    Returns:
+        List of dictionaries with tags, all probabilities, and other info for each image
+    """
+    try:
+        import torch
+        from PIL import Image
+        import torchvision.transforms as transforms
+        # Identify the model type we're using for better error handling
+        model_type = model.__class__.__name__
+        print(f"Running batch processing with model type: {model_type}")
+        # Prepare the transformation for the images
+        transform = transforms.Compose([
+            transforms.Resize((512, 512)),  # Adjust based on your model's expected input
+            transforms.ToTensor(),
+        ])
+        # Get model information
+        device = next(model.parameters()).device
+        dtype = next(model.parameters()).dtype
+        print(f"Model is using device: {device}, dtype: {dtype}")
+        # Load and preprocess all images
+        batch_tensor = []
+        valid_images = []
+        for img_path in image_paths:
+            try:
+                img = Image.open(img_path).convert('RGB')
+                img_tensor = transform(img)
+                img_tensor = img_tensor.to(device=device, dtype=dtype)
+                batch_tensor.append(img_tensor)
+                valid_images.append(img_path)
+            except Exception as e:
+                print(f"Error loading image {img_path}: {str(e)}")
+        if not batch_tensor:
+            return []
+        # Stack all tensors into a single batch
+        batch_input = torch.stack(batch_tensor)
+        # Process entire batch at once
+        with torch.no_grad():
+            try:
+                # Forward pass on the whole batch
+                output = model(batch_input)
+                # Handle tuple output format
+                if isinstance(output, tuple):
+                    probs_batch = torch.sigmoid(output[1])
+                else:
+                    probs_batch = torch.sigmoid(output)
+                # Process each image's results
+                results = []
+                for i, img_path in enumerate(valid_images):
+                    probs = probs_batch[i].unsqueeze(0)  # Add batch dimension back
+                    # Extract and organize all probabilities
+                    all_probs = {}
+                    for idx in range(probs.size(1)):
+                        prob_value = probs[0, idx].item()
+                        if prob_value >= min_confidence:
+                            tag, category = model.dataset.get_tag_info(idx)
+                            if category not in all_probs:
+                                all_probs[category] = []
+                            all_probs[category].append((tag, prob_value))
+                    # Sort tags by probability
+                    for category in all_probs:
+                        all_probs[category] = sorted(all_probs[category], key=lambda x: x[1], reverse=True)
+                    # Get filtered tags
+                    tags = {}
+                    for category, cat_tags in all_probs.items():
+                        threshold = active_category_thresholds.get(category, active_threshold) if active_category_thresholds else active_threshold
+                        tags[category] = [(tag, prob) for tag, prob in cat_tags if prob >= threshold]
+                    # Create a flat list of all tags above threshold
+                    all_tags = []
+                    for category, cat_tags in tags.items():
+                        for tag, _ in cat_tags:
+                            all_tags.append(tag)
+                    results.append({
+                        'tags': tags,
+                        'all_probs': all_probs,
+                        'all_tags': all_tags,
+                        'success': True
+                    })
+                return results
+            except RuntimeError as e:
+                # If we encounter CUDA out of memory or another runtime error,
+                # fall back to processing one by one
+                print(f"Error in batch processing: {str(e)}")
+                print("Falling back to one-by-one processing...")
+                # Process one by one as fallback
+                results = []
+                for i, (img_tensor, img_path) in enumerate(zip(batch_tensor, valid_images)):
+                    try:
+                        input_tensor = img_tensor.unsqueeze(0)
+                        output = model(input_tensor)
+                        if isinstance(output, tuple):
+                            probs = torch.sigmoid(output[1])
+                        else:
+                            probs = torch.sigmoid(output)
+                        # Same post-processing as before...
+                        # [Code omitted for brevity]
+                    except Exception as e:
+                        print(f"Error processing image {img_path}: {str(e)}")
+                        results.append({
+                            'tags': {},
+                            'all_probs': {},
+                            'all_tags': [],
+                            'success': False,
+                            'error': str(e)
+                        })
+                return results
+    except Exception as e:
+        print(f"Error in batch processing: {str(e)}")
+        import traceback
+        traceback.print_exc()

utils/model_loader.py ADDED Viewed

	@@ -0,0 +1,379 @@

+import torch
+import torch.nn as nn
+from torch.nn import GroupNorm, LayerNorm
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+import timm
class ViTWrapper(nn.Module):
    """Adapter that exposes a ViT's patch feature map and CLS embedding for ImageTagger."""

    def __init__(self, vit_model):
        super().__init__()
        self.vit = vit_model
        self.out_indices = (-1,)  # mimic timm.features_only
        # Patch size and embedding width are read straight from the wrapped model
        self.patch_size = vit_model.patch_embed.patch_size[0]
        self.embed_dim = vit_model.embed_dim

    def forward(self, x):
        """Run the wrapped ViT and return (patch_features, cls_final).

        patch_features has shape (B, C, h, w) with h == w, and cls_final has
        shape (B, C), where C is the token width produced by the ViT.
        """
        vit = self.vit
        batch = x.size(0)

        # Patch tokens with the CLS token placed in front: (B, 1+N, C)
        tokens = vit.patch_embed(x)
        cls_token = vit.cls_token.expand(batch, -1, -1)
        tokens = torch.cat((cls_token, tokens), dim=1)

        # Positional encodings cover the CLS slot as well
        pos_embed = vit.pos_embed
        if pos_embed is not None:
            tokens = tokens + pos_embed[:, : tokens.size(1), :]
        tokens = vit.pos_drop(tokens)

        for block in vit.blocks:
            tokens = block(tokens)
        tokens = vit.norm(tokens)

        # Separate the CLS token from the patch tokens again
        cls_final = tokens[:, 0]
        patch_tokens = tokens[:, 1:]

        # Fold the patch sequence into a square spatial map
        batch, num_patches, channels = patch_tokens.shape
        side = int(num_patches ** 0.5)  # the patch grid is assumed to be square
        patch_features = patch_tokens.permute(0, 2, 1).reshape(batch, channels, side, side)

        return patch_features, cls_final

    def set_grad_checkpointing(self, enable=True):
        """Forward the checkpointing switch to the ViT; return whether it offers one."""
        if not hasattr(self.vit, 'set_grad_checkpointing'):
            return False
        self.vit.set_grad_checkpointing(enable)
        return True
+class ImageTagger(nn.Module):
+    """
+    ImageTagger with Vision Transformer backbone
+    """
+    def __init__(self, total_tags, dataset, model_name='vit_base_patch16_224',
+                 num_heads=16, dropout=0.1, pretrained=True, tag_context_size=256,
+                 use_gradient_checkpointing=False, img_size=224):
+        super().__init__()
+        # Store checkpointing config
+        self.use_gradient_checkpointing = use_gradient_checkpointing
+        self.model_name = model_name
+        self.img_size = img_size
+        # Debug and stats flags
+        self._flags = {
+            'debug': False,
+            'model_stats': True
+        }
+        # Core model config
+        self.dataset = dataset
+        self.tag_context_size = tag_context_size
+        self.total_tags = total_tags
+        print(f"🏗️ Building ImageTagger with ViT backbone and {total_tags} tags")
+        print(f"   Backbone: {model_name}")
+        print(f"   Image size: {img_size}x{img_size}")
+        print(f"   Tag context size: {tag_context_size}")
+        print(f"   Gradient checkpointing: {use_gradient_checkpointing}")
+        print(f"   🎯 Custom embeddings, PyTorch native attention, no ground truth inclusion")
+        # 1. Vision Transformer Backbone
+        print("📦 Loading Vision Transformer backbone...")
+        self._load_vit_backbone()
+        # Get backbone dimensions by running a test forward pass
+        self._determine_backbone_dimensions()
+        self.embedding_dim = self.backbone.embed_dim
+        # 2. Custom Tag Embeddings (no CLIP)
+        print("🎯 Using custom tag embeddings (no CLIP)")
+        self.tag_embedding = nn.Embedding(total_tags, self.embedding_dim)
+        # 3. Shared weights approach - tag bias for initial predictions
+        print("🔗 Using shared weights between initial head and tag embeddings")
+        self.tag_bias = nn.Parameter(torch.zeros(total_tags))
+        # 4. Image token extraction (for attention AND global pooling)
+        self.image_token_proj = nn.Identity()
+        # 5. Tags-as-queries cross-attention (using PyTorch's optimized implementation)
+        self.cross_attention = nn.MultiheadAttention(
+            embed_dim=self.embedding_dim,
+            num_heads=num_heads,
+            dropout=dropout,
+            batch_first=True  # Use (batch, seq, feature) format
+        )
+        self.cross_norm = nn.LayerNorm(self.embedding_dim)
+        # Initialize weights
+        self._init_weights()
+        # Enable gradient checkpointing
+        if self.use_gradient_checkpointing:
+            self._enable_gradient_checkpointing()
+        print(f"✅ ImageTagger with ViT initialized!")
+        self._print_parameter_count()
+    def _load_vit_backbone(self):
+        """Load Vision Transformer model from timm"""
+        print(f"   Loading from timm: {self.model_name}")
+        # Load the ViT model (not features_only, we want the full model for token extraction)
+        vit_model = timm.create_model(
+            self.model_name,
+            pretrained=True,
+            img_size=self.img_size,
+            num_classes=0  # Remove classification head
+        )
+        # Wrap it in our compatibility layer
+        self.backbone = ViTWrapper(vit_model)
+        print(f"   ✅ ViT loaded successfully")
+        print(f"   Patch size: {self.backbone.patch_size}x{self.backbone.patch_size}")
+        print(f"   Embed dim: {self.backbone.embed_dim}")
+    def _determine_backbone_dimensions(self):
+        """Determine backbone output dimensions"""
+        print("   🔍 Determining backbone dimensions...")
+        with torch.no_grad(), torch.autocast('cuda', dtype=torch.bfloat16):
+            # Create a dummy input
+            dummy_input = torch.randn(1, 3, self.img_size, self.img_size)
+            # Get features
+            backbone_features, cls_dummy = self.backbone(dummy_input)
+            feature_tensor = backbone_features
+            self.backbone_dim = feature_tensor.shape[1]
+            self.feature_map_size = feature_tensor.shape[2]
+        print(f"   Backbone output: {self.backbone_dim}D, {self.feature_map_size}x{self.feature_map_size} spatial")
+        print(f"   Total patch tokens: {self.feature_map_size * self.feature_map_size}")
+    def _enable_gradient_checkpointing(self):
+        """Enable gradient checkpointing for memory efficiency"""
+        print("🔄 Enabling gradient checkpointing...")
+        # Enable checkpointing for ViT backbone
+        if self.backbone.set_grad_checkpointing(True):
+            print("   ✅ ViT backbone checkpointing enabled")
+        else:
+            print("   ⚠️ ViT backbone doesn't support built-in checkpointing, will checkpoint manually")
+    def _checkpoint_backbone(self, x):
+        """Wrapper for backbone with gradient checkpointing"""
+        if self.use_gradient_checkpointing and self.training:
+            return checkpoint.checkpoint(self.backbone, x, use_reentrant=False)
+        else:
+            return self.backbone(x)
+    def _checkpoint_image_proj(self, x):
+        """Wrapper for image projection with gradient checkpointing"""
+        if self.use_gradient_checkpointing and self.training:
+            return checkpoint.checkpoint(self.image_token_proj, x, use_reentrant=False)
+        else:
+            return self.image_token_proj(x)
+    def _checkpoint_cross_attention(self, query, key, value):
+        """Wrapper for cross attention with gradient checkpointing"""
+        def _attention_forward(q, k, v):
+            attended_features, _ = self.cross_attention(query=q, key=k, value=v)
+            return self.cross_norm(attended_features)
+        if self.use_gradient_checkpointing and self.training:
+            return checkpoint.checkpoint(_attention_forward, query, key, value, use_reentrant=False)
+        else:
+            return _attention_forward(query, key, value)
+    def _checkpoint_candidate_selection(self, initial_logits):
+        """Wrapper for candidate selection with gradient checkpointing"""
+        def _candidate_forward(logits):
+            return self._get_candidate_tags(logits)
+        if self.use_gradient_checkpointing and self.training:
+            return checkpoint.checkpoint(_candidate_forward, initial_logits, use_reentrant=False)
+        else:
+            return _candidate_forward(initial_logits)
+    def _checkpoint_final_scoring(self, attended_features, candidate_indices):
+        """Wrapper for final scoring with gradient checkpointing"""
+        def _scoring_forward(features, indices):
+            emb = self.tag_embedding(indices)
+            # BF16 in, BF16 out
+            return (features * emb).sum(dim=-1)
+        if self.use_gradient_checkpointing and self.training:
+            return checkpoint.checkpoint(_scoring_forward, attended_features, candidate_indices, use_reentrant=False)
+        else:
+            return _scoring_forward(attended_features, candidate_indices)
+    def _init_weights(self):
+        """Initialize weights for new modules"""
+        def _init_layer(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.xavier_uniform_(layer.weight)
+                if layer.bias is not None:
+                    nn.init.zeros_(layer.bias)
+            elif isinstance(layer, nn.Conv2d):
+                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
+                if layer.bias is not None:
+                    nn.init.zeros_(layer.bias)
+            elif isinstance(layer, nn.Embedding):
+                nn.init.normal_(layer.weight, mean=0, std=0.02)
+        # Initialize new components
+        self.image_token_proj.apply(_init_layer)
+        # Initialize tag embeddings with normal distribution
+        nn.init.normal_(self.tag_embedding.weight, mean=0, std=0.02)
+        # Initialize tag bias
+        nn.init.zeros_(self.tag_bias)
+    def _print_parameter_count(self):
+        """Print parameter statistics"""
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+        backbone_params = sum(p.numel() for p in self.backbone.parameters())
+        print(f"📊 Parameter Statistics:")
+        print(f"   Total parameters: {total_params/1e6:.1f}M")
+        print(f"   Trainable parameters: {trainable_params/1e6:.1f}M")
+        print(f"   Frozen parameters: {(total_params-trainable_params)/1e6:.1f}M")
+        print(f"   Backbone parameters: {backbone_params/1e6:.1f}M")
+        if self.use_gradient_checkpointing:
+            print(f"   🔄 Gradient checkpointing enabled for memory efficiency")
+    @property
+    def debug(self):
+        return self._flags['debug']
+    @property
+    def model_stats(self):
+        return self._flags['model_stats']
+    def _get_candidate_tags(self, initial_logits, target_tags=None, hard_negatives=None):
+        """Select candidate tags - no ground truth inclusion"""
+        batch_size = initial_logits.size(0)
+        # Simply select top K candidates based on initial predictions
+        top_probs, top_indices = torch.topk(
+            torch.sigmoid(initial_logits),
+            k=min(self.tag_context_size, self.total_tags),
+            dim=1, largest=True, sorted=True
+        )
+        return top_indices
+    def _analyze_predictions(self, predictions, tag_indices):
+        """Analyze prediction patterns"""
+        if not self.model_stats:
+            return {}
+        if torch._dynamo.is_compiling():
+            return {}
+        with torch.no_grad(), torch.autocast('cuda', dtype=torch.bfloat16):
+            probs = torch.sigmoid(predictions)
+            relevant_probs = torch.gather(probs, 1, tag_indices)
+            return {
+                'prediction_confidence': relevant_probs.mean().item(),
+                'prediction_entropy': -(relevant_probs * torch.log(relevant_probs + 1e-9)).mean().item(),
+                'high_confidence_ratio': (relevant_probs > 0.7).float().mean().item(),
+                'above_threshold_ratio': (relevant_probs > 0.5).float().mean().item(),
+            }
+    def forward(self, x, targets=None, hard_negatives=None):
+        """
+        Forward pass with ViT backbone, CLS token support and gradient-checkpointing.
+        All arithmetic tensors stay in the backbone's dtype (BF16 under autocast,
+        FP32 otherwise).  Anything that must mix dtypes is cast to match.
+        Args:
+            x: Input batch passed to the backbone
+            targets: Only checked for ``None`` here; prediction statistics are
+                collected only when it is provided
+            hard_negatives: Not used in this method
+        Returns:
+            Dict with keys 'initial_predictions' (logits computed from the pooled
+            image vector), 'refined_predictions' (a copy of the initial logits with
+            the candidate positions overwritten by the re-scored values),
+            'selected_candidates' (candidate tag indices) and 'model_stats'
+            (empty unless statistics were collected in step 6)
+        """
+        # NOTE(review): batch_size is not referenced again in this method.
+        batch_size  = x.size(0)
+        # Both branches produce an empty dict; entries are only added in step 6.
+        model_stats = {} if self.model_stats else {}
+        # ------------------------------------------------------------------
+        # 1. Backbone  →  patch map + CLS token
+        # ------------------------------------------------------------------
+        patch_map, cls_token = self._checkpoint_backbone(x)         # patch_map: [B, C, H, W]
+                                                                    # cls_token: [B, C]
+        # ------------------------------------------------------------------
+        # 2. Tokens  →  global image vector
+        # ------------------------------------------------------------------
+        image_tokens_4d = self._checkpoint_image_proj(patch_map)    # [B, C, H, W]
+        image_tokens    = image_tokens_4d.flatten(2).transpose(1, 2)  # [B, N, C]
+        # "Dual-pool": average of the mean-pooled patch tokens and the CLS token
+        global_features = 0.5 * (image_tokens.mean(dim=1, dtype=image_tokens.dtype) + cls_token)  # [B, C]
+        compute_dtype = global_features.dtype                       # BF16 or FP32
+        # ------------------------------------------------------------------
+        # 3. Initial logits  (shared weights)
+        # ------------------------------------------------------------------
+        # The tag embedding matrix doubles as the classifier weight matrix.
+        tag_weights = self.tag_embedding.weight.to(compute_dtype)   # [T, C]
+        tag_bias    = self.tag_bias.to(compute_dtype)               # [T]
+        initial_logits = global_features @ tag_weights.t() + tag_bias   # [B, T]
+        initial_logits = initial_logits.to(compute_dtype)               # keep dtype uniform
+        initial_preds  = initial_logits                                 # alias
+        # ------------------------------------------------------------------
+        # 4. Candidate set
+        # ------------------------------------------------------------------
+        candidate_indices = self._checkpoint_candidate_selection(initial_logits)  # [B, K]
+        tag_embeddings   = self.tag_embedding(candidate_indices).to(compute_dtype)  # [B, K, C]
+        # Candidate tag embeddings are the queries; image tokens are keys and values.
+        attended_features = self._checkpoint_cross_attention(       # [B, K, C]
+            tag_embeddings, image_tokens, image_tokens
+        )
+        # ------------------------------------------------------------------
+        # 5. Score candidates  &  scatter back
+        # ------------------------------------------------------------------
+        candidate_logits = self._checkpoint_final_scoring(attended_features, candidate_indices)  # [B, K]
+        # --- align dtypes so scatter never throws ---
+        if candidate_logits.dtype != initial_logits.dtype:
+            candidate_logits = candidate_logits.to(initial_logits.dtype)
+        # Clone first so the initial logits returned below are not modified in place.
+        refined_logits = initial_logits.clone()
+        refined_logits.scatter_(1, candidate_indices, candidate_logits)
+        refined_preds = refined_logits
+        # ------------------------------------------------------------------
+        # 6. Optional stats
+        # ------------------------------------------------------------------
+        # Skipped while torch._dynamo is compiling.
+        if self.model_stats and targets is not None and not torch._dynamo.is_compiling():
+            model_stats['initial_prediction_stats'] = self._analyze_predictions(initial_preds,
+                                                                                candidate_indices)
+            model_stats['refined_prediction_stats'] = self._analyze_predictions(refined_preds,
+                                                                                candidate_indices)
+        return {
+            'initial_predictions': initial_preds,
+            'refined_predictions': refined_preds,
+            'selected_candidates': candidate_indices,
+            'model_stats': model_stats
+        }
+    def predict

utils/onnx_processing.py ADDED Viewed

	@@ -0,0 +1,729 @@

+"""
+ONNX-based batch image processing for the Image Tagger application.
+Updated with proper ImageNet normalization and new metadata format.
+"""
+import os
+import json
+import time
+import traceback
+import numpy as np
+import glob
+import onnxruntime as ort
+from PIL import Image
+import torchvision.transforms as transforms
+from concurrent.futures import ThreadPoolExecutor
+def preprocess_image(image_path, image_size=512):
+    """
+    Process an image for ImageTagger inference with proper ImageNet normalization
+    """
+    if not os.path.exists(image_path):
+        raise ValueError(f"Image not found at path: {image_path}")
+    # ImageNet normalization - CRITICAL for your model
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[0.485, 0.456, 0.406],
+            std=[0.229, 0.224, 0.225]
+        )
+    ])
+    try:
+        with Image.open(image_path) as img:
+            # Convert RGBA or Palette images to RGB
+            if img.mode in ('RGBA', 'P'):
+                img = img.convert('RGB')
+            # Get original dimensions
+            width, height = img.size
+            aspect_ratio = width / height
+            # Calculate new dimensions to maintain aspect ratio
+            if aspect_ratio > 1:
+                new_width = image_size
+                new_height = int(new_width / aspect_ratio)
+            else:
+                new_height = image_size
+                new_width = int(new_height * aspect_ratio)
+            # Resize with LANCZOS filter
+            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+            # Create new image with padding (use ImageNet mean for padding)
+            # Using RGB values close to ImageNet mean: (0.485*255, 0.456*255, 0.406*255)
+            pad_color = (124, 116, 104)
+            new_image = Image.new('RGB', (image_size, image_size), pad_color)
+            paste_x = (image_size - new_width) // 2
+            paste_y = (image_size - new_height) // 2
+            new_image.paste(img, (paste_x, paste_y))
+            # Apply transforms (including ImageNet normalization)
+            img_tensor = transform(new_image)
+            return img_tensor.numpy()
+    except Exception as e:
+        raise Exception(f"Error processing {image_path}: {str(e)}")
+def process_single_image_onnx(image_path, model_path, metadata, threshold_profile="Overall",
+                           active_threshold=0.35, active_category_thresholds=None,
+                           min_confidence=0.1):
+    """
+    Process a single image using ONNX model with new metadata format
+    Args:
+        image_path: Path to the image file
+        model_path: Path to the ONNX model file
+        metadata: Model metadata dictionary
+        threshold_profile: The threshold profile being used
+        active_threshold: Overall threshold value
+        active_category_thresholds: Category-specific thresholds
+        min_confidence: Minimum confidence to include in results
+    Returns:
+        Dictionary with tags and probabilities
+    """
+    try:
+        # Create ONNX tagger for this image (or reuse an existing one)
+        if hasattr(process_single_image_onnx, 'tagger'):
+            tagger = process_single_image_onnx.tagger
+        else:
+            # Create new tagger
+            tagger = ONNXImageTagger(model_path, metadata)
+            # Cache it for future calls
+            process_single_image_onnx.tagger = tagger
+        # Preprocess the image
+        start_time = time.time()
+        img_array = preprocess_image(image_path)
+        # Run inference
+        results = tagger.predict_batch(
+            [img_array],
+            threshold=active_threshold,
+            category_thresholds=active_category_thresholds,
+            min_confidence=min_confidence
+        )
+        inference_time = time.time() - start_time
+        if results:
+            result = results[0]
+            result['inference_time'] = inference_time
+            result['success'] = True
+            return result
+        else:
+            return {
+                'success': False,
+                'error': 'Failed to process image',
+                'all_tags': [],
+                'all_probs': {},
+                'tags': {}
+            }
+    except Exception as e:
+        print(f"Error in process_single_image_onnx: {str(e)}")
+        traceback.print_exc()
+        return {
+            'success': False,
+            'error': str(e),
+            'all_tags': [],
+            'all_probs': {},
+            'tags': {}
+        }
+def preprocess_images_parallel(image_paths, image_size=512, max_workers=8):
+    """Process multiple images in parallel"""
+    processed_images = []
+    valid_paths = []
+    # Define a worker function
+    def process_single_image(path):
+        try:
+            return preprocess_image(path, image_size), path
+        except Exception as e:
+            print(f"Error processing {path}: {str(e)}")
+            return None, path
+    # Process images in parallel
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        results = list(executor.map(process_single_image, image_paths))
+    # Filter results
+    for img_array, path in results:
+        if img_array is not None:
+            processed_images.append(img_array)
+            valid_paths.append(path)
+    return processed_images, valid_paths
+def apply_category_limits(result, category_limits):
+    """
+    Apply category limits to a result dictionary.
+    Args:
+        result: Result dictionary containing tags and all_tags
+        category_limits: Dictionary mapping categories to their tag limits
+                         (0 = exclude category, -1 = no limit/include all)
+    Returns:
+        Updated result dictionary with limits applied
+    """
+    if not category_limits or not result['success']:
+        return result
+    # Get the filtered tags
+    filtered_tags = result['tags']
+    # Apply limits to each category
+    for category, cat_tags in list(filtered_tags.items()):
+        # Get limit for this category, default to -1 (no limit)
+        limit = category_limits.get(category, -1)
+        if limit == 0:
+            # Exclude this category entirely
+            del filtered_tags[category]
+        elif limit > 0 and len(cat_tags) > limit:
+            # Limit to top N tags for this category
+            filtered_tags[category] = cat_tags[:limit]
+    # Regenerate all_tags list after applying limits
+    all_tags = []
+    for category, cat_tags in filtered_tags.items():
+        for tag, _ in cat_tags:
+            all_tags.append(tag)
+    # Update the result with limited tags
+    result['tags'] = filtered_tags
+    result['all_tags'] = all_tags
+    return result
+class ONNXImageTagger:
+    """ONNX-based image tagger for fast batch inference with updated metadata format"""
+    def __init__(self, model_path, metadata):
+        # Load model
+        self.model_path = model_path
+        try:
+            self.session = ort.InferenceSession(
+                model_path,
+                providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
+            )
+            print(f"Using providers: {self.session.get_providers()}")
+        except Exception as e:
+            print(f"CUDA not available, using CPU: {e}")
+            self.session = ort.InferenceSession(
+                model_path,
+                providers=['CPUExecutionProvider']
+            )
+            print(f"Using providers: {self.session.get_providers()}")
+        # Store metadata (passed as dict, not loaded from file)
+        self.metadata = metadata
+        # Extract tag mappings from new metadata structure
+        if 'dataset_info' in metadata:
+            # New metadata format
+            self.tag_mapping = metadata['dataset_info']['tag_mapping']
+            self.idx_to_tag = self.tag_mapping['idx_to_tag']
+            self.tag_to_category = self.tag_mapping['tag_to_category']
+            self.total_tags = metadata['dataset_info']['total_tags']
+        else:
+            # Fallback for older format
+            self.idx_to_tag = metadata.get('idx_to_tag', {})
+            self.tag_to_category = metadata.get('tag_to_category', {})
+            self.total_tags = metadata.get('total_tags', len(self.idx_to_tag))
+        # Get input name
+        self.input_name = self.session.get_inputs()[0].name
+        print(f"Model loaded successfully. Input name: {self.input_name}")
+        print(f"Total tags: {self.total_tags}, Categories: {len(set(self.tag_to_category.values()))}")
+    def predict_batch(self, image_arrays, threshold=0.5, category_thresholds=None, min_confidence=0.1):
+        """Run batch inference on preprocessed image arrays"""
+        # Stack arrays into batch
+        batch_input = np.stack(image_arrays)
+        # Run inference
+        start_time = time.time()
+        outputs = self.session.run(None, {self.input_name: batch_input})
+        inference_time = time.time() - start_time
+        print(f"Batch inference completed in {inference_time:.4f} seconds ({inference_time/len(image_arrays):.4f} s/image)")
+        # Process outputs - handle both single and multi-output models
+        if len(outputs) >= 2:
+            # Multi-output model (initial_predictions, refined_predictions, selected_candidates)
+            initial_logits = outputs[0]
+            refined_logits = outputs[1]
+            # Use refined predictions as main output
+            main_logits = refined_logits
+            print(f"Using refined predictions (shape: {refined_logits.shape})")
+        else:
+            # Single output model
+            main_logits = outputs[0]
+            print(f"Using single output (shape: {main_logits.shape})")
+        # Apply sigmoid to get probabilities
+        main_probs = 1.0 / (1.0 + np.exp(-main_logits))
+        # Process results for each image in batch
+        batch_results = []
+        for i in range(main_probs.shape[0]):
+            probs = main_probs[i]
+            # Extract and organize all probabilities
+            all_probs = {}
+            for idx in range(probs.shape[0]):
+                prob_value = float(probs[idx])
+                if prob_value >= min_confidence:
+                    idx_str = str(idx)
+                    tag_name = self.idx_to_tag.get(idx_str, f"unknown-{idx}")
+                    category = self.tag_to_category.get(tag_name, "general")
+                    if category not in all_probs:
+                        all_probs[category] = []
+                    all_probs[category].append((tag_name, prob_value))
+            # Sort tags by probability within each category
+            for category in all_probs:
+                all_probs[category] = sorted(
+                    all_probs[category],
+                    key=lambda x: x[1],
+                    reverse=True
+                )
+            # Get the filtered tags based on the selected threshold
+            tags = {}
+            for category, cat_tags in all_probs.items():
+                # Use category-specific threshold if available
+                if category_thresholds and category in category_thresholds:
+                    cat_threshold = category_thresholds[category]
+                else:
+                    cat_threshold = threshold
+                tags[category] = [(tag, prob) for tag, prob in cat_tags if prob >= cat_threshold]
+            # Create a flat list of all tags above threshold
+            all_tags = []
+            for category, cat_tags in tags.items():
+                for tag, _ in cat_tags:
+                    all_tags.append(tag)
+            batch_results.append({
+                'tags': tags,
+                'all_probs': all_probs,
+                'all_tags': all_tags,
+                'success': True
+            })
+        return batch_results
+def batch_process_images_onnx(folder_path, model_path, metadata_path, threshold_profile,
+                            active_threshold, active_category_thresholds, save_dir=None,
+                            progress_callback=None, min_confidence=0.1, batch_size=16,
+                            category_limits=None):
+    """
+    Process all images in a folder using the ONNX model with new metadata format.
+    Args:
+        folder_path: Path to folder containing images
+        model_path: Path to the ONNX model file
+        metadata_path: Path to the model metadata file
+        threshold_profile: Selected threshold profile
+        active_threshold: Overall threshold value
+        active_category_thresholds: Category-specific thresholds
+        save_dir: Directory to save tag files (if None uses default)
+        progress_callback: Optional callback for progress updates
+        min_confidence: Minimum confidence threshold
+        batch_size: Number of images to process at once
+        category_limits: Dictionary mapping categories to their tag limits
+    Returns:
+        Dictionary with results for each image
+    """
+    from utils.file_utils import save_tags_to_file  # Import here to avoid circular imports
+    # Find all image files in the folder
+    image_extensions = ['*.jpg', '*.jpeg', '*.png']
+    image_files = []
+    for ext in image_extensions:
+        image_files.extend(glob.glob(os.path.join(folder_path, ext)))
+        image_files.extend(glob.glob(os.path.join(folder_path, ext.upper())))
+    # Remove duplicates (Windows case-insensitive filesystems)
+    if os.name == 'nt':  # Windows
+        unique_paths = set()
+        unique_files = []
+        for file_path in image_files:
+            normalized_path = os.path.normpath(file_path).lower()
+            if normalized_path not in unique_paths:
+                unique_paths.add(normalized_path)
+                unique_files.append(file_path)
+        image_files = unique_files
+    if not image_files:
+        return {
+            'success': False,
+            'error': f"No images found in {folder_path}",
+            'results': {}
+        }
+    # Use the provided save directory or create a default one
+    if save_dir is None:
+        app_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        save_dir = os.path.join(app_dir, "saved_tags")
+    # Ensure the directory exists
+    os.makedirs(save_dir, exist_ok=True)
+    # Load metadata
+    try:
+        with open(metadata_path, 'r') as f:
+            metadata = json.load(f)
+    except Exception as e:
+        return {
+            'success': False,
+            'error': f"Failed to load metadata: {e}",
+            'results': {}
+        }
+    # Create ONNX tagger
+    try:
+        tagger = ONNXImageTagger(model_path, metadata)
+    except Exception as e:
+        return {
+            'success': False,
+            'error': f"Failed to load model: {e}",
+            'results': {}
+        }
+    # Process images in batches
+    results = {}
+    total_images = len(image_files)
+    processed = 0
+    start_time = time.time()
+    # Process in batches
+    for i in range(0, total_images, batch_size):
+        batch_start = time.time()
+        # Get current batch of images
+        batch_files = image_files[i:i+batch_size]
+        batch_size_actual = len(batch_files)
+        # Update progress if callback provided
+        if progress_callback:
+            progress_callback(processed, total_images, batch_files[0] if batch_files else None)
+        print(f"Processing batch {i//batch_size + 1}/{(total_images + batch_size - 1)//batch_size}: {batch_size_actual} images")
+        try:
+            # Preprocess images in parallel
+            processed_images, valid_paths = preprocess_images_parallel(batch_files)
+            if processed_images:
+                # Run batch prediction
+                batch_results = tagger.predict_batch(
+                    processed_images,
+                    threshold=active_threshold,
+                    category_thresholds=active_category_thresholds,
+                    min_confidence=min_confidence
+                )
+                # Process results for each image
+                for j, (image_path, result) in enumerate(zip(valid_paths, batch_results)):
+                    # Update progress if callback provided
+                    if progress_callback:
+                        progress_callback(processed + j, total_images, image_path)
+                    # Apply category limits if specified
+                    if category_limits and result['success']:
+                        print(f"Applying limits to {os.path.basename(image_path)}: {len(result['all_tags'])} → ", end="")
+                        result = apply_category_limits(result, category_limits)
+                        print(f"{len(result['all_tags'])} tags")
+                    # Save the tags to a file
+                    if result['success']:
+                        try:
+                            output_path = save_tags_to_file(
+                                image_path=image_path,
+                                all_tags=result['all_tags'],
+                                custom_dir=save_dir,
+                                overwrite=True
+                            )
+                            result['output_path'] = str(output_path)
+                        except Exception as e:
+                            print(f"Error saving tags for {image_path}: {e}")
+                            result['save_error'] = str(e)
+                    # Store the result
+                    results[image_path] = result
+            processed += batch_size_actual
+            # Calculate batch timing
+            batch_end = time.time()
+            batch_time = batch_end - batch_start
+            print(f"Batch processed in {batch_time:.2f} seconds ({batch_time/batch_size_actual:.2f} seconds per image)")
+        except Exception as e:
+            print(f"Error processing batch: {str(e)}")
+            traceback.print_exc()
+            # Process failed images one by one as fallback
+            for j, image_path in enumerate(batch_files):
+                try:
+                    # Update progress if callback provided
+                    if progress_callback:
+                        progress_callback(processed + j, total_images, image_path)
+                    # Preprocess single image
+                    img_array = preprocess_image(image_path)
+                    # Run inference on single image
+                    single_results = tagger.predict_batch(
+                        [img_array],
+                        threshold=active_threshold,
+                        category_thresholds=active_category_thresholds,
+                        min_confidence=min_confidence
+                    )
+                    if single_results:
+                        result = single_results[0]
+                        # Apply category limits if specified
+                        if category_limits and result['success']:
+                            result = apply_category_limits(result, category_limits)
+                        # Save the tags to a file
+                        if result['success']:
+                            try:
+                                output_path = save_tags_to_file(
+                                    image_path=image_path,
+                                    all_tags=result['all_tags'],
+                                    custom_dir=save_dir,
+                                    overwrite=True
+                                )
+                                result['output_path'] = str(output_path)
+                            except Exception as e:
+                                print(f"Error saving tags for {image_path}: {e}")
+                                result['save_error'] = str(e)
+                        results[image_path] = result
+                    else:
+                        results[image_path] = {
+                            'success': False,
+                            'error': 'Failed to process image',
+                            'all_tags': []
+                        }
+                except Exception as img_e:
+                    print(f"Error processing single image {image_path}: {str(img_e)}")
+                    results[image_path] = {
+                        'success': False,
+                        'error': str(img_e),
+                        'all_tags': []
+                    }
+            processed += batch_size_actual
+    # Final progress update
+    if progress_callback:
+        progress_callback(total_images, total_images, None)
+    end_time = time.time()
+    total_time = end_time - start_time
+    print(f"Batch processing finished. Total time: {total_time:.2f} seconds, Average: {total_time/total_images:.2f} seconds per image")
+    return {
+        'success': True,
+        'total': total_images,
+        'processed': len(results),
+        'results': results,
+        'save_dir': save_dir,
+        'time_elapsed': end_time - start_time
+    }
+def test_onnx_imagetagger(model_path, metadata_path, image_path, threshold=0.5, top_k=256):
+    """
+    Test ImageTagger ONNX model with proper handling of all outputs and new metadata format
+    Args:
+        model_path: Path to ONNX model file
+        metadata_path: Path to metadata JSON file
+        image_path: Path to test image
+        threshold: Confidence threshold for predictions
+        top_k: Maximum number of predictions to show
+    """
+    import onnxruntime as ort
+    import numpy as np
+    import json
+    import time
+    from collections import defaultdict
+    print(f"Loading ImageTagger ONNX model from {model_path}")
+    # Load metadata with proper error handling
+    try:
+        with open(metadata_path, 'r') as f:
+            metadata = json.load(f)
+    except Exception as e:
+        raise ValueError(f"Failed to load metadata: {e}")
+    # Extract tag mappings from new metadata structure
+    try:
+        if 'dataset_info' in metadata:
+            # New metadata format
+            dataset_info = metadata['dataset_info']
+            tag_mapping = dataset_info['tag_mapping']
+            idx_to_tag = tag_mapping['idx_to_tag']
+            tag_to_category = tag_mapping['tag_to_category']
+            total_tags = dataset_info['total_tags']
+        else:
+            # Fallback for older format
+            idx_to_tag = metadata.get('idx_to_tag', {})
+            tag_to_category = metadata.get('tag_to_category', {})
+            total_tags = metadata.get('total_tags', len(idx_to_tag))
+        print(f"Model info: {total_tags} tags, {len(set(tag_to_category.values()))} categories")
+    except KeyError as e:
+        raise ValueError(f"Invalid metadata structure, missing key: {e}")
+    # Initialize ONNX session with robust provider handling
+    providers = []
+    if ort.get_device() == 'GPU':
+        providers.append('CUDAExecutionProvider')
+    providers.append('CPUExecutionProvider')
+    try:
+        session = ort.InferenceSession(model_path, providers=providers)
+        active_provider = session.get_providers()[0]
+        print(f"Using provider: {active_provider}")
+        # Print model info
+        inputs = session.get_inputs()
+        outputs = session.get_outputs()
+        print(f"Model inputs: {len(inputs)}")
+        print(f"Model outputs: {len(outputs)}")
+        for i, output in enumerate(outputs):
+            print(f"  Output {i}: {output.name} {output.shape}")
+    except Exception as e:
+        raise RuntimeError(f"Failed to create ONNX session: {e}")
+    # Preprocess image
+    print(f"Processing image: {image_path}")
+    try:
+        # Get image size from metadata
+        img_size = metadata.get('model_info', {}).get('img_size', 512)
+        img_tensor = preprocess_image(image_path, image_size=img_size)
+        img_numpy = img_tensor[np.newaxis, :]  # Add batch dimension
+        print(f"Input shape: {img_numpy.shape}, dtype: {img_numpy.dtype}")
+    except Exception as e:
+        raise ValueError(f"Image preprocessing failed: {e}")
+    # Run inference
+    input_name = session.get_inputs()[0].name
+    print("Running inference...")
+    start_time = time.time()
+    try:
+        outputs = session.run(None, {input_name: img_numpy})
+        inference_time = time.time() - start_time
+        print(f"Inference completed in {inference_time:.4f} seconds")
+    except Exception as e:
+        raise RuntimeError(f"Inference failed: {e}")
+    # Handle outputs properly
+    if len(outputs) >= 2:
+        initial_logits = outputs[0]
+        refined_logits = outputs[1]
+        selected_candidates = outputs[2] if len(outputs) > 2 else None
+        # Use refined predictions as main output
+        main_logits = refined_logits
+        print(f"Using refined predictions (shape: {refined_logits.shape})")
+    else:
+        # Fallback to single output
+        main_logits = outputs[0]
+        print(f"Using single output (shape: {main_logits.shape})")
+    # Apply sigmoid to get probabilities
+    main_probs = 1.0 / (1.0 + np.exp(-main_logits))
+    # Apply threshold and get predictions
+    predictions_mask = (main_probs >= threshold)
+    indices = np.where(predictions_mask[0])[0]
+    if len(indices) == 0:
+        print(f"No predictions above threshold {threshold}")
+        # Show top 5 regardless of threshold
+        top_indices = np.argsort(main_probs[0])[-5:][::-1]
+        print("Top 5 predictions:")
+        for idx in top_indices:
+            idx_str = str(idx)
+            tag_name = idx_to_tag.get(idx_str, f"unknown-{idx}")
+            prob = float(main_probs[0, idx])
+            print(f"  {tag_name}: {prob:.3f}")
+        return {}
+    # Group by category
+    tags_by_category = defaultdict(list)
+    for idx in indices:
+        idx_str = str(idx)
+        tag_name = idx_to_tag.get(idx_str, f"unknown-{idx}")
+        category = tag_to_category.get(tag_name, "general")
+        prob = float(main_probs[0, idx])
+        tags_by_category[category].append((tag_name, prob))
+    # Sort by probability within each category
+    for category in tags_by_category:
+        tags_by_category[category] = sorted(
+            tags_by_category[category],
+            key=lambda x: x[1],
+            reverse=True
+        )[:top_k]  # Limit per category
+    # Print results
+    total_predictions = sum(len(tags) for tags in tags_by_category.values())
+    print(f"\nPredicted tags (threshold: {threshold}): {total_predictions} total")
+    # Category order for consistent display
+    category_order = ['general', 'character', 'copyright', 'artist', 'meta', 'year', 'rating']
+    for category in category_order:
+        if category in tags_by_category:
+            tags = tags_by_category[category]
+            print(f"\n{category.upper()} ({len(tags)}):")
+            for tag, prob in tags:
+                print(f"  {tag}: {prob:.3f}")
+    # Show any other categories not in standard order
+    for category in sorted(tags_by_category.keys()):
+        if category not in category_order:
+            tags = tags_by_category[category]
+            print(f"\n{category.upper()} ({len(tags)}):")
+            for tag, prob in tags:
+                print(f"  {tag}: {prob:.3f}")
+    # Performance stats
+    print(f"\nPerformance:")
+    print(f"  Inference time: {inference_time:.4f}s")
+    print(f"  Provider: {active_provider}")
+    print(f"  Max confidence: {main_probs.max():.3f}")
+    if total_predictions > 0:
+        avg_conf = np.mean([prob for tags in tags_by_category.values() for _, prob in tags])
+        print(f"  Average confidence: {avg_conf:.3f}")
+    return dict(tags_by_category)

utils/ui_components.py ADDED Viewed

	@@ -0,0 +1,137 @@

+"""
+UI components for the Image Tagger application.
+"""
+import os
+import streamlit as st
+from PIL import Image
+def display_progress_bar(prob):
+    """
+    Create an HTML progress bar for displaying probability.
+    Args:
+        prob: Probability value between 0 and 1
+    Returns:
+        HTML string for the progress bar
+    """
+    # Convert probability to percentage
+    percentage = int(prob * 100)
+    # Choose color based on confidence level
+    if prob >= 0.8:
+        color = "green"
+    elif prob >= 0.5:
+        color = "orange"
+    else:
+        color = "red"
+    # Return HTML for a styled progress bar
+    return f"""
+    <div style="margin-bottom: 5px; display: flex; align-items: center;">
+        <div style="flex-grow: 1; background-color: #f0f0f0; border-radius: 3px; height: 8px; position: relative;">
+            <div style="position: absolute; width: {percentage}%; background-color: {color}; height: 8px; border-radius: 3px;"></div>
+        </div>
+        <div style="margin-left: 8px; min-width: 40px; text-align: right; font-size: 0.9em;">{percentage}%</div>
+    </div>
+    """
+def show_example_images(examples_dir):
+    """
+    Display example images from a directory.
+    Args:
+        examples_dir: Directory containing example images
+    Returns:
+        Selected image path or None
+    """
+    selected_image = None
+    if os.path.exists(examples_dir):
+        example_files = [f for f in os.listdir(examples_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+        if example_files:
+            st.write("Select an example image:")
+            # Create a 2-column layout for examples
+            example_cols = st.columns(2)
+            for i, example_file in enumerate(example_files):
+                col_idx = i % 2
+                with example_cols[col_idx]:
+                    example_path = os.path.join(examples_dir, example_file)
+                    # Display thumbnail
+                    try:
+                        img = Image.open(example_path)
+                        st.image(img, width=150, caption=example_file)
+                        # Button to select this example
+                        if st.button(f"Use", key=f"example_{i}"):
+                            selected_image = example_path
+                            st.session_state.original_filename = example_file
+                            # Display full image
+                            st.image(img, use_container_width=True)
+                            st.success(f"Example '{example_file}' selected!")
+                    except Exception as e:
+                        st.error(f"Error loading {example_file}: {str(e)}")
+        else:
+            st.info("No example images found.")
+            st.write("Add some JPG or PNG images to the 'examples' directory.")
+    else:
+        st.info("Examples directory not found.")
+        st.write("Create an 'examples' directory and add some JPG or PNG images.")
+    return selected_image
+def display_batch_results(batch_results):
+    """
+    Display batch processing results.
+    Args:
+        batch_results: Dictionary with batch processing results
+    """
+    if batch_results['success']:
+        st.success(f"✅ Processed {batch_results['processed']} of {batch_results['total']} images")
+        # Show details in an expander
+        with st.expander("Batch Processing Results", expanded=True):
+            # Count successes and failures
+            successes = sum(1 for r in batch_results['results'].values() if r['success'])
+            failures = batch_results['total'] - successes
+            st.write(f"- Successfully tagged: {successes}")
+            st.write(f"- Failed to process: {failures}")
+            if failures > 0:
+                # Show errors
+                st.write("### Processing Errors")
+                for img_path, result in batch_results['results'].items():
+                    if not result['success']:
+                        st.write(f"- **{os.path.basename(img_path)}**: {result.get('error', 'Unknown error')}")
+            # Show the location of the output files
+            if successes > 0:
+                st.write("### Output Files")
+                st.write(f"Tag files have been saved to the 'saved_tags' folder.")
+                # Show the first few as examples
+                st.write("Example outputs:")
+                sample_results = [(path, res) for path, res in batch_results['results'].items() if res['success']][:3]
+                for img_path, result in sample_results:
+                    output_path = result.get('output_path', '')
+                    if output_path and os.path.exists(output_path):
+                        st.write(f"- **{os.path.basename(output_path)}**")
+                        # Show file contents in a collapsible code block
+                        with open(output_path, 'r', encoding='utf-8') as f:
+                            content = f.read()
+                        st.code(content, language='text')
+    else:
+        st.error(f"Batch processing failed: {batch_results.get('error', 'Unknown error')}")