# Hugging Face Space: Sompote/crack_detection
# Space status when captured: Sleeping
# File size: 22,243 Bytes

import gradio as gr
import requests
import json
import base64
from PIL import Image, ImageDraw, ImageFont
import io

def process_with_openrouter(image, prompt, api_key, model="qwen/qwen2.5-vl-32b-instruct", temperature=0.5):
    """Send an image plus a text prompt to OpenRouter and return the model's reply.

    The image is PNG-encoded, base64-embedded as a data URL and posted to the
    chat-completions endpoint together with ``prompt``. If the reply wraps its
    payload in a Markdown code fence, only the fenced payload is returned.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.
        prompt: Text instruction sent alongside the image.
        api_key: OpenRouter API key used as the bearer token.
        model: OpenRouter model identifier.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        A ``(text, status)`` tuple. On success ``text`` is the reply content
        and ``status`` is ``None``; on any failure ``text`` is a human-readable
        message and ``status`` is the string ``"error"``. The function never
        raises: callers rely on the tuple to report problems in the UI.
    """
    if not api_key:
        return "Please enter your OpenRouter API key", "error"

    if image is None:
        return "Please upload an image", "error"

    try:
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        data = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{img_base64}"}
                        }
                    ]
                }
            ],
            "temperature": temperature
        }

        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}", "error"

        result = response.json()

        # A 200 response can still carry an error payload instead of choices;
        # report it explicitly rather than failing with a bare KeyError text.
        choices = result.get('choices') if isinstance(result, dict) else None
        if not choices:
            detail = result.get('error', result) if isinstance(result, dict) else result
            return f"Error: response contained no choices - {detail}", "error"

        content = choices[0]['message']['content']

        # Content may be null or empty (e.g. refusals); there is nothing to parse then.
        if not isinstance(content, str) or not content.strip():
            return "Error: model returned an empty response", "error"

        # Keep only the payload of the first code fence, preferring a ```json fence.
        if '```json' in content:
            content = content.split('```json')[1].split('```')[0].strip()
        elif '```' in content:
            content = content.split('```')[1].split('```')[0].strip()

        return content, None

    except Exception as e:
        # Deliberate catch-all boundary: encoding, network and response-shape
        # failures are all reported through the (message, "error") tuple.
        return f"Error processing request: {str(e)}", "error"

def draw_bounding_boxes(image, detections):
    """Return a copy of ``image`` with one labelled rectangle per detection.

    Each detection is expected to be a dict with ``x``, ``y``, ``width`` and
    ``height`` keys whose values are multiplied by the image width/height to
    obtain pixel coordinates, plus an optional ``class`` used as the label.
    Entries that are not dicts, or whose geometry is missing or not numeric,
    are skipped instead of aborting the whole drawing pass.

    Args:
        image: PIL image to annotate; it is not modified.
        detections: Iterable of detection dicts, or a falsy value.

    Returns:
        ``image`` itself when ``detections`` is empty/None, otherwise an
        annotated copy. Every box of the same class gets the same colour.
    """
    if not detections:
        return image

    # Function-scope import: only needed for the stable fallback-colour index.
    import zlib

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)

    try:
        font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 16)
    except (OSError, ImportError):
        # Font file missing (any non-macOS host) or FreeType support unavailable.
        font = ImageFont.load_default()

    # Predefined colors for different classes
    class_colors = {
        "Class I": "#FF0000",    # Red
        "Class II": "#00FF00",   # Green
        "Class III": "#0000FF",  # Blue
        "Class IV": "#FFFF00",   # Yellow
        "Class V": "#FF00FF",    # Magenta
        "Class VI": "#00FFFF",   # Cyan
        "Class VII": "#FFA500",  # Orange
        "Class VIII": "#800080", # Purple
        "Class IX": "#008000",   # Dark Green
        "Class X": "#FF1493",    # Deep Pink
    }

    # Fallback colors for class names outside the predefined table
    fallback_colors = ["#8B4513", "#2F4F4F", "#DC143C", "#00CED1", "#FF4500", "#DA70D6", "#32CD32", "#FF6347"]

    img_w, img_h = image.width, image.height

    for i, detection in enumerate(detections):
        if not isinstance(detection, dict):
            continue

        try:
            x = float(detection['x']) * img_w
            y = float(detection['y']) * img_h
            width = float(detection['width']) * img_w
            height = float(detection['height']) * img_h
            left, top = int(x), int(y)
            right, bottom = int(x + width), int(y + height)
        except (KeyError, TypeError, ValueError, OverflowError):
            # Missing, non-numeric, NaN or infinite geometry: nothing sensible to draw.
            continue

        # Clamp to the canvas, then order the corners: a negative width/height
        # would otherwise give x2 < x1, which ImageDraw.rectangle rejects.
        x1, x2 = sorted((max(0, min(left, img_w)), max(0, min(right, img_w))))
        y1, y2 = sorted((max(0, min(top, img_h)), max(0, min(bottom, img_h))))

        # Label text: the detection's class, or a positional placeholder.
        raw_class = detection.get('class')
        if raw_class is None or raw_class == '':
            class_name = f'Class {i+1}'
        else:
            class_name = str(raw_class)

        color = class_colors.get(class_name)
        if color is None:
            # CRC-32 is stable across interpreter runs, unlike the built-in
            # hash() of a str, so a class keeps its colour between sessions.
            color_index = zlib.crc32(class_name.encode("utf-8")) % len(fallback_colors)
            color = fallback_colors[color_index]

        # Draw bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)

        # Calculate label size
        text_bbox = draw.textbbox((0, 0), class_name, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        # Prefer the label above the box, then below it; if neither fits on
        # the canvas, place it just inside the top edge so it stays visible.
        if y1 - text_height - 6 >= 0:
            label_y = y1 - text_height - 6
        elif y2 + 4 + text_height + 2 <= img_h:
            label_y = y2 + 4
        else:
            label_y = y1 + 4

        # Keep the label inside the image horizontally (never left of x=0).
        label_x = max(0, min(x1, img_w - text_width - 4))

        # Draw label background
        draw.rectangle(
            [label_x - 2, label_y - 2, label_x + text_width + 2, label_y + text_height + 2],
            fill=color,
            outline=color
        )

        # Draw class name
        draw.text((label_x, label_y), class_name, fill="white", font=font)

    return annotated_image

def create_detection_prompt(class_descriptions, confidence_threshold=0.5, detection_mode="specific"):
    """Assemble the text prompt that asks the model for class-based detections.

    Args:
        class_descriptions: Either a list of description strings or a single
            string holding one description per line (blank lines ignored).
            An entry of the form ``"Name: details"`` is rendered with its name.
        confidence_threshold: Minimum confidence quoted in the instructions.
        detection_mode: ``"specific"`` or ``"include"``; any other value
            selects the "exclude" wording.

    Returns:
        The complete prompt string.
    """
    # A multi-line string means one description per non-blank line.
    if isinstance(class_descriptions, str):
        stripped_lines = (line.strip() for line in class_descriptions.split('\n'))
        class_descriptions = [line for line in stripped_lines if line]

    # Opening sentence per mode; anything unrecognised uses the "exclude" wording.
    mode_headers = {
        "specific": "ONLY detect objects that match these class descriptions and their conditions. Ignore all other objects:",
        "include": "Detect objects matching these class descriptions AND any other objects you can identify:",
    }
    exclude_header = "Detect all objects EXCEPT those matching these class descriptions. Avoid detecting:"
    header = mode_headers.get(detection_mode, exclude_header)

    # Number the classes from 1, splitting "Name: details" on the first colon only.
    numbered = []
    for number, entry in enumerate(class_descriptions, start=1):
        name, colon, details = entry.partition(':')
        if colon:
            numbered.append(f"Class {number} ({name.strip()}): {details.strip()}")
        else:
            numbered.append(f"Class {number}: {entry}")

    class_block = "\n".join(numbered) if numbered else "No class descriptions provided"

    prompt_lines = [
        header,
        '',
        class_block,
        '',
        'Detection Instructions:',
        '- Analyze each object against the class descriptions above',
        '- Check if objects meet the specified conditions for each class',
        f'- Only include detections with confidence above {confidence_threshold}',
        '- Assign objects to the most appropriate class based on the descriptions',
        '',
        'SCALE/RULER DETECTION FOR CRACK MEASUREMENT:',
        '- First look for scales, rulers, measurement tools, or reference objects in the image',
        '- If found, identify the scale markings and determine the measurement reference',
        '- Use the scale to calculate actual crack widths in millimeters or appropriate units',
        '- For crack classifications, measure crack width using the identified scale',
        '- Include actual measurements in your analysis (e.g., "2.5mm crack width based on ruler scale")',
        '- If no scale is visible, estimate crack width relative to common objects or provide qualitative assessment',
        '',
        'Output a JSON list where each entry contains:',
        '- "x": normalized x coordinate (0-1) of top-left corner',
        '- "y": normalized y coordinate (0-1) of top-left corner  ',
        '- "width": normalized width (0-1) of the bounding box',
        '- "height": normalized height (0-1) of the bounding box',
        '- "label": brief description with confidence score',
        '- "confidence": confidence score (0-1)',
        '- "class": the assigned class name (e.g., "Class I", "Class II", etc.)',
        '- "description": why this object matches the class criteria',
        '- "class_number": the class number from the list above (1, 2, 3, etc.)',
        '- "measured_width": actual crack width measurement if scale is available (e.g., "2.5mm", "1.2cm")',
        '- "measurement_method": how the measurement was obtained (e.g., "ruler scale", "coin reference", "estimated")',
        '',
        'Example format:',
        '[{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "Structural crack (0.92)", "confidence": 0.92, "class": "Class I", "description": "Crack width exceeds 2mm threshold based on ruler measurement", "class_number": 1, "measured_width": "2.5mm", "measurement_method": "ruler scale"}]',
    ]

    return "\n".join(prompt_lines)

def create_interface():
    """Build the Gradio Blocks UI for class-based object detection.

    The left column collects the OpenRouter API key, model choice (preset or
    custom ID), detection mode, class descriptions, confidence threshold,
    temperature and the input image. The right column shows the annotated
    image, the detections as JSON and a textual summary.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="Class-Based Object Detection", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🔍 Class-Based Object Detection with Descriptions")
        gr.Markdown("Define classes with descriptions and conditions. Objects will be classified and annotated with class names only.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## ⚙️ Configuration")
                api_key = gr.Textbox(
                    label="OpenRouter API Key",
                    placeholder="Enter your OpenRouter API key...",
                    type="password"
                )

                with gr.Row():
                    use_preset = gr.Radio(
                        choices=["Preset Model", "Custom Model"],
                        value="Preset Model",
                        label="Model Selection",
                        info="Choose preset or enter custom OpenRouter model"
                    )

                model_preset = gr.Dropdown(
                    choices=[
                        "qwen/qwen2.5-vl-32b-instruct",
                        "qwen/qwen-vl-max",
                        "openai/gpt-5-chat",
                        "openai/gpt-5-mini",
                        "anthropic/claude-opus-4.1",
                        "x-ai/grok-4",
                        "google/gemini-2.5-pro",
                        "google/gemini-1.5-pro",
                        "google/gemini-1.5-flash",
                        "anthropic/claude-3.5-sonnet",
                        "openai/gpt-4o",
                        "openai/gpt-4o-mini"
                    ],
                    value="qwen/qwen2.5-vl-32b-instruct",
                    label="Preset Models",
                    info="Select from popular OpenRouter models",
                    visible=True
                )

                custom_model_input = gr.Textbox(
                    label="Custom Model ID",
                    placeholder="Enter any OpenRouter model ID (e.g., google/gemini-1.5-flash, anthropic/claude-3-haiku)",
                    visible=False,
                    info="Copy model IDs from openrouter.ai/models"
                )

                detection_mode = gr.Radio(
                    choices=[
                        ("Detect Only These Classes", "specific"),
                        ("Include These Classes + Others", "include"),
                        ("Exclude These Classes", "exclude")
                    ],
                    value="specific",
                    label="Detection Mode",
                    info="How to handle the specified class descriptions"
                )

                class_descriptions = gr.Textbox(
                    label="Class Descriptions",
                    placeholder="""Define each class with its description and conditions, e.g.:
Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement)
Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
Rust Damage: Rust spots larger than 5cm in diameter
Concrete Spalling: Concrete spalling deeper than 1cm
Paint Defects: Paint peeling areas greater than 10cm²""",
                    value="""Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement)
Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
Rust Damage: Rust spots larger than 5cm in diameter""",
                    lines=8,
                    info="Enter class descriptions, one per line. Format: 'Class Name: Description' or just 'Description'"
                )

                confidence_threshold = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Confidence Threshold",
                    info="Minimum confidence for detection"
                )

                temperature = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.3,
                    step=0.05,
                    label="Temperature",
                    info="Lower values for more consistent results"
                )

                image_input = gr.Image(
                    type="pil",
                    label="Upload Image for Detection"
                )

                detect_btn = gr.Button("🚀 Detect Objects", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("## 📊 Detection Results")

                annotated_image = gr.Image(
                    label="Detected Objects",
                    type="pil"
                )

                detection_results = gr.Textbox(
                    label="Detection Details (JSON)",
                    lines=10,
                    show_copy_button=True
                )

                detection_summary = gr.Textbox(
                    label="Detection Summary",
                    lines=3
                )

        # Show/hide model input based on selection
        def update_model_visibility(use_preset_val):
            """Return visibility updates for (preset dropdown, custom model textbox)."""
            if use_preset_val == "Custom Model":
                return gr.update(visible=False), gr.update(visible=True)
            else:
                return gr.update(visible=True), gr.update(visible=False)

        use_preset.change(
            update_model_visibility,
            inputs=[use_preset],
            outputs=[model_preset, custom_model_input]
        )

        def confidence_of(detection):
            """Return a detection's confidence as a float.

            A missing or non-numeric score counts as 1.0, so one malformed
            entry cannot abort the whole request.
            """
            try:
                return float(detection.get('confidence', 1.0))
            except (TypeError, ValueError):
                return 1.0

        def process_detection(image, class_desc, conf_threshold, api_key_val, use_preset_val, model_preset_val, custom_model_val, temp_val, mode_val):
            """Run one detection request and build the three UI outputs.

            Returns a tuple ``(annotated image or None, details text, summary
            text)`` matching the ``outputs`` wired to the detect button.
            """
            if not api_key_val:
                return None, "❌ Please enter your OpenRouter API key", "No API key provided"

            if image is None:
                return None, "❌ Please upload an image", "No image uploaded"

            if not class_desc or not class_desc.strip():
                return None, "❌ Please enter at least one class description", "No class descriptions provided"

            # Determine which model to use
            if use_preset_val == "Custom Model":
                if not custom_model_val or custom_model_val.strip() == "":
                    return None, "❌ Please enter a custom model ID", "Custom model required"
                final_model = custom_model_val.strip()
            else:
                final_model = model_preset_val

            try:
                prompt = create_detection_prompt(class_desc, conf_threshold, mode_val)

                result, error = process_with_openrouter(image, prompt, api_key_val, final_model, temp_val)

                if error:
                    return None, f"❌ Error: {result}", "Detection failed"

                detections = json.loads(result)

                # Anything other than a JSON list is treated as "no detections".
                if not isinstance(detections, list):
                    detections = []

                # Filter BEFORE drawing so the annotated image, the JSON and the
                # summary all describe the same set of detections.
                filtered_detections = [
                    d for d in detections
                    if isinstance(d, dict) and confidence_of(d) >= conf_threshold
                ]

                if not filtered_detections:
                    return image, "No objects detected matching class descriptions", "No detections matching criteria above confidence threshold"

                annotated_img = draw_bounding_boxes(image, filtered_detections)

                mode_descriptions = {
                    "specific": "Detecting only objects matching class descriptions",
                    "include": "Including specified classes + other objects",
                    "exclude": "Excluding objects matching class descriptions"
                }

                summary_text = f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(filtered_detections)} objects\n🤖 Model: {final_model}"

                # Per class: (number of detections, sum of their confidences).
                class_stats = {}
                for det in filtered_detections:
                    class_name = str(det.get('class', 'unknown'))
                    count, total_conf = class_stats.get(class_name, (0, 0.0))
                    class_stats[class_name] = (count + 1, total_conf + confidence_of(det))

                summary_text += "\n\nClass Detection Results:"
                for class_name, (count, total_conf) in class_stats.items():
                    avg_conf = total_conf / count
                    summary_text += f"\n• {class_name}: {count} detected (avg conf: {avg_conf:.2f})"

                return annotated_img, json.dumps(filtered_detections, indent=2), summary_text

            except json.JSONDecodeError:
                return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
            except Exception as e:
                # UI boundary: report unexpected failures in the output fields.
                return None, f"❌ Error: {str(e)}", "Processing error"

        detect_btn.click(
            process_detection,
            inputs=[image_input, class_descriptions, confidence_threshold, api_key, use_preset, model_preset, custom_model_input, temperature, detection_mode],
            outputs=[annotated_image, detection_results, detection_summary]
        )

        gr.Markdown("""
        ## 💡 Usage Tips
        - **Specific Mode**: Only detect objects matching your class descriptions
        - **Include Mode**: Detect your specified classes plus any other objects found
        - **Exclude Mode**: Detect everything except objects matching your class descriptions
        
        ### 🏷️ Class Definition
        **Format Options:**
        1. `Class Name: Description` - e.g., "Severe Cracks: Crack width more than 2mm"
        2. `Description only` - Will be automatically assigned as "Class I", "Class II", etc.
        
        **Annotation Behavior:**
        - Images show only class names (e.g., "Class I", "Class II")
        - Same class = same color throughout the image
        - Clean, simple visual identification
        
        ### 🤖 Model Selection
        **Default Models (Recommended):**
        - `qwen/qwen2.5-vl-32b-instruct` - Advanced Qwen vision model optimized for detailed analysis (Default)
        - `qwen/qwen-vl-max` - Premium Qwen vision model with maximum capabilities
        - `openai/gpt-5-chat` - Latest GPT-5 with advanced vision capabilities
        - `openai/gpt-5-mini` - Faster, efficient GPT-5 variant
        - `anthropic/claude-opus-4.1` - Next-gen Claude with superior reasoning
        - `x-ai/grok-4` - Advanced Grok model with detailed analysis
        
        **Custom Models**: Enter any OpenRouter model ID from [openrouter.ai/models](https://openrouter.ai/models)
        
        ### Example Class Descriptions:
        ```
        Severe Cracks: Crack width more than 2mm (use ruler/scale for measurement)
        Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
        Rust Damage: Rust spots larger than 5cm in diameter
        Concrete Spalling: Concrete spalling deeper than 1cm
        Paint Defects: Paint peeling areas greater than 10cm²
        Water Damage: Water damage stains larger than 15cm
        ```
        
        ### 📏 Scale-Based Measurement:
        - **Automatic Scale Detection**: The system looks for rulers, measuring tools, or reference objects
        - **Precise Measurements**: When scales are found, actual crack widths are calculated
        - **Measurement Methods**: Supports rulers, crack gauges, coins, or other reference objects
        - **Enhanced Classification**: More accurate class assignment based on measured dimensions
        
        - Enter one class description per line
        - Be specific about conditions and measurements
        - Objects will be classified and labeled with class names only
        - Adjust confidence threshold to filter weak detections
        - Get your API key from [openrouter.ai](https://openrouter.ai/)
        """)

    return demo

if __name__ == "__main__":
    # Script entry point: announce start-up, build the UI, then serve it
    # locally (no public share link) and open it in the default browser.
    print("🚀 Starting Object Detection App...")
    demo = create_interface()
    launch_options = {"share": False, "inbrowser": True}
    demo.launch(**launch_options)