Spaces:

DocForg
/

Document_Forgery_Detection

Sleeping

App Files Files Community

JKrishnanandhaa committed on Jan 20

Commit

d192120

verified ·

1 Parent(s): 86140ca

Update app.py

Browse files

Files changed (1) hide show

app.py +206 -651

app.py CHANGED Viewed

@@ -1,698 +1,253 @@
 """
-Document Forgery Detection - Gradio Interface for Hugging Face Spaces
-This app provides a web interface for detecting and classifying document forgeries.
 """
-import gradio as gr
-import torch
 import cv2
 import numpy as np
-from PIL import Image
-import json
-from pathlib import Path
-import sys
-from typing import Dict, List, Tuple
-import plotly.graph_objects as go
-# Add src to path
-sys.path.insert(0, str(Path(__file__).parent))
-from src.models import get_model
-from src.config import get_config
-from src.data.preprocessing import DocumentPreprocessor
-from src.data.augmentation import DatasetAwareAugmentation
-from src.features.region_extraction import get_mask_refiner, get_region_extractor
-from src.features.feature_extraction import get_feature_extractor
-from src.training.classifier import ForgeryClassifier
-# Class names
-CLASS_NAMES = {0: 'Copy-Move', 1: 'Splicing', 2: 'Text Substitution'}
-CLASS_COLORS = {
-    0: (217, 83, 79),    # #d9534f - Muted red
-    1: (92, 184, 92),    # #5cb85c - Muted green
-    2: (65, 105, 225)    # #4169E1 - Royal blue
-}
-# Actual model performance metrics
-MODEL_METRICS = {
-    'segmentation': {
-        'dice': 0.6212,
-        'iou': 0.4506,
-        'precision': 0.7077,
-        'recall': 0.5536
-    },
-    'classification': {
-        'overall_accuracy': 0.8897,
-        'per_class': {
-            'copy_move': 0.92,
-            'splicing': 0.85,
-            'generation': 0.90
-        }
-    }
-}
-def create_gauge_chart(value: float, title: str, max_value: float = 1.0) -> go.Figure:
-    """Create a subtle radial gauge chart"""
-    fig = go.Figure(go.Indicator(
-        mode="gauge+number",
-        value=value * 100,
-        domain={'x': [0, 1], 'y': [0, 1]},
-        title={'text': title, 'font': {'size': 14}},
-        number={'suffix': '%', 'font': {'size': 24}},
-        gauge={
-            'axis': {'range': [0, 100], 'tickwidth': 1},
-            'bar': {'color': '#4169E1', 'thickness': 0.7},
-            'bgcolor': 'rgba(0,0,0,0)',
-            'borderwidth': 0,
-            'steps': [
-                {'range': [0, 50], 'color': 'rgba(217, 83, 79, 0.1)'},
-                {'range': [50, 75], 'color': 'rgba(240, 173, 78, 0.1)'},
-                {'range': [75, 100], 'color': 'rgba(92, 184, 92, 0.1)'}
-            ]
-        }
-    ))
-    fig.update_layout(
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(0,0,0,0)',
-        height=200,
-        margin=dict(l=20, r=20, t=40, b=20)
-    )
-    return fig
-def create_detection_metrics_gauge(avg_confidence: float, iou: float, precision: float, recall: float, num_detections: int) -> go.Figure:
-    """Create a high-fidelity radial bar chart (concentric rings)"""
-    # Calculate percentages (0-100)
-    metrics = [
-        {'name': 'Confidence', 'val': avg_confidence * 100 if num_detections > 0 else 0, 'color': '#4169E1', 'base': 80},
-        {'name': 'Precision', 'val': precision * 100, 'color': '#5cb85c', 'base': 60},
-        {'name': 'Recall', 'val': recall * 100, 'color': '#f0ad4e', 'base': 40},
-        {'name': 'IoU', 'val': iou * 100, 'color': '#d9534f', 'base': 20}
-    ]
-    fig = go.Figure()
-    for m in metrics:
-        # 1. Add background track (faint gray ring)
-        fig.add_trace(go.Barpolar(
-            r=[15],
-            theta=[180],
-            width=[360],
-            base=m['base'],
-            marker_color='rgba(128,128,128,0.1)',
-            hoverinfo='none',
-            showlegend=False
-        ))
-        # 2. Add the actual metric bar (the colored arc)
-        # 100% = 360 degrees
-        angle_width = m['val'] * 3.6
-        fig.add_trace(go.Barpolar(
-            r=[15],
-            theta=[angle_width / 2],
-            width=[angle_width],
-            base=m['base'],
-            name=f"{m['name']}: {m['val']:.1f}%",
-            marker_color=m['color'],
-            marker_line_width=0,
-            hoverinfo='name'
-        ))
-    fig.update_layout(
-        polar=dict(
-            hole=0.1,
-            radialaxis=dict(visible=False, range=[0, 100]),
-            angularaxis=dict(
-                rotation=90,           # Start at 12 o'clock
-                direction='clockwise', # Go clockwise
-                gridcolor='rgba(128,128,128,0.2)',
-                tickmode='array',
-                tickvals=[0, 90, 180, 270],
-                ticktext=['0%', '25%', '50%', '75%'],
-                showticklabels=True,
-                tickfont=dict(size=12, color='#888')
-            ),
-            bgcolor='rgba(0,0,0,0)'
-        ),
-        showlegend=True,
-        legend=dict(
-            orientation="v",
-            yanchor="middle",
-            y=0.5,
-            xanchor="left",
-            x=1.1,
-            font=dict(size=14, color='white'),
-            itemwidth=30
-        ),
-        paper_bgcolor='rgba(0,0,0,0)',
-        plot_bgcolor='rgba(0,0,0,0)',
-        height=450,
-        margin=dict(l=60, r=180, t=40, b=40)
-    )
-    return fig
-class ForgeryDetector:
-    """Main forgery detection pipeline"""
-    def __init__(self):
-        print("Loading models...")
-        # Load config
-        self.config = get_config('config.yaml')
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        # Load segmentation model
-        self.model = get_model(self.config).to(self.device)
-        checkpoint = torch.load('models/best_doctamper.pth', map_location=self.device)
-        self.model.load_state_dict(checkpoint['model_state_dict'])
-        self.model.eval()
-        # Load classifier
-        self.classifier = ForgeryClassifier(self.config)
-        self.classifier.load('models/classifier')
-        # Initialize components
-        self.preprocessor = DocumentPreprocessor(self.config, 'doctamper')
-        self.augmentation = DatasetAwareAugmentation(self.config, 'doctamper', is_training=False)
-        self.mask_refiner = get_mask_refiner(self.config)
-        self.region_extractor = get_region_extractor(self.config)
-        self.feature_extractor = get_feature_extractor(self.config, is_text_document=True)
-        print("✓ Models loaded successfully!")
-    def detect(self, image):
         """
-        Detect forgeries in document image or PDF
         Returns:
-            original_image: Original uploaded image
-            overlay_image: Image with detection overlay
-            gauge_dice: Dice score gauge
-            gauge_accuracy: Accuracy gauge
-            results_html: Detection results as HTML
         """
-        # Handle file path input (from gr.Image with type="filepath")
-        if isinstance(image, str):
-            if image.lower().endswith(('.doc', '.docx')):
-                # Handle Word documents - multiple fallback strategies
-                import tempfile
-                import os
-                import subprocess
-                temp_pdf = None
-                try:
-                    # Strategy 1: Try docx2pdf (Windows with MS Word)
-                    try:
-                        from docx2pdf import convert
-                        temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
-                        temp_pdf.close()
-                        convert(image, temp_pdf.name)
-                        pdf_path = temp_pdf.name
-                    except Exception as e1:
-                        # Strategy 2: Try LibreOffice (Linux/Mac)
-                        try:
-                            temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
-                            temp_pdf.close()
-                            subprocess.run([
-                                'libreoffice', '--headless', '--convert-to', 'pdf',
-                                '--outdir', os.path.dirname(temp_pdf.name),
-                                image
-                            ], check=True, capture_output=True)
-                            # LibreOffice creates file with original name + .pdf
-                            base_name = os.path.splitext(os.path.basename(image))[0]
-                            generated_pdf = os.path.join(os.path.dirname(temp_pdf.name), f"{base_name}.pdf")
-                            if os.path.exists(generated_pdf):
-                                os.rename(generated_pdf, temp_pdf.name)
-                                pdf_path = temp_pdf.name
-                            else:
-                                raise Exception("LibreOffice conversion failed")
-                        except Exception as e2:
-                            # Strategy 3: Extract text and create simple image
-                            from docx import Document
-                            doc = Document(image)
-                            # Extract text
-                            text_lines = []
-                            for para in doc.paragraphs[:40]:  # First 40 paragraphs
-                                if para.text.strip():
-                                    text_lines.append(para.text[:100])  # Max 100 chars per line
-                            # Create image with text
-                            img_height = 1400
-                            img_width = 1000
-                            image = np.ones((img_height, img_width, 3), dtype=np.uint8) * 255
-                            y_offset = 60
-                            for line in text_lines[:35]:
-                                cv2.putText(image, line, (40, y_offset),
-                                          cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 1, cv2.LINE_AA)
-                                y_offset += 35
-                            # Skip to end - image is ready
-                            pdf_path = None
-                    # If we got a PDF, convert it to image
-                    if pdf_path and os.path.exists(pdf_path):
-                        import fitz
-                        pdf_document = fitz.open(pdf_path)
-                        page = pdf_document[0]
-                        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                        image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
-                        if pix.n == 4:
-                            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-                        pdf_document.close()
-                        os.unlink(pdf_path)
-                except Exception as e:
-                    raise ValueError(f"Could not process Word document. Please convert to PDF or image first. Error: {str(e)}")
-                finally:
-                    # Clean up temp file if it exists
-                    if temp_pdf and os.path.exists(temp_pdf.name):
-                        try:
-                            os.unlink(temp_pdf.name)
-                        except:
-                            pass
-            elif image.lower().endswith('.pdf'):
-                # Handle PDF files
-                import fitz  # PyMuPDF
-                pdf_document = fitz.open(image)
-                page = pdf_document[0]
-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
-                image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
-                if pix.n == 4:
-                    image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-                pdf_document.close()
-            else:
-                # Load image file
-                image = Image.open(image)
-                image = np.array(image)
-        # Convert PIL to numpy
-        if isinstance(image, Image.Image):
-            image = np.array(image)
-        # Convert to RGB
-        if len(image.shape) == 2:
-            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-        elif image.shape[2] == 4:
-            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-        original_image = image.copy()
-        # Preprocess
-        preprocessed, _ = self.preprocessor(image, None)
-        # Augment
-        augmented = self.augmentation(preprocessed, None)
-        image_tensor = augmented['image'].unsqueeze(0).to(self.device)
-        # Run localization
-        with torch.no_grad():
-            logits, decoder_features = self.model(image_tensor)
-            prob_map = torch.sigmoid(logits).cpu().numpy()[0, 0]
-        # Resize probability map to match original image size to avoid index mismatch errors
-        prob_map_resized = cv2.resize(
-            prob_map,
-            (original_image.shape[1], original_image.shape[0]),
-            interpolation=cv2.INTER_LINEAR
         )
-        # Refine mask
-        binary_mask = (prob_map_resized > 0.5).astype(np.uint8)
-        refined_mask = self.mask_refiner.refine(prob_map_resized, original_size=original_image.shape[:2])
-        # Ensure refined_mask matches prob_map_resized dimensions
-        if refined_mask.shape != prob_map_resized.shape:
-            refined_mask = cv2.resize(
-                refined_mask,
-                (prob_map_resized.shape[1], prob_map_resized.shape[0]),
                 interpolation=cv2.INTER_NEAREST
             )
-        # Safety check: Ensure prob_map_resized and refined_mask have same dimensions (fallback)
-        if prob_map_resized.shape != refined_mask.shape:
-            prob_map_resized = cv2.resize(
-                prob_map_resized,
-                (refined_mask.shape[1], refined_mask.shape[0]),
-                interpolation=cv2.INTER_LINEAR
-            )
-        # Extract regions
-        regions = self.region_extractor.extract(refined_mask, prob_map_resized, original_image)
-        # Classify regions
-        results = []
-        for region in regions:
-            # Get decoder features and handle shape
-            df = decoder_features[0].cpu()  # Get first decoder feature
-            # Remove batch dimension if present: [1, C, H, W] -> [C, H, W]
-            if df.ndim == 4:
-                df = df.squeeze(0)
-            # Now df should be [C, H, W]
-            _, fh, fw = df.shape
-            region_mask = region['region_mask']
-            if region_mask.shape != (fh, fw):
-                region_mask = cv2.resize(
-                    region_mask.astype(np.uint8),
-                    (fw, fh),
-                    interpolation=cv2.INTER_NEAREST
-            )
-            region_mask = region_mask.astype(bool)
-            # Extract features
-            features = self.feature_extractor.extract(
-                preprocessed,
-                region['region_mask'],
-                [f.cpu() for f in decoder_features]
-            )
-            # Reshape features to 2D array
-            if features.ndim == 1:
-                features = features.reshape(1, -1)
-            # Pad/truncate features to match classifier
-            expected_features = 526
-            current_features = features.shape[1]
-            if current_features < expected_features:
-                padding = np.zeros((features.shape[0], expected_features - current_features))
-                features = np.hstack([features, padding])
-            elif current_features > expected_features:
-                features = features[:, :expected_features]
-            # Classify
-            predictions, confidences = self.classifier.predict(features)
-            forgery_type = int(predictions[0])
-            confidence = float(confidences[0])
-            if confidence > 0.6:
-                results.append({
-                    'region_id': region['region_id'],
-                    'bounding_box': region['bounding_box'],
-                    'forgery_type': CLASS_NAMES[forgery_type],
-                    'confidence': confidence
-                })
-        # Create visualization
-        overlay = self._create_overlay(original_image, results)
-        # Calculate actual detection metrics from probability map and mask
-        num_detections = len(results)
-        avg_confidence = sum(r['confidence'] for r in results) / num_detections if num_detections > 0 else 0
-        # Calculate IoU, Precision, Recall from the refined mask and probability map
-        if num_detections > 0:
-            # Use resized prob_map to match refined_mask dimensions
-            high_conf_mask = (prob_map_resized > 0.7).astype(np.uint8)
-            predicted_positive = np.sum(refined_mask > 0)
-            high_conf_positive = np.sum(high_conf_mask > 0)
-            # Calculate intersection and union
-            intersection = np.sum((refined_mask > 0) & (high_conf_mask > 0))
-            union = np.sum((refined_mask > 0) | (high_conf_mask > 0))
-            # Calculate metrics
-            iou = intersection / union if union > 0 else 0
-            precision = intersection / predicted_positive if predicted_positive > 0 else 0
-            recall = intersection / high_conf_positive if high_conf_positive > 0 else 0
-        else:
-            # No detections - use zeros
-            iou = 0
-            precision = 0
-            recall = 0
-        # Create detection metrics gauge with actual values
-        metrics_gauge = create_detection_metrics_gauge(avg_confidence, iou, precision, recall, num_detections)
-        # Create HTML response
-        results_html = self._create_html_report(results)
-        return overlay, metrics_gauge, results_html
-    def _create_overlay(self, image, results):
-        """Create overlay visualization"""
-        overlay = image.copy()
-        for result in results:
-            bbox = result['bounding_box']
-            x, y, w, h = bbox
-            forgery_type = result['forgery_type']
-            confidence = result['confidence']
-            # Get color
-            forgery_id = [k for k, v in CLASS_NAMES.items() if v == forgery_type][0]
-            color = CLASS_COLORS[forgery_id]
-            # Draw rectangle
-            cv2.rectangle(overlay, (x, y), (x+w, y+h), color, 2)
-            # Draw label
-            label = f"{forgery_type}: {confidence:.1%}"
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 0.5
-            thickness = 1
-            (label_w, label_h), baseline = cv2.getTextSize(label, font, font_scale, thickness)
-            cv2.rectangle(overlay, (x, y-label_h-8), (x+label_w+4, y), color, -1)
-            cv2.putText(overlay, label, (x+2, y-4), font, font_scale, (255, 255, 255), thickness)
-        return overlay
-    def _create_html_report(self, results):
-        """Create HTML report with detection results"""
-        num_detections = len(results)
-        if num_detections == 0:
-            return """
-            <div style='padding:12px; border:1px solid #5cb85c; border-radius:8px;'>
-                ✓ <b>No forgery detected.</b><br>
-                The document appears to be authentic.
-            </div>
-            """
-        # Calculate statistics
-        avg_confidence = sum(r['confidence'] for r in results) / num_detections
-        type_counts = {}
-        for r in results:
-            ft = r['forgery_type']
-            type_counts[ft] = type_counts.get(ft, 0) + 1
-        html = f"""
-        <div style='padding:12px; border:1px solid #d9534f; border-radius:8px;'>
-            <b>⚠️ Forgery Detected</b><br><br>
-            <b>Summary:</b><br>
-            • Regions detected: {num_detections}<br>
-            • Average confidence: {avg_confidence*100:.1f}%<br><br>
-            <b>Detections:</b><br>
-        """
-        for i, result in enumerate(results, 1):
-            forgery_type = result['forgery_type']
-            confidence = result['confidence']
-            bbox = result['bounding_box']
-            forgery_id = [k for k, v in CLASS_NAMES.items() if v == forgery_type][0]
-            color_rgb = CLASS_COLORS[forgery_id]
-            color_hex = f"#{color_rgb[0]:02x}{color_rgb[1]:02x}{color_rgb[2]:02x}"
-            html += f"""
-            <div style='margin:8px 0; padding:8px; border-left:3px solid {color_hex}; background:rgba(0,0,0,0.02);'>
-                <b>Region {i}:</b> {forgery_type} ({confidence*100:.1f}%)<br>
-                <small>Location: ({bbox[0]}, {bbox[1]}) | Size: {bbox[2]}×{bbox[3]}px</small>
-            </div>
-            """
-        html += """
-        </div>
-        """
-        return html
-# Initialize detector
-detector = ForgeryDetector()
-def detect_forgery(file, webcam):
-    """Gradio interface function - handles file uploads and webcam capture"""
-    try:
-        # Use whichever input has data
-        source = file if file is not None else webcam
-        if source is None:
-            empty_html = "<div style='padding:12px; border:1px solid #d9534f; border-radius:8px;'>❌ <b>No input provided.</b> Please upload a file or use webcam.</div>"
-            return None, None, empty_html
-        # Detect forgeries
-        overlay, metrics_gauge, results_html = detector.detect(source)
-        return overlay, metrics_gauge, results_html
-    except Exception as e:
-        import traceback
-        error_details = traceback.format_exc()
-        print(f"Error: {error_details}")
-        error_html = f"""
-        <div style='padding:12px; border:1px solid #d9534f; border-radius:8px;'>
-            ❌ <b>Error:</b> {str(e)}
-        </div>
         """
-        return None, None, error_html
-# Custom CSS - subtle styling
-custom_css = """
-.predict-btn {
-    background-color: #4169E1 !important;
-    color: white !important;
-}
-.clear-btn {
-    background-color: #6A89A7 !important;
-    color: white !important;
-}
-"""
-# Create Gradio interface
-with gr.Blocks(css=custom_css) as demo:
-    gr.Markdown(
         """
-        # 📄 Document Forgery Detection
-        Upload a document image or PDF to detect and classify forgeries using deep learning. The system combines MobileNetV3-UNet for precise localization and LightGBM for classification, identifying Copy-Move, Splicing, and Text Substitution manipulations with detailed confidence scores and bounding boxes. Trained on 140K samples for robust performance.
         """
-    )
-    gr.Markdown("---")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### Upload Document")
-            with gr.Tabs():
-                with gr.Tab("📤 Upload File"):
-                    input_file = gr.File(
-                        label="Upload Image, PDF, or Document",
-                        file_types=["image", ".pdf", ".doc", ".docx"],
-                        type="filepath"
-                    )
-                with gr.Tab("📷 Webcam"):
-                    input_webcam = gr.Image(
-                        label="Capture from Webcam",
-                        type="filepath",
-                        sources=["webcam"]
-                    )
-            with gr.Row():
-                clear_btn = gr.Button("🧹 Clear", elem_classes="clear-btn")
-                analyze_btn = gr.Button("🔍 Analyze", elem_classes="predict-btn")
-        with gr.Column(scale=1):
-            gr.Markdown("### Information")
-            gr.HTML(
-                """
-                <div style='padding:16px; border:1px solid #ccc; border-radius:8px; background:var(--background-fill-primary);'>
-                    <p style='margin-top:0;'><b>Supported formats:</b></p>
-                    <ul style='margin:8px 0; padding-left:20px;'>
-                        <li>Images: JPG, PNG, BMP, TIFF, WebP</li>
-                        <li>PDF: First page analyzed</li>
-                    </ul>
-                    <p style='margin-bottom:4px;'><b>Forgery types:</b></p>
-                    <ul style='margin:8px 0; padding-left:20px;'>
-                        <li style='color:#d9534f;'><b>Copy-Move:</b> <span style='color:inherit;'>Duplicated regions</span></li>
-                        <li style='color:#4169E1;'><b>Splicing:</b> <span style='color:inherit;'>Mixed sources</span></li>
-                        <li style='color:#5cb85c;'><b>Text Substitution:</b> <span style='color:inherit;'>Modified text</span></li>
-                    </ul>
-                </div>
-                """
-            )
-        with gr.Column(scale=2):
-            gr.Markdown("### Detection Results")
-            output_image = gr.Image(label="Detected Forgeries", type="numpy")
-    gr.Markdown("---")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### Analysis Report")
-            output_html = gr.HTML(
-                value="<i>No analysis yet. Upload a document and click Analyze.</i>"
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("### Detection Metrics")
-            metrics_gauge = gr.Plot(label="Concentric Metrics Gauge")
-    gr.Markdown("---")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### Model Architecture")
-            gr.HTML(
-                """
-                <div style='padding:12px; border:1px solid #444; border-radius:10px; background:var(--background-fill-primary);'>
-                    <p style="margin:0 0 0px 0; font-size:1.05em;"><b>Localization:</b> MobileNetV3-Small + UNet</p>
-                    <p style='margin:0 20px 5px 0; margin-left:0.5cm; font-size:0.9em; opacity:0.85;'>Dice: 62.12% | IoU: 45.06% | Precision: 70.77% | Recall: 55.36%</p>
-                    <p style="margin:0 0 0 0; font-size:1.05em;"><b>Classification:</b> LightGBM with 526 features</p>
-                    <p style="margin:0 20px 0 0; margin-left:0.5cm; font-size:0.9em; opacity:0.85;">Train Accuracy: 90.53% | Val Accuracy: 88.97%</p>
-                    <p style='margin-top:5px; margin-bottom:0; font-size:1.05em;'><b>Training:</b> 140K samples from DocTamper dataset</p>
-                </div>
-                """
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("### Model Performance")
-            gr.HTML(
-                f"""
-                <div style='padding:12px; border:1px solid #444; border-radius:10px; background:var(--background-fill-primary);'>
-                    <p style='margin-top:0; margin-bottom:12px;'><b>Trained Model Performance:</b></p>
-                    <b>Segmentation Dice: {MODEL_METRICS['segmentation']['dice']*100:.2f}%</b>
-                    <div style='width:100%; background:#333; height:12px; border-radius:6px; margin-bottom:12px;'>
-                        <div style='width:{MODEL_METRICS['segmentation']['dice']*100:.1f}%; background:#4169E1; height:12px; border-radius:6px;'></div>
-                    </div>
-                    <b>Classification Accuracy: {MODEL_METRICS['classification']['overall_accuracy']*100:.2f}%</b>
-                    <div style='width:100%; background:#333; height:12px; border-radius:6px;'>
-                        <div style='width:{MODEL_METRICS['classification']['overall_accuracy']*100:.1f}%; background:#5cb85c; height:12px; border-radius:6px;'></div>
-                    </div>
-                </div>
-                """
-            )
-    # Event handlers
-    analyze_btn.click(
-        fn=detect_forgery,
-        inputs=[input_file, input_webcam],
-        outputs=[output_image, metrics_gauge, output_html]
-    )
-    clear_btn.click(
-        fn=lambda: (None, None, None, None, "<i>No analysis yet. Upload a document and click Analyze.</i>"),
-        inputs=None,
-        outputs=[input_file, input_webcam, output_image, metrics_gauge, output_html]
-    )
-if __name__ == "__main__":
-    demo.launch()

 """
+Mask refinement and region extraction
+Implements Critical Fix #3: Adaptive Mask Refinement Thresholds
 """
 import cv2
 import numpy as np
+from typing import List, Tuple, Dict, Optional
+from scipy import ndimage
+from skimage.measure import label, regionprops
+class MaskRefiner:
+    """
+    Mask refinement with adaptive thresholds
+    Turns a forgery probability map into a cleaned-up binary mask:
+    threshold -> morphological closing -> morphological opening ->
+    small-region removal -> optional resize (see ``refine``).
+    Implements Critical Fix #3: Dataset-specific minimum region areas
+    """
+    def __init__(self, config, dataset_name: str = 'default'):
+        """
+        Initialize mask refiner
+        Args:
+            config: Configuration object; must provide ``get(key, default)``
+                and ``get_min_region_area(dataset_name)``
+            dataset_name: Dataset name for adaptive thresholds
+        """
+        self.config = config
+        self.dataset_name = dataset_name
+        # Get mask refinement parameters
+        self.threshold = config.get('mask_refinement.threshold', 0.5)
+        self.closing_kernel = config.get('mask_refinement.morphology.closing_kernel', 5)
+        self.opening_kernel = config.get('mask_refinement.morphology.opening_kernel', 3)
+        # Critical Fix #3: Adaptive thresholds per dataset.
+        # Used as a fraction of the image area (it is multiplied by the
+        # pixel count in _remove_small_regions).
+        self.min_region_area = config.get_min_region_area(dataset_name)
+        print(f"MaskRefiner initialized for {dataset_name}")
+        print(f"Min region area: {self.min_region_area * 100:.2f}%")
+    def refine(self,
+               probability_map: np.ndarray,
+               original_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
         """
+        Refine probability map to binary mask
+        Args:
+            probability_map: Forgery probability map (H, W), values [0, 1]
+            original_size: Optional (H, W) to resize mask back to original
         Returns:
+            Refined binary mask (H, W), dtype uint8 with values 0 or 1
         """
+        # Threshold to binary
+        binary_mask = (probability_map > self.threshold).astype(np.uint8)
+        # Morphological closing (fill broken strokes)
+        closing_kernel = cv2.getStructuringElement(
+            cv2.MORPH_RECT,
+            (self.closing_kernel, self.closing_kernel)
+        )
+        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, closing_kernel)
+        # Morphological opening (remove isolated noise)
+        opening_kernel = cv2.getStructuringElement(
+            cv2.MORPH_RECT,
+            (self.opening_kernel, self.opening_kernel)
         )
+        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, opening_kernel)
+        # Critical Fix #3: Remove small regions with adaptive threshold
+        binary_mask = self._remove_small_regions(binary_mask)
+        # Resize to original size if provided.
+        # Nearest-neighbour keeps the mask strictly binary.
+        if original_size is not None:
+            binary_mask = cv2.resize(
+                binary_mask,
+                (original_size[1], original_size[0]),  # cv2 uses (W, H)
                 interpolation=cv2.INTER_NEAREST
             )
+        return binary_mask
+    def _remove_small_regions(self, mask: np.ndarray) -> np.ndarray:
+        """
+        Remove regions smaller than minimum area threshold
+        Args:
+            mask: Binary mask (H, W); non-zero pixels are foreground
+        Returns:
+            Filtered mask with the same shape and dtype as ``mask``; pixels
+            of kept regions are 1, everything else is 0
+        """
+        # Calculate minimum pixel count
+        image_area = mask.shape[0] * mask.shape[1]
+        min_pixels = int(image_area * self.min_region_area)
+        # Label connected components
+        # NOTE(review): ndimage.label defaults to 4-connectivity, while
+        # RegionExtractor.extract labels with 8-connectivity -- confirm the
+        # mismatch is intended.
+        labeled_mask, num_features = ndimage.label(mask)
+        # Count the pixels of every label in a single pass instead of
+        # re-scanning the whole mask once per region.
+        region_areas = np.bincount(labeled_mask.ravel(), minlength=num_features + 1)
+        # Keep only large enough regions; label 0 is background, never kept
+        keep_label = region_areas >= min_pixels
+        keep_label[0] = False
+        # Look every pixel's label up in the keep table to build the mask
+        return keep_label[labeled_mask].astype(mask.dtype)
class RegionExtractor:
    """
    Extract individual regions from a binary mask.

    Implements Critical Fix #4: Region Confidence Aggregation - the
    confidence of a region is the mean of the probability map over the
    pixels that belong to that region.
    """

    def __init__(self, config, dataset_name: str = 'default'):
        """
        Initialize region extractor

        Args:
            config: Configuration object; must provide
                ``get_min_region_area(dataset_name)``
            dataset_name: Dataset name used to look up the minimum region area
        """
        self.config = config
        self.dataset_name = dataset_name
        # Not consulted by the extraction methods of this class; kept as an
        # attribute for code that reads it from the instance.
        self.min_region_area = config.get_min_region_area(dataset_name)

    def extract(self,
                binary_mask: np.ndarray,
                probability_map: np.ndarray,
                original_image: np.ndarray) -> List[Dict]:
        """
        Extract one entry per connected component of the binary mask.

        Args:
            binary_mask: Refined binary mask (H, W)
            probability_map: Original probability map (H, W); resized to the
                mask's shape when the two differ
            original_image: Original image (H, W, 3)

        Returns:
            List of region dictionaries with keys ``region_id``,
            ``bounding_box`` ([x, y, width, height]), ``area``, ``centroid``
            ((x, y)), ``region_mask`` (full size), ``region_mask_cropped``,
            ``region_image`` (copy of the bounding-box crop), ``confidence``
            and ``mask_probability_mean``.
        """
        regions = []

        print("[REGION_EXTRACT] Input shapes:")
        print(f"  - binary_mask: {binary_mask.shape}")
        print(f"  - probability_map: {probability_map.shape}")
        print(f"  - original_image: {original_image.shape}")

        # Safety check: bring the probability map to the mask's resolution
        # once, up front, so every per-region lookup below is aligned.
        if probability_map.shape != binary_mask.shape:
            print(f"[REGION_EXTRACT] WARNING: Shape mismatch! Resizing probability_map from {probability_map.shape} to {binary_mask.shape}")
            probability_map = cv2.resize(
                probability_map,
                (binary_mask.shape[1], binary_mask.shape[0]),  # cv2 uses (W, H)
                interpolation=cv2.INTER_LINEAR
            )
            print(f"[REGION_EXTRACT] After resize: probability_map shape = {probability_map.shape}")

        # Connected component analysis (8-connectivity)
        labeled_mask = label(binary_mask, connectivity=2)

        for prop in regionprops(labeled_mask):
            # Use the component's own label rather than a loop counter so the
            # mask lookup cannot drift from the properties being reported.
            region_id = int(prop.label)

            # Bounding box as (min_row, min_col, max_row, max_col)
            y_min, x_min, y_max, x_max = prop.bbox
            box = (slice(y_min, y_max), slice(x_min, x_max))

            # Full-size mask of this region and its bounding-box crop
            region_mask = (labeled_mask == region_id).astype(np.uint8)
            region_mask_cropped = region_mask[box]

            # Cropped region image (copied so callers may modify it freely)
            region_image = original_image[box].copy()

            # Critical Fix #4: Region-level confidence aggregation.
            # All pixels of the region lie inside its bounding box, so the
            # crop yields the same values as indexing the full map.
            region_probs = probability_map[box][region_mask_cropped > 0]
            region_confidence = (
                float(np.mean(region_probs)) if region_probs.size > 0 else 0.0
            )

            regions.append({
                'region_id': region_id,
                'bounding_box': [int(x_min), int(y_min),
                                 int(x_max - x_min), int(y_max - y_min)],
                # Plain int, consistent with the other numeric fields
                'area': int(prop.area),
                'centroid': (int(prop.centroid[1]), int(prop.centroid[0])),
                'region_mask': region_mask,
                'region_mask_cropped': region_mask_cropped,
                'region_image': region_image,
                'confidence': region_confidence,
                'mask_probability_mean': region_confidence
            })

        return regions

    def extract_for_casia(self,
                          binary_mask: np.ndarray,
                          probability_map: np.ndarray,
                          original_image: np.ndarray) -> List[Dict]:
        """
        Critical Fix #6: CASIA handling - treat entire image as one region

        Args:
            binary_mask: Binary mask (may be empty for authentic images);
                not used, accepted for signature parity with ``extract``
            probability_map: Probability map
            original_image: Original image

        Returns:
            Single-element list whose region covers the entire image
        """
        h, w = original_image.shape[:2]

        # Create single region covering entire image
        region_mask = np.ones((h, w), dtype=np.uint8)

        # Overall confidence from probability map; an empty map yields 0.0
        # (same fallback as ``extract``) instead of NaN.
        if probability_map.size > 0:
            overall_confidence = float(np.mean(probability_map))
        else:
            overall_confidence = 0.0

        return [{
            'region_id': 1,
            'bounding_box': [0, 0, w, h],
            'area': h * w,
            'centroid': (w // 2, h // 2),
            'region_mask': region_mask,
            'region_mask_cropped': region_mask,
            'region_image': original_image,
            'confidence': overall_confidence,
            'mask_probability_mean': overall_confidence
        }]
def get_mask_refiner(config, dataset_name: str = 'default') -> MaskRefiner:
    """
    Factory function for mask refiner

    Args:
        config: Configuration object forwarded to ``MaskRefiner``
        dataset_name: Dataset name forwarded to ``MaskRefiner``

    Returns:
        A newly constructed ``MaskRefiner``
    """
    refiner = MaskRefiner(config, dataset_name)
    return refiner
def get_region_extractor(config, dataset_name: str = 'default') -> RegionExtractor:
    """
    Factory function for region extractor

    Args:
        config: Configuration object forwarded to ``RegionExtractor``
        dataset_name: Dataset name forwarded to ``RegionExtractor``

    Returns:
        A newly constructed ``RegionExtractor``
    """
    extractor = RegionExtractor(config, dataset_name)
    return extractor