MogensR committed
Commit ad645ee · 1 Parent(s): e192748

Update app.py

Files changed (1):
  1. app.py +525 -781

app.py CHANGED
@@ -1,798 +1,542 @@
1
- import gradio as gr
2
  import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- from PIL import Image
6
  import numpy as np
7
- from torchvision import transforms
8
- import os
9
- from typing import Tuple, Optional
10
-
11
- # ===== VERSION 2.2 - COMPLETE IMPLEMENTATION WITH TRANSPARENCY FIX =====
12
- print("===== MYAVATARS.DK VERSION 2.2 - FULL IMPLEMENTATION =====")
13
- print("===== TRANSPARENCY FIX INCLUDED INLINE =====")
14
- print("===== CACHE BUSTER ACTIVE =====")
15
-
16
- # Fix OMP_NUM_THREADS warning
17
- os.environ['OMP_NUM_THREADS'] = '1'
18
-
19
- # Force CPU usage and disable CUDA
20
- os.environ['CUDA_VISIBLE_DEVICES'] = ''
21
- device = torch.device('cpu')
22
-
23
- # ============================================================================
24
- # PREPROCESSING AND POSTPROCESSING FUNCTIONS (INLINE FOR CACHE ISSUES)
25
- # ============================================================================
26
-
27
- def preprocess_image(im: Image.Image, model_input_size: list) -> torch.Tensor:
28
- """
29
- Preprocess image for model input.
30
- Fixed version that maintains proper tensor dimensions.
31
- """
32
- if im.mode != 'RGB':
33
- im = im.convert('RGB')
34
-
35
- # Create transform pipeline
36
- transform = transforms.Compose([
37
- transforms.Resize(model_input_size),
38
- transforms.ToTensor(),
39
- transforms.Normalize(mean=[0.485, 0.456, 0.406],
40
- std=[0.229, 0.224, 0.225])
41
- ])
42
-
43
- im_tensor = transform(im)
44
- im_tensor = im_tensor.unsqueeze(0) # Add batch dimension
45
- return im_tensor
46
-
47
- def postprocess_image(result: torch.Tensor, im_size: list) -> Image.Image:
48
- """
49
- Postprocess model output to final image.
50
- FIXED: Returns proper alpha channel instead of inverted mask.
51
- """
52
- result = result.squeeze(0) # Remove batch dimension if present
53
-
54
- if result.dim() == 3 and result.shape[0] == 1:
55
- result = result.squeeze(0) # Remove channel dimension for single channel
56
-
57
- # Convert to numpy and ensure values are in [0, 1]
58
- result_np = result.detach().cpu().numpy()
59
-
60
- # CRITICAL FIX: Model outputs foreground probability (0=background, 1=foreground)
61
- # We need alpha channel where 255=opaque (foreground), 0=transparent (background)
62
- # So we DON'T invert - we scale directly to 0-255
63
- result_np = (result_np * 255).astype(np.uint8)
64
-
65
- # Create PIL Image and resize to original dimensions
66
- pil_im = Image.fromarray(result_np, mode='L')
67
- pil_im = pil_im.resize(im_size, Image.LANCZOS)
68
-
69
- return pil_im
70
-
71
- # ============================================================================
72
- # MODEL ARCHITECTURE DEFINITION
73
- # ============================================================================
74
-
75
- class REBNCONV(nn.Module):
76
- def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
77
- super(REBNCONV, self).__init__()
78
- self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=1*dirate, dilation=1*dirate, stride=stride)
79
- self.bn_s1 = nn.BatchNorm2d(out_ch)
80
- self.relu_s1 = nn.ReLU(inplace=True)
81
-
82
- def forward(self, x):
83
- hx = x
84
- xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
85
- return xout
86
-
87
- def _upsample_like(src, tar):
88
- src = F.interpolate(src, size=tar.shape[2:], mode='bilinear', align_corners=False)
89
- return src
90
-
91
- class RSU7(nn.Module):
92
- def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
93
- super(RSU7, self).__init__()
94
- self.in_ch = in_ch
95
- self.mid_ch = mid_ch
96
- self.out_ch = out_ch
97
-
98
- self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
99
-
100
- self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
101
- self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
102
-
103
- self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
104
- self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
105
-
106
- self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
107
- self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
108
-
109
- self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
110
- self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
111
-
112
- self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
113
- self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
114
-
115
- self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
116
-
117
- self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
118
-
119
- self.rebnconv6d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
120
- self.rebnconv5d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
121
- self.rebnconv4d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
122
- self.rebnconv3d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
123
- self.rebnconv2d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
124
- self.rebnconv1d = REBNCONV(mid_ch*2, out_ch, dirate=1)
125
-
126
- def forward(self, x):
127
- b, c, h, w = x.shape
128
-
129
- hx = x
130
- hxin = self.rebnconvin(hx)
131
-
132
- hx1 = self.rebnconv1(hxin)
133
- hx = self.pool1(hx1)
134
-
135
- hx2 = self.rebnconv2(hx)
136
- hx = self.pool2(hx2)
137
-
138
- hx3 = self.rebnconv3(hx)
139
- hx = self.pool3(hx3)
140
-
141
- hx4 = self.rebnconv4(hx)
142
- hx = self.pool4(hx4)
143
-
144
- hx5 = self.rebnconv5(hx)
145
- hx = self.pool5(hx5)
146
-
147
- hx6 = self.rebnconv6(hx)
148
-
149
- hx7 = self.rebnconv7(hx6)
150
-
151
- hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
152
- hx6dup = _upsample_like(hx6d, hx5)
153
-
154
- hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
155
- hx5dup = _upsample_like(hx5d, hx4)
156
-
157
- hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
158
- hx4dup = _upsample_like(hx4d, hx3)
159
-
160
- hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
161
- hx3dup = _upsample_like(hx3d, hx2)
162
-
163
- hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
164
- hx2dup = _upsample_like(hx2d, hx1)
165
-
166
- hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
167
-
168
- return hx1d + hxin
169
-
170
- class RSU6(nn.Module):
171
- def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
172
- super(RSU6, self).__init__()
173
-
174
- self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
175
-
176
- self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
177
- self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
178
-
179
- self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
180
- self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
181
-
182
- self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
183
- self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
184
-
185
- self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
186
- self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
187
-
188
- self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
189
-
190
- self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
191
-
192
- self.rebnconv5d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
193
- self.rebnconv4d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
194
- self.rebnconv3d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
195
- self.rebnconv2d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
196
- self.rebnconv1d = REBNCONV(mid_ch*2, out_ch, dirate=1)
197
-
198
- def forward(self, x):
199
- hx = x
200
-
201
- hxin = self.rebnconvin(hx)
202
-
203
- hx1 = self.rebnconv1(hxin)
204
- hx = self.pool1(hx1)
205
-
206
- hx2 = self.rebnconv2(hx)
207
- hx = self.pool2(hx2)
208
-
209
- hx3 = self.rebnconv3(hx)
210
- hx = self.pool3(hx3)
211
-
212
- hx4 = self.rebnconv4(hx)
213
- hx = self.pool4(hx4)
214
-
215
- hx5 = self.rebnconv5(hx)
216
-
217
- hx6 = self.rebnconv6(hx5)
218
-
219
- hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
220
- hx5dup = _upsample_like(hx5d, hx4)
221
-
222
- hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
223
- hx4dup = _upsample_like(hx4d, hx3)
224
-
225
- hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
226
- hx3dup = _upsample_like(hx3d, hx2)
227
-
228
- hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
229
- hx2dup = _upsample_like(hx2d, hx1)
230
-
231
- hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
232
-
233
- return hx1d + hxin
234
-
235
- class RSU5(nn.Module):
236
- def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
237
- super(RSU5, self).__init__()
238
-
239
- self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
240
-
241
- self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
242
- self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
243
-
244
- self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
245
- self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
246
-
247
- self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
248
- self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
249
-
250
- self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
251
-
252
- self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
253
-
254
- self.rebnconv4d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
255
- self.rebnconv3d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
256
- self.rebnconv2d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
257
- self.rebnconv1d = REBNCONV(mid_ch*2, out_ch, dirate=1)
258
-
259
- def forward(self, x):
260
- hx = x
261
-
262
- hxin = self.rebnconvin(hx)
263
-
264
- hx1 = self.rebnconv1(hxin)
265
- hx = self.pool1(hx1)
266
-
267
- hx2 = self.rebnconv2(hx)
268
- hx = self.pool2(hx2)
269
-
270
- hx3 = self.rebnconv3(hx)
271
- hx = self.pool3(hx3)
272
-
273
- hx4 = self.rebnconv4(hx)
274
-
275
- hx5 = self.rebnconv5(hx4)
276
-
277
- hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
278
- hx4dup = _upsample_like(hx4d, hx3)
279
-
280
- hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
281
- hx3dup = _upsample_like(hx3d, hx2)
282
-
283
- hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
284
- hx2dup = _upsample_like(hx2d, hx1)
285
-
286
- hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
287
-
288
- return hx1d + hxin
289
-
290
- class RSU4(nn.Module):
291
- def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
292
- super(RSU4, self).__init__()
293
-
294
- self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
295
-
296
- self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
297
- self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
298
-
299
- self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
300
- self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
301
-
302
- self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
303
-
304
- self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
305
-
306
- self.rebnconv3d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
307
- self.rebnconv2d = REBNCONV(mid_ch*2, mid_ch, dirate=1)
308
- self.rebnconv1d = REBNCONV(mid_ch*2, out_ch, dirate=1)
309
-
310
- def forward(self, x):
311
- hx = x
312
-
313
- hxin = self.rebnconvin(hx)
314
-
315
- hx1 = self.rebnconv1(hxin)
316
- hx = self.pool1(hx1)
317
-
318
- hx2 = self.rebnconv2(hx)
319
- hx = self.pool2(hx2)
320
-
321
- hx3 = self.rebnconv3(hx)
322
-
323
- hx4 = self.rebnconv4(hx3)
324
-
325
- hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
326
- hx3dup = _upsample_like(hx3d, hx2)
327
-
328
- hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
329
- hx2dup = _upsample_like(hx2d, hx1)
330
-
331
- hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
332
-
333
- return hx1d + hxin
334
-
335
- class RSU4F(nn.Module):
336
- def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
337
- super(RSU4F, self).__init__()
338
-
339
- self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
340
-
341
- self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
342
- self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
343
- self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
344
-
345
- self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
346
-
347
- self.rebnconv3d = REBNCONV(mid_ch*2, mid_ch, dirate=4)
348
- self.rebnconv2d = REBNCONV(mid_ch*2, mid_ch, dirate=2)
349
- self.rebnconv1d = REBNCONV(mid_ch*2, out_ch, dirate=1)
350
-
351
- def forward(self, x):
352
- hx = x
353
-
354
- hxin = self.rebnconvin(hx)
355
-
356
- hx1 = self.rebnconv1(hxin)
357
- hx2 = self.rebnconv2(hx1)
358
- hx3 = self.rebnconv3(hx2)
359
-
360
- hx4 = self.rebnconv4(hx3)
361
-
362
- hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
363
- hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
364
- hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
365
-
366
- return hx1d + hxin
367
-
368
- class BriaRMBG(nn.Module):
369
- """
370
- BRIA RMBG Model for background removal.
371
- """
372
- def __init__(self, config=None):
373
- super(BriaRMBG, self).__init__()
374
-
375
- in_ch = 3
376
- out_ch = 1
377
-
378
- self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
379
- self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
380
-
381
- self.stage1 = RSU7(64, 32, 64)
382
- self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
383
-
384
- self.stage2 = RSU6(64, 32, 128)
385
- self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
386
-
387
- self.stage3 = RSU5(128, 64, 256)
388
- self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
389
-
390
- self.stage4 = RSU4(256, 128, 512)
391
- self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
392
-
393
- self.stage5 = RSU4F(512, 256, 512)
394
- self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
395
-
396
- self.stage6 = RSU4F(512, 256, 512)
397
-
398
- # decoder
399
- self.stage5d = RSU4F(1024, 256, 512)
400
- self.stage4d = RSU4(1024, 128, 256)
401
- self.stage3d = RSU5(512, 64, 128)
402
- self.stage2d = RSU6(256, 32, 64)
403
- self.stage1d = RSU7(128, 16, 64)
404
-
405
- self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
406
- self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
407
- self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
408
- self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
409
- self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
410
- self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
411
-
412
- self.outconv = nn.Conv2d(6, out_ch, 1)
413
-
414
- def forward(self, x):
415
- hx = x
416
-
417
- hxin = self.conv_in(hx)
418
- hxin = self.pool_in(hxin)
419
-
420
- # stage 1
421
- hx1 = self.stage1(hxin)
422
- hx = self.pool12(hx1)
423
-
424
- # stage 2
425
- hx2 = self.stage2(hx)
426
- hx = self.pool23(hx2)
427
-
428
- # stage 3
429
- hx3 = self.stage3(hx)
430
- hx = self.pool34(hx3)
431
-
432
- # stage 4
433
- hx4 = self.stage4(hx)
434
- hx = self.pool45(hx4)
435
-
436
- # stage 5
437
- hx5 = self.stage5(hx)
438
- hx = self.pool56(hx5)
439
-
440
- # stage 6
441
- hx6 = self.stage6(hx)
442
- hx6up = _upsample_like(hx6, hx5)
443
-
444
- # decoder
445
- hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
446
- hx5dup = _upsample_like(hx5d, hx4)
447
-
448
- hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
449
- hx4dup = _upsample_like(hx4d, hx3)
450
-
451
- hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
452
- hx3dup = _upsample_like(hx3d, hx2)
453
-
454
- hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
455
- hx2dup = _upsample_like(hx2d, hx1)
456
-
457
- hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
458
-
459
- # side output
460
- d1 = self.side1(hx1d)
461
- d1 = _upsample_like(d1, x)
462
-
463
- d2 = self.side2(hx2d)
464
- d2 = _upsample_like(d2, x)
465
-
466
- d3 = self.side3(hx3d)
467
- d3 = _upsample_like(d3, x)
468
-
469
- d4 = self.side4(hx4d)
470
- d4 = _upsample_like(d4, x)
471
-
472
- d5 = self.side5(hx5d)
473
- d5 = _upsample_like(d5, x)
474
-
475
- d6 = self.side6(hx6)
476
- d6 = _upsample_like(d6, x)
477
-
478
- d0 = self.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))
479
-
480
- return torch.sigmoid(d0), torch.sigmoid(d1), torch.sigmoid(d2), torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5), torch.sigmoid(d6)
481
-
482
- # ============================================================================
483
- # MODEL LOADING AND INITIALIZATION
484
- # ============================================================================
485
-
486
- print("Loading BRIA RMBG model...")
487
-
488
- # Load the model
489
- model_path = "./model.pth"
490
- if not os.path.exists(model_path):
491
- print("Model not found locally, downloading from HuggingFace...")
492
- from huggingface_hub import hf_hub_download
493
- model_path = hf_hub_download(
494
- repo_id="briaai/RMBG-1.4",
495
- filename="model.pth",
496
- repo_type="model"
497
- )
498
- print(f"Model downloaded to: {model_path}")
499
-
500
- # Initialize model
501
- net = BriaRMBG()
502
-
503
- # Load state dict with error handling
504
- try:
505
- state_dict = torch.load(model_path, map_location=device)
506
-
507
- # Check if we need to adjust the state dict
508
- if 'outconv.weight' not in state_dict:
509
- print("Adjusting model state dict keys...")
510
- # The model might have different key names, let's check
511
- for key in list(state_dict.keys()):
512
- if 'outconv' in key:
513
- print(f"Found outconv key: {key}")
514
 
515
- net.load_state_dict(state_dict, strict=False)
516
- print("Model weights loaded successfully!")
517
- except Exception as e:
518
- print(f"Warning: Could not load all model weights: {e}")
519
- print("Attempting to load with strict=False...")
520
  try:
521
- net.load_state_dict(torch.load(model_path, map_location=device), strict=False)
522
- print("Model loaded with strict=False")
523
- except Exception as e2:
524
- print(f"Error loading model: {e2}")
525
  raise
526
 
527
- net.to(device)
528
- net.eval()
529
- print("Model loaded successfully!")
530
-
531
- # ============================================================================
532
- # IMAGE PROCESSING FUNCTION
533
- # ============================================================================
534
 
535
- def process_image(input_image):
536
  """
537
- Main function to process images and remove background.
538
- Returns RGBA image with transparent background.
539
  """
540
- if input_image is None:
541
- return None
542
 
543
- print(f"Processing image... Original type: {type(input_image)}")
544
-
545
- # Convert to PIL Image if needed
546
- if isinstance(input_image, np.ndarray):
547
- input_image = Image.fromarray(input_image)
548
- print("Converted numpy array to PIL Image")
549
-
550
- # Ensure RGB mode
551
- if input_image.mode != 'RGB':
552
- input_image = input_image.convert('RGB')
553
- print("Converted image to RGB")
554
-
555
- # Get original size
556
- orig_size = input_image.size
557
- print(f"Original image size: {orig_size}")
558
-
559
- # Preprocess
560
- model_input = preprocess_image(input_image, [1024, 1024])
561
- print(f"Model input shape: {model_input.shape}")
562
-
563
- # Run model
564
- with torch.no_grad():
565
- preds = net(model_input)[-1]
566
- print(f"Model output shape: {preds.shape}")
567
-
568
- # Postprocess with alpha channel support
569
- pred = postprocess_image(preds[0], orig_size)
570
- print(f"Postprocessed mask size: {pred.size}")
571
-
572
- # Convert to numpy array
573
- pred_np = np.array(pred)
574
- print(f"Mask values - Min: {pred_np.min()}, Max: {pred_np.max()}, Mean: {pred_np.mean():.2f}")
575
-
576
- # Create RGBA output with fixed transparency
577
- output = np.zeros((*pred_np.shape[:2], 4), dtype=np.uint8)
578
-
579
- # Copy RGB channels
580
- input_np = np.array(input_image)
581
- output[:, :, :3] = input_np
582
-
583
- # FIXED: Set alpha channel directly from prediction
584
- # The model outputs values 0-255, we use them directly
585
- output[:, :, 3] = pred_np
586
-
587
- print(f"Output shape: {output.shape}")
588
- print(f"Alpha channel - Min: {output[:,:,3].min()}, Max: {output[:,:,3].max()}")
589
-
590
- # Convert to PIL Image with RGBA
591
- output_image = Image.fromarray(output, mode='RGBA')
592
- print("Successfully created RGBA image with transparent background")
593
-
594
- return output_image
595
-
596
- # ============================================================================
597
- # GRADIO INTERFACE
598
- # ============================================================================
599
-
600
- # Custom CSS with MyAvatars.dk branding
601
- custom_css = """
602
- .logo-container {
603
- text-align: center;
604
- padding: 25px 0;
605
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
606
- border-radius: 15px;
607
- margin-bottom: 25px;
608
- box-shadow: 0 10px 30px rgba(0,0,0,0.2);
609
- }
610
- .logo-title {
611
- color: white;
612
- font-size: 3em;
613
- font-weight: bold;
614
- text-shadow: 3px 3px 6px rgba(0,0,0,0.3);
615
- margin-bottom: 10px;
616
- }
617
- .logo-subtitle {
618
- color: rgba(255,255,255,0.95);
619
- font-size: 1.3em;
620
- margin-top: 10px;
621
- font-weight: 300;
622
- }
623
- .powered-by {
624
- text-align: center;
625
- color: #666;
626
- font-size: 0.9em;
627
- margin-top: 20px;
628
- padding: 10px;
629
- background: rgba(0,0,0,0.05);
630
- border-radius: 5px;
631
- }
632
- .features-grid {
633
- display: grid;
634
- grid-template-columns: repeat(3, 1fr);
635
- gap: 20px;
636
- margin: 20px 0;
637
- }
638
- .feature-card {
639
- text-align: center;
640
- padding: 15px;
641
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
642
- border-radius: 10px;
643
- box-shadow: 0 5px 15px rgba(0,0,0,0.1);
644
- }
645
- .feature-icon {
646
- font-size: 2em;
647
- margin-bottom: 10px;
648
- }
649
- .feature-title {
650
- font-weight: bold;
651
- color: #333;
652
- margin-bottom: 5px;
653
- }
654
- .feature-desc {
655
- color: #666;
656
- font-size: 0.9em;
657
- }
658
- """
659
-
660
- print("Creating Gradio interface...")
661
-
662
- # Create Gradio interface with logo and enhanced UI
663
- with gr.Blocks(css=custom_css, title="MyAvatars.dk - AI Background Remover") as demo:
664
- # Logo header
665
- gr.HTML("""
666
- <div class="logo-container">
667
- <div class="logo-title">🎨 MyAvatars.dk</div>
668
- <div class="logo-subtitle">Professional AI-Powered Background Removal</div>
669
- </div>
670
- """)
671
-
672
- # Features grid
673
- gr.HTML("""
674
- <div class="features-grid">
675
- <div class="feature-card">
676
- <div class="feature-icon">⚡</div>
677
- <div class="feature-title">Lightning Fast</div>
678
- <div class="feature-desc">Process images in seconds</div>
679
- </div>
680
- <div class="feature-card">
681
- <div class="feature-icon">🎯</div>
682
- <div class="feature-title">High Precision</div>
683
- <div class="feature-desc">AI-powered edge detection</div>
684
- </div>
685
- <div class="feature-card">
686
- <div class="feature-icon">🔒</div>
687
- <div class="feature-title">Privacy First</div>
688
- <div class="feature-desc">Images processed locally</div>
689
- </div>
690
- </div>
691
- """)
692
-
693
- gr.Markdown("## Remove backgrounds instantly with state-of-the-art AI")
694
- gr.Markdown("Upload any image and get a perfect transparent background version. Ideal for avatars, product photos, and creative projects!")
695
-
696
- with gr.Row():
697
- with gr.Column():
698
- input_image = gr.Image(
699
- label="📤 Upload Image",
700
- type="pil",
701
- height=400,
702
- elem_id="input_image"
703
- )
704
-
705
- with gr.Row():
706
- clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
707
- process_btn = gr.Button("✨ Remove Background", variant="primary", size="lg")
708
 
709
- with gr.Column():
710
- output_image = gr.Image(
711
- label="📥 Result (Transparent Background)",
712
- type="pil",
713
- height=400,
714
- image_mode="RGBA",
715
- elem_id="output_image"
716
- )
717
 
718
- gr.Markdown("""
719
- ### 💡 Tips for best results:
720
- - Use high-quality images with clear subjects
721
- - Ensure good contrast between subject and background
722
- - Works best with people, objects, and products
723
- """)
724
-
725
- # Examples section (commented out if no example images available)
726
- # gr.Markdown("### 🖼️ Try with examples:")
727
- # gr.Examples(
728
- # examples=[
729
- # ["./input.jpg"]
730
- # ],
731
- # inputs=input_image,
732
- # outputs=output_image,
733
- # fn=process_image,
734
- # cache_examples=True
735
- # )
736
-
737
- # Footer with version info
738
- gr.HTML("""
739
- <div class="powered-by">
740
- <strong>Powered by BRIA RMBG 1.4</strong> | Version 2.2 | Cache Buster Active<br>
741
- <small>© 2024 MyAvatars.dk - Professional Avatar Solutions</small>
742
- </div>
743
- """)
744
 
745
- # Instructions
746
- with gr.Accordion("📖 How to use", open=False):
747
  gr.Markdown("""
748
- 1. **Upload an image** using the upload area or drag & drop
749
- 2. **Click "Remove Background"** to process the image
750
- 3. **Download the result** with transparent background
751
- 4. **Use the result** in your projects, presentations, or as avatars
752
 
753
- **Supported formats:** JPG, PNG, WebP
754
- **Max resolution:** 4096x4096 pixels
755
- **Output format:** PNG with transparency (RGBA)
756
  """)
757
-
758
- # Event handlers
759
- process_btn.click(
760
- fn=process_image,
761
- inputs=[input_image],
762
- outputs=[output_image]
763
- )
764
-
765
- clear_btn.click(
766
- fn=lambda: (None, None),
767
- inputs=[],
768
- outputs=[input_image, output_image]
769
- )
770
-
771
- # Add keyboard shortcut
772
- input_image.change(
773
- fn=lambda x: gr.update(interactive=x is not None),
774
- inputs=[input_image],
775
- outputs=[process_btn]
776
- )
777
-
778
- print("=" * 80)
779
- print("MYAVATARS.DK BACKGROUND REMOVER - VERSION 2.2")
780
- print("=" * 80)
781
- print("Application initialized successfully!")
782
- print("Features enabled:")
783
- print(" ✓ Transparent background removal")
784
- print(" ✓ High-quality edge detection")
785
- print(" ✓ RGBA output support")
786
- print(" ✓ CPU-optimized processing")
787
- print(" ✓ Professional UI with branding")
788
- print("=" * 80)
789
-
790
- # Launch the application - HuggingFace Spaces compatible
791
  if __name__ == "__main__":
792
- print("Launching Gradio interface...")
793
- import sys
794
- if "google.colab" in sys.modules:
795
- demo.launch(debug=True)
796
- else:
797
- # For HuggingFace Spaces
798
- demo.launch(server_name="0.0.0.0", server_port=7860)
1
+ #!/usr/bin/env python3
2
+ # ========================= PRE-IMPORT ENV GUARDS =========================
3
+ import os
4
+ os.environ.pop("OMP_NUM_THREADS", None)
5
+ os.environ.setdefault("MKL_NUM_THREADS", "1")
6
+ os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
7
+ os.environ.setdefault("VECLIB_MAXIMUM_THREADS", "1")
8
+ os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")
9
+
10
+ # ========================= IMPORTS =========================
11
+ import gc
12
+ import sys
13
+ import cv2
14
  import torch
15
  import numpy as np
16
+ import gradio as gr
17
+ import tempfile
18
+ import time
19
+ from pathlib import Path
20
+ import logging
21
+ import traceback
22
+ from datetime import datetime
23
+ import psutil
24
+ import warnings
25
+ warnings.filterwarnings("ignore")
26
+
27
+ # Import the properly implemented functions from utilities
28
+ from utilities import (
29
+ segment_person_hq,
30
+ refine_mask_hq,
31
+ replace_background_hq,
32
+ load_background_image,
33
+ resize_background_to_match,
34
+ apply_temporal_smoothing,
35
+ smooth_edges,
36
+ estimate_foreground
37
+ )
38
+
39
+ # Import two-stage processor for advanced mode
40
+ from two_stage_processor import TwoStageProcessor
41
+
42
+ # Import UI components
43
+ from ui_components import create_ui, get_example_videos, get_example_backgrounds
44
+
45
+ # ========================= LOGGING SETUP =========================
46
+ logging.basicConfig(
47
+ level=logging.INFO,
48
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
49
+ )
50
+ logger = logging.getLogger(__name__)
51
+
52
+ # ========================= GPU/DEVICE SETUP =========================
53
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
54
+ logger.info(f"Using device: {device}")
55
+
56
+ if device.type == "cuda":
57
+ torch.cuda.empty_cache()
58
+ # Optimize CUDA settings for memory efficiency
59
+ torch.backends.cudnn.benchmark = False
60
+ torch.backends.cudnn.deterministic = True
61
+ torch.cuda.set_per_process_memory_fraction(0.8) # Limit to 80% of VRAM
62
+
63
+ # ========================= GLOBAL MODELS =========================
64
+ # Models will be loaded on demand to save RAM
65
+ sam2_model = None
66
+ matta_model = None
67
+ two_stage_processor = None
68
+
69
+ # ========================= MODEL LOADING =========================
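These globals stay None until a job needs them; a usage sketch of the load/release cycle that the helpers defined below are meant to support (illustrative only):

    load_models_on_demand(use_two_stage=True)   # populate only what the selected mode needs
    try:
        ...                                     # run frame processing here
    finally:
        clear_models_from_memory()              # hand RAM/VRAM back before the next job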
70
+ def load_models_on_demand(use_two_stage=False):
71
+ """Load models only when needed, with proper memory management"""
72
+ global sam2_model, matta_model, two_stage_processor
73
74
  try:
75
+ # Clear any existing models first
76
+ clear_models_from_memory()
77
+
78
+ if use_two_stage and two_stage_processor is None:
79
+ logger.info("Loading Two-Stage Processor (SAM2 + MattA)...")
80
+ two_stage_processor = TwoStageProcessor(device=device)
81
+ logger.info("Two-Stage Processor loaded successfully")
82
+ elif not use_two_stage:
83
+ # Load individual models for single-stage processing
84
+ if sam2_model is None:
85
+ logger.info("Loading SAM2 model...")
86
+ # This should be imported from your SAM2 implementation
87
+ from sam2_integration import load_sam2_model
88
+ sam2_model = load_sam2_model(device=device)
89
+ logger.info("SAM2 model loaded")
90
+
91
+ if matta_model is None:
92
+ logger.info("Loading MattingAnything model...")
93
+ # This should be imported from your MattA implementation
94
+ from matta_integration import load_matta_model
95
+ matta_model = load_matta_model(device=device)
96
+ logger.info("MattingAnything model loaded")
97
+
98
+ # Force garbage collection after loading
99
+ gc.collect()
100
+ if device.type == "cuda":
101
+ torch.cuda.empty_cache()
102
+
103
+ except Exception as e:
104
+ logger.error(f"Error loading models: {str(e)}")
105
  raise
106
 
107
+ def clear_models_from_memory():
108
+ """Clear models from memory to free up RAM"""
109
+ global sam2_model, matta_model, two_stage_processor
110
+
111
+ if sam2_model is not None:
112
+ del sam2_model
113
+ sam2_model = None
114
+
115
+ if matta_model is not None:
116
+ del matta_model
117
+ matta_model = None
118
+
119
+ if two_stage_processor is not None:
120
+ del two_stage_processor
121
+ two_stage_processor = None
122
+
123
+ gc.collect()
124
+ if device.type == "cuda":
125
+ torch.cuda.empty_cache()
126
+
127
+ # ========================= MEMORY MONITORING =========================
128
+ def log_memory_usage(stage=""):
129
+ """Log current memory usage"""
130
+ process = psutil.Process()
131
+ mem_info = process.memory_info()
132
+ ram_usage = mem_info.rss / 1024 / 1024 / 1024 # GB
133
+
134
+ if device.type == "cuda":
135
+ vram_usage = torch.cuda.memory_allocated() / 1024 / 1024 / 1024 # GB
136
+ vram_reserved = torch.cuda.memory_reserved() / 1024 / 1024 / 1024 # GB
137
+ logger.info(f"[{stage}] RAM: {ram_usage:.2f}GB | VRAM: {vram_usage:.2f}GB (reserved: {vram_reserved:.2f}GB)")
138
+ else:
139
+ logger.info(f"[{stage}] RAM: {ram_usage:.2f}GB")
140
 
141
+ # ========================= PROGRESS TRACKING =========================
142
+ def write_progress_info(info_dict):
143
+ """Write formatted progress information to temp file for UI display"""
144
+ try:
145
+ progress_file = "/tmp/processing_info.txt"
146
+ with open(progress_file, "w") as f:
147
+ if "error" in info_dict:
148
+ f.write(f"❌ ERROR\n{info_dict['error']}\n")
149
+ elif "complete" in info_dict:
150
+ f.write(f"✅ COMPLETE\n")
151
+ f.write(f"Total Frames: {info_dict.get('total_frames', 'N/A')}\n")
152
+ f.write(f"Processing Time: {info_dict.get('time', 'N/A')}\n")
153
+ f.write(f"Average FPS: {info_dict.get('fps', 'N/A')}\n")
154
+ f.write(f"Resolution: {info_dict.get('resolution', 'N/A')}\n")
155
+ f.write(f"Background: {info_dict.get('background', 'N/A')}\n")
156
+ else:
157
+ f.write(f"📊 PROCESSING STATUS\n")
158
+ f.write(f"━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
159
+ f.write(f"🎬 Frame {info_dict.get('current_frame', 0)}/{info_dict.get('total_frames', 0)}\n")
160
+ f.write(f"⏱️ Elapsed: {info_dict.get('elapsed', '0s')}\n")
161
+ f.write(f"⚡ Speed: {info_dict.get('speed', '0')} fps\n")
162
+ f.write(f"🎯 ETA: {info_dict.get('eta', 'calculating...')}\n")
163
+ f.write(f"━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
164
+ f.write(f"📈 Progress: {info_dict.get('progress', 0):.1f}%\n")
165
+ except Exception as e:
166
+ logger.error(f"Error writing progress: {e}")
167
+
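For reference, the two dictionary shapes this helper formats, matching the keys read above; the values are illustrative:

    write_progress_info({                       # mid-run update
        'current_frame': 120, 'total_frames': 600,
        'elapsed': '45.0s', 'speed': '2.7', 'eta': '180s', 'progress': 20.0,
    })
    write_progress_info({                       # final summary
        'complete': True, 'total_frames': 600, 'time': '230.0s',
        'fps': '2.6', 'resolution': '1280x720', 'background': 'Image',
    })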
168
+ # ========================= MAIN PROCESSING FUNCTION =========================
169
+ def process_video(
170
+ input_video,
171
+ background_image,
172
+ use_two_stage=False,
173
+ use_mask_refinement=True,
174
+ use_temporal_smoothing=True,
175
+ mask_blur=5,
176
+ edge_smoothing=5,
177
+ background_type="Color",
178
+ background_color="#00FF00",
179
+ progress=gr.Progress()
180
+ ):
181
  """
182
+ Main video processing function with proper SAM2+MattA integration
 
183
  """
184
+ temp_dir = None
185
+ cap = None
186
+ out = None
187
+ start_time = time.time()
188
 
189
+ try:
190
+ # Initial setup
191
+ logger.info("Starting video processing...")
192
+ log_memory_usage("Start")
193
+
194
+ # Validate inputs
195
+ if input_video is None:
196
+ raise ValueError("No input video provided")
197
+
198
+ # Load models based on processing mode
199
+ load_models_on_demand(use_two_stage=use_two_stage)
200
+ log_memory_usage("Models Loaded")
201
+
202
+ # Setup video capture
203
+ cap = cv2.VideoCapture(input_video)
204
+ if not cap.isOpened():
205
+ raise ValueError(f"Failed to open video: {input_video}")
206
+
207
+ # Get video properties
208
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
209
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
210
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
211
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
212
+
213
+ logger.info(f"Video info: {width}x{height}, {fps} fps, {total_frames} frames")
214
+
215
+ # Prepare background
216
+ if background_type == "Color":
217
+ background = np.full((height, width, 3),
218
+ tuple(int(background_color[i:i+2], 16) for i in (5, 3, 1)),
219
+ dtype=np.uint8)
220
+ elif background_type == "Image" and background_image is not None:
221
+ background = load_background_image(background_image)
222
+ background = resize_background_to_match(background, (width, height))
223
+ elif background_type == "Blur":
224
+ # Will be handled per frame
225
+ background = None
226
+ else:
227
+ background = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
228
+
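The index order (5, 3, 1) reads a "#RRGGBB" string back to front, so the solid background comes out in OpenCV's BGR channel order; a quick check with an assumed colour value:

    color = '#FF8000'                                         # R=255, G=128, B=0
    bgr = tuple(int(color[i:i + 2], 16) for i in (5, 3, 1))
    assert bgr == (0, 128, 255)                               # (B, G, R), ready for cv2 frames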
229
+ # Setup output video
230
+ temp_dir = tempfile.mkdtemp()
231
+ output_path = os.path.join(temp_dir, "output_video.mp4")
232
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
233
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
234
+
235
+ # Process frames
236
+ frame_idx = 0
237
+ processed_frames = []
238
+ masks_history = [] # For temporal smoothing
239
+
240
+ # Batch processing for memory efficiency
241
+ BATCH_SIZE = 10 if device.type == "cuda" else 5
242
+ frame_batch = []
243
+
244
+ while True:
245
+ ret, frame = cap.read()
246
+ if not ret:
247
+ break
 
248
 
249
+ frame_batch.append(frame)
250
 
251
+ # Process batch when full, or once every read frame has been queued
252
+ if len(frame_batch) == BATCH_SIZE or frame_idx + len(frame_batch) >= total_frames:
253
+
254
+ for batch_frame in frame_batch:
255
+ # Update progress
256
+ progress(frame_idx / total_frames, f"Processing frame {frame_idx}/{total_frames}")
257
+
258
+ # Calculate and write detailed progress info
259
+ elapsed_time = time.time() - start_time
260
+ if frame_idx > 0:
261
+ fps_current = frame_idx / elapsed_time
262
+ eta = (total_frames - frame_idx) / fps_current
263
+ write_progress_info({
264
+ 'current_frame': frame_idx,
265
+ 'total_frames': total_frames,
266
+ 'elapsed': f"{elapsed_time:.1f}s",
267
+ 'speed': f"{fps_current:.1f}",
268
+ 'eta': f"{eta:.0f}s",
269
+ 'progress': (frame_idx / total_frames) * 100
270
+ })
271
+
272
+ # Process frame based on mode
273
+ if use_two_stage:
274
+ # Use integrated two-stage processor
275
+ processed_frame, mask = two_stage_processor.process_frame(
276
+ batch_frame,
277
+ background if background is not None else batch_frame,
278
+ use_refinement=use_mask_refinement,
279
+ mask_blur=mask_blur
280
+ )
281
+ else:
282
+ # Use utilities functions (properly implemented with transparency fix)
283
+ # Step 1: Segment person using SAM2
284
+ mask = segment_person_hq(batch_frame, sam2_model)
285
+
286
+ # Step 2: Refine mask using MattA if enabled
287
+ if use_mask_refinement and matta_model is not None:
288
+ mask = refine_mask_hq(batch_frame, mask, matta_model)
289
+
290
+ # Step 3: Apply temporal smoothing if enabled
291
+ if use_temporal_smoothing and len(masks_history) > 0:
292
+ mask = apply_temporal_smoothing(mask, masks_history, window_size=5)
293
+
294
+ # Store mask for temporal smoothing
295
+ masks_history.append(mask)
296
+ if len(masks_history) > 10: # Keep only recent masks
297
+ masks_history.pop(0)
298
+
299
+ # Step 4: Apply edge smoothing
300
+ if edge_smoothing > 0:
301
+ mask = smooth_edges(mask, edge_smoothing)
302
+
303
+ # Step 5: Handle background
304
+ if background_type == "Blur":
305
+ background_frame = cv2.GaussianBlur(batch_frame, (21, 21), 0)
306
+ else:
307
+ background_frame = background
308
+
309
+ # Step 6: Replace background with proper alpha handling
310
+ processed_frame = replace_background_hq(
311
+ batch_frame,
312
+ mask,
313
+ background_frame
314
+ )
315
+
316
+ # Write frame
317
+ out.write(processed_frame)
318
+ processed_frames.append(processed_frame)
319
+ frame_idx += 1
320
+
321
+ # Memory management - clear every 100 frames
322
+ if frame_idx % 100 == 0:
323
+ gc.collect()
324
+ if device.type == "cuda":
325
+ torch.cuda.empty_cache()
326
+ log_memory_usage(f"Frame {frame_idx}")
327
+
328
+ # Clear batch
329
+ frame_batch = []
330
+
331
+ # Finalize
332
+ cap.release()
333
+ out.release()
334
+
335
+ # Write completion info
336
+ total_time = time.time() - start_time
337
+ avg_fps = total_frames / total_time if total_time > 0 else 0
338
+ write_progress_info({
339
+ 'complete': True,
340
+ 'total_frames': total_frames,
341
+ 'time': f"{total_time:.1f}s",
342
+ 'fps': f"{avg_fps:.1f}",
343
+ 'resolution': f"{width}x{height}",
344
+ 'background': background_type
345
+ })
346
+
347
+ logger.info(f"Processing complete: {total_frames} frames in {total_time:.1f}s ({avg_fps:.1f} fps)")
348
+ log_memory_usage("Complete")
349
+
350
+ return output_path
351
+
352
+ except Exception as e:
353
+ logger.error(f"Processing error: {str(e)}\n{traceback.format_exc()}")
354
+ write_progress_info({'error': str(e)})
355
+ raise gr.Error(f"Processing failed: {str(e)}")
356
+
357
+ finally:
358
+ # Cleanup
359
+ if cap is not None:
360
+ cap.release()
361
+ if out is not None:
362
+ out.release()
363
+
364
+ # Clear models to free memory
365
+ clear_models_from_memory()
366
+
367
+ # Final garbage collection
368
+ gc.collect()
369
+ if device.type == "cuda":
370
+ torch.cuda.empty_cache()
371
+
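In summary, each frame takes one of two paths through the function above. A condensed sketch using the same helpers, with the smoothing options and error handling omitted:

    def composite_one_frame(frame, background, use_two_stage):
        if use_two_stage:
            # SAM2 segmentation + MattingAnything refinement behind one call
            out_frame, _mask = two_stage_processor.process_frame(frame, background)
            return out_frame
        mask = segment_person_hq(frame, sam2_model)        # SAM2: coarse person mask
        mask = refine_mask_hq(frame, mask, matta_model)    # MattingAnything: edge refinement
        return replace_background_hq(frame, mask, background)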
372
+ # ========================= GRADIO APP =========================
373
+ def create_app():
374
+ """Create and configure the Gradio application"""
375
 
376
+ with gr.Blocks(title="Video Background Replacement - SAM2+MattA", theme=gr.themes.Soft()) as app:
 
377
  gr.Markdown("""
378
+ # 🎬 Video Background Replacement
379
+ ### Powered by SAM2 + MattingAnything
380
+
381
+ Upload a video and replace the background with:
382
+ - 🎨 Solid colors
383
+ - 🖼️ Custom images
384
+ - 🌫️ Blurred background
385
 
386
+ **Two-Stage Mode**: Combines SAM2 segmentation with MattA refinement for best quality
 
 
387
  """)
388
+
389
+ with gr.Tabs():
390
+ with gr.TabItem("🎥 Process Video"):
391
+ with gr.Row():
392
+ with gr.Column(scale=1):
393
+ input_video = gr.Video(label="Input Video", height=300)
394
+
395
+ with gr.Accordion("⚙️ Processing Options", open=True):
396
+ use_two_stage = gr.Checkbox(
397
+ label="Use Two-Stage Processing (SAM2→MattA)",
398
+ value=True,
399
+ info="Better quality but slower"
400
+ )
401
+ use_mask_refinement = gr.Checkbox(
402
+ label="Refine Masks",
403
+ value=True,
404
+ info="Use MattA for better edges"
405
+ )
406
+ use_temporal_smoothing = gr.Checkbox(
407
+ label="Temporal Smoothing",
408
+ value=True,
409
+ info="Reduce flickering between frames"
410
+ )
411
+ mask_blur = gr.Slider(
412
+ minimum=0,
413
+ maximum=21,
414
+ value=5,
415
+ step=2,
416
+ label="Mask Blur"
417
+ )
418
+ edge_smoothing = gr.Slider(
419
+ minimum=0,
420
+ maximum=21,
421
+ value=5,
422
+ step=2,
423
+ label="Edge Smoothing"
424
+ )
425
+
426
+ with gr.Accordion("🎨 Background Options", open=True):
427
+ background_type = gr.Radio(
428
+ choices=["Color", "Image", "Blur"],
429
+ value="Color",
430
+ label="Background Type"
431
+ )
432
+ background_color = gr.ColorPicker(
433
+ label="Background Color",
434
+ value="#00FF00",
435
+ visible=True
436
+ )
437
+ background_image = gr.Image(
438
+ label="Background Image",
439
+ type="filepath",
440
+ visible=False
441
+ )
442
+
443
+ # Show/hide based on background type
444
+ def update_background_inputs(bg_type):
445
+ return (
446
+ gr.update(visible=bg_type == "Color"),
447
+ gr.update(visible=bg_type == "Image")
448
+ )
449
+
450
+ background_type.change(
451
+ update_background_inputs,
452
+ inputs=[background_type],
453
+ outputs=[background_color, background_image]
454
+ )
455
+
456
+ with gr.Column(scale=1):
457
+ output_video = gr.Video(label="Output Video", height=300)
458
+
459
+ process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
460
+
461
+ processing_info = gr.Textbox(
462
+ label="📊 Processing Info",
463
+ lines=10,
464
+ max_lines=15,
465
+ interactive=False,
466
+ placeholder="Processing status will appear here...",
467
+ elem_id="processing-info"
468
+ )
469
+
470
+ # Connect processing
471
+ process_btn.click(
472
+ fn=process_video,
473
+ inputs=[
474
+ input_video,
475
+ background_image,
476
+ use_two_stage,
477
+ use_mask_refinement,
478
+ use_temporal_smoothing,
479
+ mask_blur,
480
+ edge_smoothing,
481
+ background_type,
482
+ background_color
483
+ ],
484
+ outputs=[output_video]
485
+ )
486
+
487
+ with gr.TabItem("📚 Examples"):
488
+ gr.Examples(
489
+ examples=get_example_videos(),
490
+ inputs=input_video,
491
+ label="Sample Videos"
492
+ )
493
+ gr.Examples(
494
+ examples=get_example_backgrounds(),
495
+ inputs=background_image,
496
+ label="Sample Backgrounds"
497
+ )
498
+
499
+ with gr.TabItem("ℹ️ About"):
500
+ gr.Markdown("""
501
+ ### Technology Stack
502
+
503
+ - **SAM2**: Segment Anything Model 2 for accurate person segmentation
504
+ - **MattingAnything**: Advanced alpha matting for refined edges
505
+ - **Two-Stage Processing**: Combines both models for optimal quality
506
+
507
+ ### Tips for Best Results
508
+
509
+ 1. **Use Two-Stage Mode** for highest quality output
510
+ 2. **Enable Temporal Smoothing** to reduce flickering
511
+ 3. **Adjust Edge Smoothing** for softer transitions
512
+ 4. **High contrast backgrounds** work best
513
+
514
+ ### Performance Notes
515
+
516
+ - Processing speed depends on video resolution and length
517
+ - GPU recommended for faster processing
518
+ - Two-stage mode is slower but produces better results
519
+ """)
520
+
521
+ return app
522
+
523
+ # ========================= MAIN ENTRY POINT =========================
524
  if __name__ == "__main__":
525
+ try:
526
+ # Create and launch app
527
+ app = create_app()
528
+
529
+ # Configure for HuggingFace Spaces
530
+ app.queue(max_size=5)
531
+ app.launch(
532
+ server_name="0.0.0.0",
533
+ server_port=7860,
534
+ share=False,
535
+ debug=False,
536
+ show_error=True
537
+ )
538
+
539
+ except Exception as e:
540
+ logger.error(f"Failed to start application: {str(e)}")
541
+ traceback.print_exc()
542
+ sys.exit(1)