import gradio as gr
from PIL import Image
import numpy as np
import torch
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
from matplotlib.colors import to_rgb
import re
import cv2

# Load model
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

def parse_color(color_str):
    """
    Converts a color string (hex, name, or rgba(...)) to an RGB tuple.
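
    Examples (illustrative; hex/named colors rely on matplotlib's to_rgb):
        parse_color("#ff0000")                -> (255, 0, 0)
        parse_color("red")                    -> (255, 0, 0)
        parse_color("rgba(0, 128, 255, 0.5)") -> (0, 128, 255)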
    """
    try:
        if isinstance(color_str, str):
            if color_str.startswith("rgba("):
                # Extract the 3 RGB components
                numbers = list(map(float, re.findall(r"[\d.]+", color_str)))
                if len(numbers) >= 3:
                    r, g, b = numbers[:3]
                    return int(r), int(g), int(b)
            else:
                # Use named or hex color
                return tuple(int(255 * c) for c in to_rgb(color_str))
    except Exception:
        pass
    raise ValueError(f"Invalid color format: {color_str}. Use hex like '#ff0000', color name like 'red', or rgba format.")

def apply_mask(image: Image.Image, prompt: str, color: str) -> Image.Image:
    # Run CLIPSeg on the input image and text prompt (no gradients needed for inference)
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Depending on the batch size / transformers version, logits may come back as
    # (H, W) or (num_prompts, H, W); reduce to a single 2D map either way.
    preds = outputs.logits
    if preds.dim() == 3:
        preds = preds[0]

    # Threshold the sigmoid probabilities into a binary mask
    mask = preds.sigmoid().cpu().numpy()
    mask = (mask > 0.5).astype(np.uint8)

    # Convert image to RGBA
    image_np = np.array(image.convert("RGBA"))

    # Resize the mask to the image size; nearest-neighbor keeps it strictly 0/1
    mask_resized = cv2.resize(mask, (image_np.shape[1], image_np.shape[0]), interpolation=cv2.INTER_NEAREST)
    mask_3d = np.stack([mask_resized] * 4, axis=-1)  # Replicate the mask across the 4 RGBA channels

    # Convert the color string to an RGB tuple
    color_rgb = parse_color(color)
    overlay_color = np.array([*color_rgb, 128], dtype=np.uint8)  # RGBA with alpha 128

    # Create an overlay with the selected color
    overlay = np.zeros_like(image_np, dtype=np.uint8)
    overlay[:] = overlay_color

    # Replace masked pixels with the semi-transparent overlay color; leave the rest of the image untouched
    masked_image = np.where(mask_3d == 1, overlay, image_np)
    return Image.fromarray(masked_image)
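
# Illustrative, commented-out example of calling apply_mask() without the Gradio UI.
# "example.jpg" is a placeholder path, not part of this app:
#
#     result = apply_mask(Image.open("example.jpg"), "sky", "#00ff00")
#     result.save("example_masked.png")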

# Gradio Interface
iface = gr.Interface(
    fn=apply_mask,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(label="Segmentation Prompt", placeholder="e.g., helmet, road, sky"),
        gr.ColorPicker(label="Mask Color", value="#ff0000")
    ],
    outputs=gr.Image(type="pil", label="Segmented Image"),
    title="CLIPSeg Image Masking",
    description="Upload an image, input a prompt (e.g., 'person', 'sky'), and pick a mask color."
)
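
# Tip: launch() also accepts share=True to create a temporary public URL (e.g., iface.launch(share=True)).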

iface.launch()