File size: 2,332 Bytes
9532327
 
a6f1144
 
 
9532327
 
 
 
 
 
a6f1144
9532327
 
a6f1144
9532327
 
 
a6f1144
9532327
 
 
a6f1144
9532327
a6f1144
9532327
 
 
 
 
 
 
 
 
 
 
 
a6f1144
9532327
 
a6f1144
9532327
 
 
 
 
 
 
 
 
 
 
a6f1144
9532327
a6f1144
 
9532327
 
 
 
 
 
 
 
 
 
a6f1144
9532327
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
import cv2
import numpy as np
import torch
from PIL import Image
from transformers import SegformerForSemanticSegmentation, SegformerFeatureExtractor

# Load the Segformer semantic-segmentation model and its matching preprocessor
# once at import time so every request reuses the same objects.
# NOTE(review): SegformerFeatureExtractor is reported as deprecated in newer
# transformers releases in favour of SegformerImageProcessor - confirm against
# the pinned transformers version before upgrading.
model_name = "nvidia/segformer-b0-finetuned-ade-512-512"
model = SegformerForSemanticSegmentation.from_pretrained(model_name)
feature_extractor = SegformerFeatureExtractor.from_pretrained(model_name)

# Channel of the model's logits that is treated as the "keep sharp" class.
# The original author labels this as the mug class in ADE20K.
# NOTE(review): verify against model.config.id2label - the 150-class ADE20K
# label map may not contain a "mug" entry, and index 41 may be a different
# class. TODO confirm.
MUG_CLASS_INDEX = 41  

def apply_blur(image, blur_type):
    """Blur the background of *image* while keeping the target class sharp.

    The image is segmented with the module-level Segformer ``model``; pixels
    whose softmax confidence for ``MUG_CLASS_INDEX`` exceeds a low threshold
    form the foreground mask. The mask is grown and closed morphologically,
    resized to the original resolution, and used to composite the untouched
    image over a blurred copy.

    Args:
        image: Input picture as a NumPy array (as delivered by
            ``gr.Image(type="numpy")``), or ``None`` when nothing was uploaded.
        blur_type: ``"Gaussian"`` or ``"Lens"``. ``None`` (no radio option
            selected) falls back to ``"Gaussian"``.

    Returns:
        An RGB NumPy array with the same height and width as the input, or
        ``None`` when *image* is ``None``.

    Raises:
        ValueError: If *blur_type* is not one of the supported options.
    """
    # Nothing uploaded: return an empty result instead of crashing inside PIL.
    if image is None:
        return None

    # Validate the blur choice up front so we never run the model only to
    # fail later with an unbound ``blurred_image``.
    if blur_type is None:
        blur_type = "Gaussian"
    if blur_type not in ("Gaussian", "Lens"):
        raise ValueError(
            f"Unsupported blur type: {blur_type!r} (expected 'Gaussian' or 'Lens')"
        )

    image = Image.fromarray(image).convert("RGB")
    original_width, original_height = image.size

    # Preprocess the image for the model
    inputs = feature_extractor(images=image, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    # Per-class probabilities; classes live on dim 1 of the logits.
    confidence_scores = torch.nn.functional.softmax(outputs.logits, dim=1)
    mug_confidence = confidence_scores[0, MUG_CLASS_INDEX].cpu().numpy()

    # Deliberately low threshold: favour keeping too much sharp over
    # accidentally blurring part of the object.
    binary_mask = (mug_confidence > 0.08).astype(np.uint8) * 255

    # Grow the mask and close small holes so the object edge is not blurred.
    kernel = np.ones((5, 5), np.uint8)
    binary_mask = cv2.dilate(binary_mask, kernel, iterations=2)
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)

    # The mask is at the model's output resolution; bring it back to the
    # original image size. Nearest-neighbour keeps it strictly 0/255.
    binary_mask_resized = cv2.resize(
        binary_mask,
        (original_width, original_height),
        interpolation=cv2.INTER_NEAREST,
    )

    image_np = np.array(image)

    # Apply the selected blur type
    if blur_type == "Gaussian":
        # Kernel size (0, 0) lets OpenCV derive it from the sigmas.
        blurred_image = cv2.GaussianBlur(image_np, (0, 0), sigmaX=15, sigmaY=15)
    else:  # "Lens"
        # Simulating lens blur with a box filter
        blurred_image = cv2.blur(image_np, (15, 15))

    # Combine sharp foreground with blurred background
    output_image = np.where(
        binary_mask_resized[..., None] == 255, image_np, blurred_image
    )

    return output_image

# Gradio UI: an image upload plus a blur-type selector wired to apply_blur.
iface = gr.Interface(
    fn=apply_blur,
    inputs=[
        gr.Image(type="numpy"),
        # Pre-select an option so the callback never receives None for the
        # blur type when the user submits without touching the radio.
        gr.Radio(["Gaussian", "Lens"], value="Gaussian", label="Select Blur Type"),
    ],
    outputs=gr.Image(type="numpy"),
    title="Blur Background for Mug Objects",
    description="Upload an image, and the model will blur the background while keeping the mug sharp.",
)

# Start the web server when the script is executed.
iface.launch()