VideoBackgroundReplacer / utilities.py
MogensR's picture
Update utilities.py
5680088
raw
history blame
24.4 kB
#!/usr/bin/env python3
"""
utilities.py - Helper functions and utilities for Video Background Replacement
Contains the segmentation, mask refinement, compositing, background creation
and video validation helpers.
UPDATED: Models are passed as parameters instead of globals
"""
import os
import cv2
import numpy as np
import torch
import requests
from PIL import Image, ImageDraw
import logging
import time
# Setup logging
# NOTE(review): basicConfig() configures the root logger as an import-time
# side effect of this module - confirm that is intended for library use.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the helpers in this file.
logger = logging.getLogger(__name__)
# Professional background templates.
# Each entry maps a template key to a config dict with:
#   name        - human-readable label
#   type        - "gradient" or "color"
#   colors      - list of 6-digit "#rrggbb" hex strings (the hex parsers in
#                 this file read two digits per channel, so shorthand "#rgb"
#                 must not be used here)
#   direction   - gradient direction (gradient entries only)
#   description - short description of the look
# "studio_green" additionally carries a "chroma_key" flag.
PROFESSIONAL_BACKGROUNDS = {
    "office_modern": {
        "name": "Modern Office",
        "type": "gradient",
        "colors": ["#f8f9fa", "#e9ecef", "#dee2e6"],
        "direction": "diagonal",
        "description": "Clean, contemporary office environment"
    },
    "office_executive": {
        "name": "Executive Office",
        "type": "gradient",
        "colors": ["#2c3e50", "#34495e", "#5d6d7e"],
        "direction": "vertical",
        "description": "Professional executive setting"
    },
    "studio_blue": {
        "name": "Professional Blue",
        "type": "gradient",
        "colors": ["#1e3c72", "#2a5298", "#3498db"],
        "direction": "radial",
        "description": "Broadcast-quality blue studio"
    },
    "studio_green": {
        "name": "Broadcast Green",
        "type": "color",
        "colors": ["#00b894"],
        "chroma_key": True,
        "description": "Professional green screen replacement"
    },
    "conference": {
        "name": "Conference Room",
        "type": "gradient",
        "colors": ["#74b9ff", "#0984e3", "#6c5ce7"],
        "direction": "horizontal",
        "description": "Modern conference room setting"
    },
    "minimalist": {
        "name": "Minimalist White",
        "type": "gradient",
        # Was "#ddd": the 6-digit parsers turned that stop into mid-grey.
        "colors": ["#ffffff", "#f1f2f6", "#dddddd"],
        "direction": "soft_radial",
        "description": "Clean, minimal background"
    },
    "warm_gradient": {
        "name": "Warm Sunset",
        "type": "gradient",
        "colors": ["#ff7675", "#fd79a8", "#fdcb6e"],
        "direction": "diagonal",
        "description": "Warm, inviting atmosphere"
    },
    "cool_gradient": {
        "name": "Cool Ocean",
        "type": "gradient",
        "colors": ["#74b9ff", "#0984e3", "#00cec9"],
        "direction": "vertical",
        "description": "Cool, calming ocean tones"
    },
    "corporate": {
        "name": "Corporate Navy",
        "type": "gradient",
        "colors": ["#2d3436", "#636e72", "#74b9ff"],
        "direction": "radial",
        "description": "Corporate professional setting"
    },
    "creative": {
        "name": "Creative Purple",
        "type": "gradient",
        "colors": ["#6c5ce7", "#a29bfe", "#fd79a8"],
        "direction": "diagonal",
        "description": "Creative, artistic environment"
    },
    "tech_dark": {
        "name": "Tech Dark",
        "type": "gradient",
        "colors": ["#0c0c0c", "#2d3748", "#4a5568"],
        "direction": "vertical",
        "description": "Modern tech/gaming setup"
    },
    "nature_green": {
        "name": "Nature Green",
        "type": "gradient",
        "colors": ["#27ae60", "#2ecc71", "#58d68d"],
        "direction": "soft_radial",
        "description": "Natural, organic background"
    },
    "luxury_gold": {
        "name": "Luxury Gold",
        "type": "gradient",
        "colors": ["#f39c12", "#e67e22", "#d68910"],
        "direction": "diagonal",
        "description": "Premium, luxury setting"
    },
    "medical_clean": {
        "name": "Medical Clean",
        "type": "gradient",
        "colors": ["#ecf0f1", "#bdc3c7", "#95a5a6"],
        "direction": "horizontal",
        "description": "Clean, medical/healthcare setting"
    },
    "education_blue": {
        "name": "Education Blue",
        "type": "gradient",
        "colors": ["#3498db", "#5dade2", "#85c1e9"],
        "direction": "vertical",
        "description": "Educational, learning environment"
    },
}
def segment_person_hq(image, predictor):
    """Segment the person in ``image`` using the supplied SAM2 predictor.

    Seven foreground point prompts are placed at fixed fractions of the frame
    size; the candidate mask with the highest score is kept, closed with a
    3x3 kernel and lightly blurred.

    Args:
        image: Frame array; only ``image.shape[:2]`` is read here before the
            frame is handed to ``predictor.set_image``.
        predictor: Object exposing ``set_image`` and ``predict`` (the
            ``predict`` call is expected to return masks, scores and a third
            value that is ignored).

    Returns:
        Single-channel ``uint8`` mask. If anything raises, the error is logged
        and a centred rectangular mask (255 inside, 0 outside) is returned.
    """
    try:
        predictor.set_image(image)
        height, width = image.shape[:2]

        # (x, y) prompts, all labelled as foreground.
        prompt_points = np.array([
            (width // 2, height // 4),        # Top-center (head)
            (width // 2, height // 2),        # Center (torso)
            (width // 2, 3 * height // 4),    # Bottom-center (legs)
            (width // 4, height // 2),        # Left-side (arm)
            (3 * width // 4, height // 2),    # Right-side (arm)
            (width // 5, height // 5),        # Top-left (hair/accessories)
            (4 * width // 5, height // 5),    # Top-right (hair/accessories)
        ])
        prompt_labels = np.ones(len(prompt_points))

        candidates, confidences, _ = predictor.predict(
            point_coords=prompt_points,
            point_labels=prompt_labels,
            multimask_output=True,
        )

        # Keep the highest-scoring candidate.
        person_mask = candidates[np.argmax(confidences)]

        # Normalise to a 2-D uint8 mask.
        if len(person_mask.shape) > 2:
            person_mask = person_mask.squeeze()
        if person_mask.dtype != np.uint8:
            person_mask = (person_mask * 255).astype(np.uint8)

        # Close small holes, then soften the edge.
        closing_kernel = np.ones((3, 3), np.uint8)
        closed = cv2.morphologyEx(person_mask, cv2.MORPH_CLOSE, closing_kernel)
        smoothed = cv2.GaussianBlur(closed.astype(np.float32), (3, 3), 0.8)

        # A mask still in the 0..1 range is scaled up to 0..255.
        if smoothed.max() <= 1.0:
            return (smoothed * 255).astype(np.uint8)
        return smoothed.astype(np.uint8)
    except Exception as e:
        logger.error(f"Segmentation error: {e}")
        # Fallback: a centred box covering the middle of the frame.
        height, width = image.shape[:2]
        box_mask = np.zeros((height, width), dtype=np.uint8)
        left, top = width // 4, height // 6
        right, bottom = 3 * width // 4, 5 * height // 6
        box_mask[top:bottom, left:right] = 255
        return box_mask
def refine_mask_hq(image, mask, matanyone_processor):
    """Refine a segmentation mask using the supplied MatAnyone processor.

    The frame is bilateral-filtered first. A processor that exposes
    ``process_video`` is not called; the OpenCV refinement is used instead.
    Any other processor is called through ``infer(image, mask)``. The result
    is reduced to one channel if needed, then bilateral- and median-filtered.

    Args:
        image: Frame passed to ``cv2.bilateralFilter``.
        mask: Initial mask for the frame.
        matanyone_processor: Matting processor object (see above).

    Returns:
        The refined mask. On any exception the error is logged and the result
        of ``enhance_mask_opencv(image, mask)`` is returned.
    """
    try:
        # Denoise the frame while keeping edges before matting.
        smoothed_frame = cv2.bilateralFilter(image, 10, 75, 75)

        if not hasattr(matanyone_processor, 'process_video'):
            # Direct inference call.
            matte = matanyone_processor.infer(smoothed_frame, mask)
        else:
            # HF InferenceCore-style object: not driven here, so fall back to
            # the OpenCV-based refinement for now.
            matte = enhance_mask_opencv(smoothed_frame, mask)

        # Collapse a 3-channel result to a single channel.
        if len(matte.shape) == 3:
            matte = cv2.cvtColor(matte, cv2.COLOR_BGR2GRAY)

        # Final smoothing passes.
        matte = cv2.medianBlur(cv2.bilateralFilter(matte, 10, 75, 75), 3)
        return matte
    except Exception as e:
        logger.error(f"Mask refinement error: {e}")
        return enhance_mask_opencv(image, mask)
def enhance_mask_opencv(image, mask):
    """Refine a mask using OpenCV filtering only.

    Args:
        image: Accepted for API symmetry; not read by this function.
        mask: Mask to refine; a 3-dimensional mask is converted to grayscale.

    Returns:
        The refined single-channel mask. If any step raises, a warning is
        logged and the (grayscale) input mask is returned instead.
    """
    try:
        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        # Edge-preserving smoothing.
        work = cv2.bilateralFilter(mask, 9, 75, 75)

        # Close small holes, then remove small specks.
        ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        for morph_op in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
            work = cv2.morphologyEx(work, morph_op, ellipse)

        # Light Gaussian smoothing.
        work = cv2.GaussianBlur(work, (3, 3), 1.0)

        # OR a quarter-strength, slightly dilated edge map into the mask.
        edge_map = cv2.Canny(work, 50, 150)
        thick_edges = cv2.dilate(edge_map, np.ones((2, 2), np.uint8), iterations=1)
        work = cv2.bitwise_or(work, thick_edges // 4)

        # Distance transform, rescaled to 0..255, to emphasise the interior.
        interior = cv2.distanceTransform(work, cv2.DIST_L2, 5)
        interior = cv2.normalize(interior, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

        # 70/30 blend of the mask with its distance transform.
        mask_weight = 0.7
        work = cv2.addWeighted(work, mask_weight, interior, 1 - mask_weight, 0)

        # Final smoothing. NOTE(review): a (1, 1) Gaussian kernel leaves the
        # image unchanged - confirm whether a larger kernel was intended.
        work = cv2.medianBlur(work, 3)
        work = cv2.GaussianBlur(work, (1, 1), 0.5)
        return work
    except Exception as e:
        logger.warning(f"Enhanced mask refinement error: {e}")
        if len(mask.shape) == 2:
            return mask
        return cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
def create_green_screen_background(frame):
    """Return a solid green-screen image with the same height/width as ``frame``.

    The result is a ``uint8`` array of shape ``(h, w, 3)`` in which every
    pixel holds the channel values ``(0, 177, 64)``.
    """
    rows, cols = frame.shape[:2]
    canvas = np.empty((rows, cols, 3), dtype=np.uint8)
    canvas[...] = (0, 177, 64)
    return canvas
def replace_background_hq(frame, mask, background):
    """Composite ``frame`` over ``background`` using ``mask`` as alpha.

    The background is resized to the frame size, the mask is feathered with a
    small Gaussian blur, and the blend is done in linear light (gamma 2.2)
    before converting back.

    Args:
        frame: Foreground image (``uint8``, 3 channels).
        mask: Alpha mask in the 0..255 range; a 3-dimensional mask is converted
            to grayscale first. 255 keeps the frame, 0 shows the background.
        background: Replacement background image; resized to match ``frame``.

    Returns:
        The composited ``uint8`` image. If the high-quality path fails, a plain
        alpha blend is attempted; if that fails too, the failure is logged and
        the untouched ``frame`` is returned.
    """
    try:
        # Resize background to match frame
        background = cv2.resize(
            background, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_LANCZOS4
        )
        # Ensure mask is single channel
        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        # Normalize mask to 0-1 range
        mask_float = mask.astype(np.float32) / 255.0
        # Edge feathering for smooth transitions
        feather_radius = 3
        kernel_size = feather_radius * 2 + 1
        mask_feathered = cv2.GaussianBlur(
            mask_float, (kernel_size, kernel_size), feather_radius / 3
        )
        # Create 3-channel mask
        mask_3channel = np.stack([mask_feathered] * 3, axis=2)
        # Decode gamma so the blend happens in linear light
        frame_linear = np.power(frame.astype(np.float32) / 255.0, 2.2)
        background_linear = np.power(background.astype(np.float32) / 255.0, 2.2)
        # Composite in linear space
        result_linear = frame_linear * mask_3channel + background_linear * (1 - mask_3channel)
        # Convert back to gamma space
        result = np.power(result_linear, 1 / 2.2) * 255.0
        return np.clip(result, 0, 255).astype(np.uint8)
    except Exception as e:
        logger.error(f"Background replacement error: {e}")
        # Fallback to simple replacement
        try:
            if len(mask.shape) == 3:
                mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            background = cv2.resize(background, (frame.shape[1], frame.shape[0]))
            mask_normalized = mask.astype(np.float32) / 255.0
            mask_3channel = np.stack([mask_normalized] * 3, axis=2)
            result = frame * mask_3channel + background * (1 - mask_3channel)
            return result.astype(np.uint8)
        except Exception as fallback_error:
            # Catch Exception (not a bare except) so SystemExit and
            # KeyboardInterrupt still propagate, and record why the frame is
            # being returned unmodified.
            logger.error(f"Fallback background replacement error: {fallback_error}")
            return frame
def create_professional_background(bg_config, width, height):
    """Create a professional background image from a template config.

    Args:
        bg_config: Mapping with a ``"type"`` key. ``"color"`` fills the image
            with the first entry of ``"colors"`` (``"#rrggbb"`` or shorthand
            ``"#rgb"``); ``"gradient"`` delegates to
            ``create_gradient_background``; any other type yields mid-grey.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        ``uint8`` array of shape ``(height, width, 3)`` in BGR channel order.
        On any exception the error is logged and a mid-grey image is returned.
    """
    neutral_grey = (128, 128, 128)
    try:
        bg_type = bg_config["type"]
        if bg_type == "color":
            color_hex = bg_config["colors"][0].lstrip('#')
            # Expand CSS-style shorthand ("ddd" -> "dddddd") so it is not
            # rejected by the two-digits-per-channel parse below.
            if len(color_hex) == 3:
                color_hex = "".join(digit * 2 for digit in color_hex)
            color_rgb = tuple(int(color_hex[i:i + 2], 16) for i in (0, 2, 4))
            color_bgr = color_rgb[::-1]  # OpenCV channel order
            background = np.full((height, width, 3), color_bgr, dtype=np.uint8)
        elif bg_type == "gradient":
            background = create_gradient_background(bg_config, width, height)
        else:
            background = np.full((height, width, 3), neutral_grey, dtype=np.uint8)
        return background
    except Exception as e:
        logger.error(f"Background creation error: {e}")
        return np.full((height, width, 3), neutral_grey, dtype=np.uint8)
def create_gradient_background(bg_config, width, height):
    """Create a multi-stop gradient background as a BGR ``uint8`` image.

    The gradient is computed with vectorised NumPy operations. Each channel is
    linearly interpolated between neighbouring colour stops and truncated to
    an integer.

    Args:
        bg_config: Mapping with a ``"colors"`` list of hex strings
            (``"#rrggbb"`` or shorthand ``"#rgb"``; malformed entries become
            mid-grey) and an optional ``"direction"``: ``"vertical"``
            (default), ``"horizontal"``, ``"diagonal"``, ``"radial"`` or
            ``"soft_radial"``. Unknown directions are treated as vertical.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        Array of shape ``(height, width, 3)``, dtype ``uint8``, BGR channel
        order. If anything raises, the error is logged and a black-to-white
        vertical ramp is returned instead.
    """
    try:
        def parse_hex(color_hex):
            """Return (r, g, b) for a hex colour string, mid-grey if malformed."""
            digits = color_hex.lstrip('#')
            # Expand CSS-style shorthand ("ddd" -> "dddddd").
            if len(digits) == 3:
                digits = "".join(digit * 2 for digit in digits)
            try:
                return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
            except ValueError:
                return (128, 128, 128)

        rgb_colors = [parse_hex(color_hex) for color_hex in bg_config["colors"]]
        if not rgb_colors:
            rgb_colors = [(128, 128, 128)]
        direction = bg_config.get("direction", "vertical")

        palette = np.array(rgb_colors, dtype=np.int64)  # (stops, 3), RGB
        last_stop = len(palette) - 1

        def interpolate(progress):
            """Map a float array of progress values in [0, 1] to RGB triples."""
            if last_stop == 0:
                return np.broadcast_to(palette[0], progress.shape + (3,))
            segment = progress * last_stop
            idx = segment.astype(np.int64)  # truncation == floor (non-negative)
            local = (segment - idx)[..., np.newaxis]
            lower = np.minimum(idx, last_stop - 1)
            start = palette[lower]
            stop = palette[lower + 1]
            blended = (start + (stop - start) * local).astype(np.int64)
            # progress == 1.0 lands exactly on the final stop.
            return np.where((idx >= last_stop)[..., np.newaxis], palette[last_stop], blended)

        ys = np.arange(height).reshape(-1, 1)  # column vector of row indices
        xs = np.arange(width).reshape(1, -1)   # row vector of column indices

        if direction == "horizontal":
            progress = xs / max(width, 1)
        elif direction == "diagonal":
            progress = np.minimum(1.0, (xs + ys) / max(width + height, 1))
        elif direction in ("radial", "soft_radial"):
            center_x, center_y = width // 2, height // 2
            max_distance = np.sqrt(center_x ** 2 + center_y ** 2)
            distance = np.sqrt((xs - center_x) ** 2 + (ys - center_y) ** 2)
            if max_distance > 0:
                progress = np.minimum(1.0, distance / max_distance)
            else:
                progress = np.zeros(distance.shape)
            if direction == "soft_radial":
                # Exponent < 1 pushes the transition towards the centre.
                progress = progress ** 0.7
        else:
            # "vertical" and any unrecognised direction.
            progress = ys / max(height, 1)

        rgb = np.broadcast_to(interpolate(progress), (height, width, 3))
        # Reverse the channel axis (RGB -> BGR) and materialise a contiguous
        # uint8 image.
        return np.ascontiguousarray(rgb[..., ::-1], dtype=np.uint8)
    except Exception as e:
        logger.error(f"Gradient creation error: {e}")
        # Fallback gradient: black-to-white vertical ramp.
        background = np.zeros((height, width, 3), dtype=np.uint8)
        for y in range(height):
            intensity = int(255 * (y / height)) if height > 0 else 128
            background[y, :] = [intensity, intensity, intensity]
        return background
def create_procedural_background(prompt, style, width, height):
    """Create a procedural background from a text prompt and a style name.

    The first keyword of the colour map (in definition order) that occurs in
    the lower-cased prompt selects the palette; with no match, or an empty or
    ``None`` prompt, a default palette is used. ``style`` then selects the
    builder; unknown styles produce a two-colour diagonal gradient.

    Args:
        prompt: Free-text description; may be empty or ``None``.
        style: One of ``"abstract"``, ``"minimalist"``, ``"corporate"``,
            ``"nature"``, ``"artistic"``; anything else uses the default.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        Whatever the selected builder returns, or ``None`` if an exception is
        raised here (the error is logged).
    """
    try:
        # Treat a missing prompt as "no keywords" instead of failing.
        prompt_lower = (prompt or "").lower()
        # Color mapping based on keywords. All entries use 6-digit hex because
        # the style builders parse two digits per channel.
        color_map = {
            'blue': ['#1e3c72', '#2a5298', '#3498db'],
            'ocean': ['#74b9ff', '#0984e3', '#00cec9'],
            'sky': ['#87CEEB', '#4682B4', '#1E90FF'],
            'green': ['#27ae60', '#2ecc71', '#58d68d'],
            'nature': ['#2d5016', '#3c6e1f', '#4caf50'],
            'forest': ['#1B4332', '#2D5A36', '#40916C'],
            'red': ['#e74c3c', '#c0392b', '#ff7675'],
            'sunset': ['#ff7675', '#fd79a8', '#fdcb6e'],
            'orange': ['#e67e22', '#f39c12', '#ff9f43'],
            'purple': ['#6c5ce7', '#a29bfe', '#fd79a8'],
            'pink': ['#fd79a8', '#fdcb6e', '#ff7675'],
            'yellow': ['#f1c40f', '#f39c12', '#fdcb6e'],
            'tech': ['#2c3e50', '#34495e', '#74b9ff'],
            'space': ['#0c0c0c', '#2d3748', '#4a5568'],
            'dark': ['#1a1a1a', '#2d2d2d', '#404040'],
            'office': ['#f8f9fa', '#e9ecef', '#74b9ff'],
            'corporate': ['#2c3e50', '#34495e', '#74b9ff'],
            'warm': ['#ff7675', '#fd79a8', '#fdcb6e'],
            'cool': ['#74b9ff', '#0984e3', '#00cec9'],
            # Was '#ddd', which made the abstract/artistic builders fail.
            'minimal': ['#ffffff', '#f1f2f6', '#dddddd'],
            'abstract': ['#6c5ce7', '#a29bfe', '#fd79a8']
        }
        # Select colors based on prompt (first matching keyword wins)
        selected_colors = ['#3498db', '#2ecc71', '#e74c3c']  # Default
        for keyword, colors in color_map.items():
            if keyword in prompt_lower:
                selected_colors = colors
                break
        # Create background based on style
        if style == "abstract":
            return create_abstract_background(selected_colors, width, height)
        elif style == "minimalist":
            return create_minimalist_background(selected_colors, width, height)
        elif style == "corporate":
            return create_corporate_background(selected_colors, width, height)
        elif style == "nature":
            return create_nature_background(selected_colors, width, height)
        elif style == "artistic":
            return create_artistic_background(selected_colors, width, height)
        else:
            # Default gradient
            bg_config = {
                "type": "gradient",
                "colors": selected_colors[:2],
                "direction": "diagonal"
            }
            return create_gradient_background(bg_config, width, height)
    except Exception as e:
        logger.error(f"Procedural background creation failed: {e}")
        return None
def create_abstract_background(colors, width, height):
    """Create an abstract background: a vertical gradient with blended circles.

    Args:
        colors: Non-empty list of hex colour strings (``"#rrggbb"`` or
            shorthand ``"#rgb"``). The first two define the base gradient; a
            single colour gives a flat base. Circle colours are drawn from the
            whole list.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        BGR ``uint8`` image of shape ``(height, width, 3)``, or ``None`` if an
        exception is raised (the error is logged). The circle layout is
        deterministic for a given size and palette.
    """
    try:
        import random

        def hex_to_bgr(color):
            """Return the (b, g, r) tuple for a hex colour string."""
            hex_color = color.lstrip('#')
            # Expand CSS-style shorthand ("ddd" -> "dddddd").
            if len(hex_color) == 3:
                hex_color = "".join(digit * 2 for digit in hex_color)
            rgb = tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
            return rgb[::-1]

        bgr_colors = [hex_to_bgr(color) for color in colors]

        # Base gradient from the first to the second colour; with only one
        # colour the end stop equals the start stop.
        start = np.array(bgr_colors[0], dtype=np.int64)
        end = np.array(bgr_colors[1] if len(bgr_colors) > 1 else bgr_colors[0], dtype=np.int64)
        background = np.zeros((height, width, 3), dtype=np.uint8)
        if height > 0:
            progress = (np.arange(height) / height)[:, np.newaxis]
            row_colors = (start + (end - start) * progress).astype(np.uint8)
            background[:] = row_colors[:, np.newaxis, :]

        # Add geometric shapes. A private generator seeded with 42 keeps the
        # layout reproducible without reseeding the shared ``random`` module
        # state used by the rest of the process.
        rng = random.Random(42)
        for _ in range(8):
            center_x = rng.randint(width // 4, 3 * width // 4)
            center_y = rng.randint(height // 4, 3 * height // 4)
            radius = rng.randint(width // 20, width // 8)
            color = bgr_colors[rng.randint(0, len(bgr_colors) - 1)]
            overlay = background.copy()
            cv2.circle(overlay, (center_x, center_y), radius, color, -1)
            # Blend the filled circle in at 30% opacity, writing in place.
            cv2.addWeighted(background, 0.7, overlay, 0.3, 0, background)
        return background
    except Exception as e:
        logger.error(f"Abstract background creation failed: {e}")
        return None
def create_minimalist_background(colors, width, height):
    """Create a minimalist background.

    Builds a ``soft_radial`` gradient from the first two entries of ``colors``
    via ``create_gradient_background``. Returns ``None`` (after logging) if an
    exception is raised.
    """
    try:
        gradient_spec = dict(
            type="gradient",
            colors=colors[:2],
            direction="soft_radial",
        )
        return create_gradient_background(gradient_spec, width, height)
    except Exception as e:
        logger.error(f"Minimalist background creation failed: {e}")
        return None
def create_corporate_background(colors, width, height):
    """Create a corporate background: fixed diagonal gradient with a faint grid.

    Args:
        colors: Not used by this function; the palette is fixed.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        The blurred BGR image, or ``None`` if an exception is raised (the
        error is logged).
    """
    try:
        bg_config = {
            "type": "gradient",
            "colors": ['#2c3e50', '#34495e', '#74b9ff'],
            "direction": "diagonal"
        }
        background = create_gradient_background(bg_config, width, height)
        # Add subtle grid pattern
        grid_color = (80, 80, 80)
        grid_spacing = width // 20
        # Frames narrower than 20 px give a spacing of 0, which range()
        # rejects as a step; skip the grid rather than fail the background.
        if grid_spacing > 0:
            for x in range(0, width, grid_spacing):
                cv2.line(background, (x, 0), (x, height), grid_color, 1)
            for y in range(0, height, grid_spacing):
                cv2.line(background, (0, y), (width, y), grid_color, 1)
        # Soften the grid lines.
        background = cv2.GaussianBlur(background, (3, 3), 1.0)
        return background
    except Exception as e:
        logger.error(f"Corporate background creation failed: {e}")
        return None
def create_nature_background(colors, width, height):
    """Create a nature background.

    Builds a vertical gradient from a fixed green palette via
    ``create_gradient_background``; ``colors`` is not used. Returns ``None``
    (after logging) if an exception is raised.
    """
    try:
        green_palette = ['#2d5016', '#3c6e1f', '#4caf50']
        gradient_spec = dict(type="gradient", colors=green_palette, direction="vertical")
        return create_gradient_background(gradient_spec, width, height)
    except Exception as e:
        logger.error(f"Nature background creation failed: {e}")
        return None
def create_artistic_background(colors, width, height):
    """Create an artistic background: diagonal gradient with blended sine waves.

    Args:
        colors: List of hex colour strings (``"#rrggbb"`` or shorthand
            ``"#rgb"``) used for both the gradient and the wave strokes.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        The blurred BGR image, or ``None`` if an exception is raised (the
        error is logged).
    """
    try:
        bg_config = {
            "type": "gradient",
            "colors": colors,
            "direction": "diagonal"
        }
        background = create_gradient_background(bg_config, width, height)

        # Wave colours in BGR order. The waves are fully deterministic, so no
        # random generator is needed (and the shared ``random`` state is left
        # untouched).
        bgr_colors = []
        for color in colors:
            hex_color = color.lstrip('#')
            # Expand CSS-style shorthand ("ddd" -> "dddddd").
            if len(hex_color) == 3:
                hex_color = "".join(digit * 2 for digit in hex_color)
            rgb = tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
            bgr_colors.append(rgb[::-1])

        # Narrow frames would give a zero sampling step (rejected by range());
        # clamp the step and the stroke thickness to 1.
        x_step = max(1, width // 10)
        stroke = max(1, width // 50)

        # Add artistic wave patterns
        overlay = background.copy()
        for i in range(3):
            pts = []
            for x in range(0, width, x_step):
                # Sine wave around mid-height, phase-shifted by the wave index.
                y = int(height // 2 + (height // 4) * np.sin(2 * np.pi * x / width + i))
                pts.append([x, y])
            pts = np.array(pts, np.int32)
            color = bgr_colors[i % len(bgr_colors)]
            cv2.polylines(overlay, [pts], False, color, thickness=stroke)
        # Blend the waves in at 30% opacity, writing in place.
        cv2.addWeighted(background, 0.7, overlay, 0.3, 0, background)
        background = cv2.GaussianBlur(background, (3, 3), 1.0)
        return background
    except Exception as e:
        logger.error(f"Artistic background creation failed: {e}")
        return None
def get_model_status():
    """Return the fixed status message describing model loading."""
    status_message = "Models loaded in app.py - ready for processing"
    return status_message
def validate_video_file(video_path):
    """Validate that a video file exists, opens, and reports frames.

    Args:
        video_path: Path to the video file; may be empty or ``None``.

    Returns:
        ``(is_valid, message)`` tuple: ``(True, "Video file valid")`` on
        success, otherwise ``False`` with a message describing the failure.
    """
    if not video_path or not os.path.exists(video_path):
        return False, "Video file not found"
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return False, "Cannot open video file"
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count == 0:
            return False, "Video appears to be empty"
        return True, "Video file valid"
    except Exception as e:
        return False, f"Error validating video: {str(e)}"
    finally:
        # Release the capture handle on every path, including early returns.
        if cap is not None:
            cap.release()
def get_available_backgrounds():
    """Return a dict mapping each professional background key to its display name."""
    display_names = {}
    for template_key, template in PROFESSIONAL_BACKGROUNDS.items():
        display_names[template_key] = template["name"]
    return display_names