Upload 55 files

19d78dd verified 18 days ago

22 kB

	# api/utils.py
	# -----------------------------------------------------------------------------
	# Color utilities for mask visualization (COCO-183 and ADE-151 aware)
	# - Name-driven colors (e.g., water -> blue, sky -> sky blue)
	# - Prompt-aware palettes (supports multi-term like "human and horse")
	# - Legends for classes present in a mask
	#
	# Public functions:
	# - colorize_mask(mask_tensor, classes=None, dataset=None) -> PIL.Image
	# - overlay_mask(image, color, alpha=0.5) -> PIL.Image
	# - build_legend_from_mask(mask_tensor, classes=None, dataset=None) -> list[dict]
	# -----------------------------------------------------------------------------

	from __future__ import annotations
	import re
	from typing import List, Tuple, Dict
	import numpy as np
	from PIL import Image

	# =============================================================================
	# COCO-183 (green cone) CLASS NAMES
	# NOTE: This is the dataset order you expect from the COCO-183 model.
	# If your model's index order differs, update this list accordingly.
	# =============================================================================
	# Index-ordered COCO label table: the position of a name in this list is the
	# class id looked up for a mask pixel, so entries must not be reordered,
	# removed, or de-duplicated without also changing the model's label mapping.
	CLASS_NAMES: List[str] = [
	"unlabeled",
	"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat",
	"traffic light","fire hydrant","street sign","stop sign","parking meter","bench",
	"bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe",
	"hat","backpack","umbrella","shoe","eyeglasses","handbag","tie","suitcase",
	"frisbee","skis","snowboard","ball","kite","baseball_bat","baseball_glove",
	"skateboard","surfboard","tennis_racket","bottle","plate","wine_glass","cup",
	"fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli",
	"carrot","hot_dog","pizza","donut","cake","chair","couch","potted_plant","bed",
	"mirror","dining_table","window","desk","toilet","door","tv","laptop","mouse",
	"remote","keyboard","cell_phone","microwave","oven","toaster","sink","refrigerator",
	"blender","book","clock","vase","scissors","teddy_bear","hair_dryer","toothbrush",
	"hair_brush",

	# "stuff" classes (COCO-Stuff-like)
	"banner","blanket","branch","bridge","building-other","bush","cabinet","cage",
	"cardboard","carpet","ceiling-other","ceiling-tile","cloth","clothes","clouds",
	"counter","cupboard","curtain","desk-stuff","dirt","door-stuff","fence",
	"floor-marble","floor-other","floor-stone","floor-tile","floor-wood","flower",
	"fog","food-other","fruit","furniture-other","grass","gravel","ground-other",
	"hill","house","leaves","light","mat","metal","mirror-stuff","moss","mountain",
	"mud","napkin","net","paper","pavement","pillow","plant-other","plastic",
	"platform","playingfield","railing","railroad","river","road","rock","roof","rug",
	"salad","sand","sea","shelf","sky-other","skyscraper","snow","solid-other",
	"stairs","stone","straw","structural-other","table","tent","textile-other",
	"towel","tree","vegetable","wall-brick","wall-concrete","wall-other","wall-panel",
	"wall-stone","wall-tile","wall-wood","water","waterdrops","window_blind",
	# NOTE(review): "window" is also listed among the "thing" classes above, so
	# two class ids share one name (and therefore one color and legend label).
	# Presumably this entry corresponds to COCO-Stuff's "window-other" - confirm
	# against the model's label file before renaming.
	"window","wood",
	]

	# Normalize COCO names to the internal canonical form: trimmed, lower-cased,
	# whitespace runs replaced by "_" (e.g. "traffic light" -> "traffic_light").
	# Hyphens are left untouched ("wall-brick" stays "wall-brick").
	CLASS_NAMES = [re.sub(r"\s+", "_", n.strip().lower()) for n in CLASS_NAMES]

	# =============================================================================
	# ADE-151 (orange cone) CLASS NAMES (index order given by user)
	# =============================================================================
	# Index-ordered ADE label table: position in the list is the class id looked
	# up for a mask pixel (index 0 is "unlabeled"). Do not reorder.
	ADE_151_CLASS_NAMES: List[str] = [
	"unlabeled","wall","building","blue_sky","floor","tree","ceiling","road","bed","window",
	"grass","cabinet","sidewalk","person","ground","door","table","mountain","flora","curtain",
	"chair","car","water","painting","sofa","shelf","house","sea","mirror","rug",
	"field","armchair","seat","fence","desk","rock","wardrobe","lamp","bathtub","rail",
	"cushion","pedestal","box","pillar","signboard","dresser","counter","sand","sink","skyscraper",
	"fireplace","refrigerator","grandstand","path","stairs","runway","display","snooker","pillow","screen_door",
	"stairway","river","bridge","bookcase","blind","tea_table","commode","flower","book","hill",
	"bench","countertop","stove","palm_tree","kitchen","computer","swivel_chair","boat","bar","console",
	"hovel","bus","towel","light","truck","tower","chandelier","sunshade","streetlight","booth",
	"television","aeroplane","dirt","apparel","pole","land","bannister","escalator","ottoman","bottle",
	"sideboard","poster","stage","van","ship","fountain","conveyer_belt","canopy","washer","plaything",
	"swimming_pool","stool","barrel","basket","waterfall","tent","bag","motorcycle","cradle","oven",
	"ball","food","stair","tank","marque","microwave","flowerpot","animal","bicycle","lake",
	"dishwasher","projector","blanket","sculpture","exhaust","sconce","vase","traffic_light","tray","ashcan",
	"fan","pier","screen","plate","monitor","notice_board","shower","radiator","glass","clock","flag",
	]
	# Only trims and lower-cases; unlike the COCO list, embedded whitespace is NOT
	# converted to "_" here, so multi-word names must already be underscored.
	ADE_151_CLASS_NAMES = [n.strip().lower() for n in ADE_151_CLASS_NAMES]

	# =============================================================================
	# Color dictionary (seeded with explicit choices; everything else inferred)
	# =============================================================================
	# Base named colors; extend freely. Keys are canonical underscored names.
	# Explicit class-name -> (R, G, B) table. Keys are lower-case dataset names;
	# multi-word names use "_" while COCO "stuff" variants keep their "-" spelling.
	# Each key must appear exactly once: a repeated key in a dict literal silently
	# overrides the earlier entry.
	NAMED_COLORS: Dict[str, Tuple[int, int, int]] = {
	    # universal
	    "unlabeled": (0, 0, 0),

	    # people/animals/vehicles — COCO
	    "person": (220, 20, 60),
	    "human": (220, 20, 60),  # alias
	    "horse": (90, 60, 30),  # per user's requested color
	    "dog": (184, 134, 11),
	    "cat": (255, 160, 122),
	    "bird": (30, 144, 255),
	    "sheep": (245, 222, 179),
	    "cow": (139, 69, 19),
	    "elephant": (128, 128, 128),
	    "bear": (92, 64, 51),
	    "zebra": (200, 200, 200),
	    "giraffe": (218, 165, 32),

	    "bicycle": (60, 180, 75),
	    "car": (0, 90, 190),
	    "motorcycle": (255, 80, 80),
	    "airplane": (120, 120, 255),
	    "aeroplane": (120, 120, 255),
	    "bus": (255, 140, 0),
	    "train": (70, 130, 180),
	    "truck": (200, 120, 0),
	    "boat": (0, 120, 170),
	    "van": (80, 140, 220),
	    "ship": (30, 100, 160),

	    # nature / environment
	    "water": (64, 164, 223),
	    "river": (64, 164, 223),
	    "lake": (64, 164, 223),
	    "sea": (0, 105, 148),
	    "waterfall": (120, 170, 230),
	    "swimming_pool": (100, 200, 230),

	    "sky": (135, 206, 235),
	    "blue_sky": (135, 206, 235),
	    "clouds": (220, 230, 240),

	    "tree": (34, 139, 34),
	    "palm_tree": (44, 159, 44),
	    "flora": (52, 168, 83),
	    "flower": (233, 84, 150),
	    "grass": (76, 187, 23),
	    "leaves": (76, 187, 23),
	    "moss": (107, 142, 35),
	    "hill": (88, 120, 80),
	    "mountain": (96, 108, 118),

	    "sand": (194, 178, 128),
	    "ground": (120, 72, 48),
	    "land": (120, 72, 48),
	    "dirt": (115, 74, 53),
	    "mud": (110, 74, 57),
	    "rock": (101, 110, 120),
	    "stone": (112, 128, 144),

	    # roads / man-made terrain
	    "road": (128, 128, 128),
	    "sidewalk": (170, 170, 170),
	    "pavement": (150, 150, 150),
	    "path": (150, 150, 150),
	    "playingfield": (100, 180, 100),
	    "runway": (160, 160, 160),
	    "stairs": (145, 145, 145),
	    "stair": (145, 145, 145),
	    "stairway": (145, 145, 145),
	    "railroad": (100, 100, 100),
	    "bridge": (120, 120, 140),
	    "pier": (120, 120, 140),

	    # buildings / structures
	    "building": (160, 160, 160),
	    "building-other": (160, 160, 160),
	    "house": (170, 160, 160),
	    "skyscraper": (120, 130, 140),
	    "roof": (150, 120, 100),
	    "wall": (180, 180, 180),
	    "wall-brick": (178, 34, 34),
	    "wall-concrete": (190, 190, 190),
	    "wall-other": (170, 170, 170),
	    "wall-panel": (160, 160, 160),
	    "wall-stone": (135, 135, 135),
	    "wall-tile": (200, 200, 200),
	    "wall-wood": (181, 101, 29),
	    "ceiling": (210, 210, 210),
	    "ceiling-other": (210, 210, 210),
	    "ceiling-tile": (220, 220, 220),
	    "door": (150, 120, 90),
	    "door-stuff": (150, 120, 90),
	    "window": (175, 215, 230),
	    "window_blind": (170, 210, 230),
	    "mirror": (210, 220, 230),
	    "mirror-stuff": (210, 220, 230),
	    "light": (255, 230, 140),
	    "streetlight": (240, 210, 120),
	    "tower": (140, 140, 160),

	    "fence": (189, 183, 107),
	    "railing": (170, 170, 150),
	    "pillar": (180, 180, 170),
	    "signboard": (255, 200, 80),
	    "poster": (255, 200, 140),
	    "traffic_light": (50, 205, 50),

	    # furniture / interior
	    "chair": (205, 133, 63),
	    "armchair": (200, 120, 80),
	    "seat": (205, 133, 63),
	    "bench": (160, 120, 70),
	    "sofa": (160, 82, 45),
	    "stool": (175, 125, 80),
	    "table": (181, 101, 29),
	    "dining_table": (181, 101, 29),
	    "desk": (170, 100, 40),
	    "desk-stuff": (170, 100, 40),
	    "bed": (180, 130, 100),
	    "cabinet": (145, 110, 70),
	    "cupboard": (145, 110, 70),
	    "wardrobe": (130, 90, 60),
	    "dresser": (135, 95, 65),
	    "sideboard": (135, 95, 65),
	    "shelf": (140, 105, 65),

	    "carpet": (150, 80, 60),
	    "rug": (150, 80, 60),
	    "curtain": (200, 180, 160),
	    "pillow": (230, 200, 170),
	    "cushion": (230, 200, 170),
	    "blanket": (200, 170, 150),
	    "towel": (220, 220, 200),

	    "kitchen": (170, 170, 160),
	    "counter": (150, 140, 130),
	    "countertop": (160, 150, 140),
	    "sink": (200, 210, 220),
	    "stove": (140, 140, 140),
	    "oven": (140, 140, 150),
	    "microwave": (155, 160, 170),
	    "dishwasher": (190, 200, 210),
	    "washer": (190, 200, 210),
	    "refrigerator": (200, 220, 235),

	    # electronics
	    "television": (70, 100, 160),
	    "tv": (70, 100, 160),
	    "monitor": (70, 100, 160),
	    "screen": (70, 100, 160),
	    "screen_door": (170, 210, 230),
	    "projector": (100, 120, 160),
	    "laptop": (70, 100, 160),
	    "keyboard": (70, 90, 120),
	    "mouse": (80, 80, 90),
	    "remote": (60, 60, 70),
	    "cell_phone": (100, 120, 140),

	    # decor / smalls
	    "vase": (186, 85, 211),
	    "flowerpot": (170, 100, 60),
	    "lamp": (255, 230, 140),
	    "chandelier": (255, 220, 120),
	    "sconce": (255, 225, 140),

	    # materials / stuff
	    "paper": (240, 240, 220),
	    "plastic": (200, 200, 220),
	    "metal": (180, 180, 190),
	    "cloth": (220, 200, 190),
	    "textile-other": (220, 200, 190),
	    "glass": (200, 220, 240),
	    "wood": (181, 101, 29),

	    # foods
	    "banana": (255, 225, 53),
	    "apple": (220, 30, 30),
	    "sandwich": (222, 184, 135),
	    "orange": (255, 165, 0),
	    "broccoli": (67, 160, 71),
	    "carrot": (255, 127, 80),
	    "pizza": (255, 180, 100),
	    "donut": (210, 180, 140),
	    "cake": (255, 218, 185),
	    "hot_dog": (204, 102, 0),
	    "salad": (143, 188, 143),
	    "fruit": (255, 160, 122),
	    "vegetable": (85, 139, 47),
	    "food-other": (200, 160, 120),
	    "food": (200, 160, 120),

	    # utensils / containers
	    "bottle": (135, 206, 250),
	    "plate": (245, 245, 245),
	    "wine_glass": (230, 230, 250),
	    "cup": (250, 250, 250),
	    "fork": (192, 192, 192),
	    "knife": (192, 192, 192),
	    "spoon": (192, 192, 192),
	    "bowl": (255, 239, 213),
	    "bag": (170, 120, 70),
	    "box": (170, 120, 70),
	    "barrel": (165, 105, 58),
	    "basket": (170, 120, 70),
	    "tray": (210, 210, 210),

	    # misc (signage, banners)
	    "banner": (255, 215, 0),
	    "flag": (220, 20, 60),

	    # other ADE things ("signboard" is defined once, under structures above)
	    "booth": (160, 160, 160),
	    "display": (100, 120, 160),
	    "notice_board": (210, 180, 140),
	}

	# =============================================================================
	# Aliases & normalization
	# =============================================================================
	# Map user tokens to canonical dataset names
	_ALIASES: Dict[str, str] = {
	"human": "person", "humans": "person", "man": "person", "men": "person",
	"woman": "person", "women": "person", "people": "person",

	"tv": "television", "tv_monitor": "television", "monitor_tv": "television",

	"cell phone": "cell_phone", "cellphone": "cell_phone", "mobile": "cell_phone", "phone": "cell_phone",
	"teddy bear": "teddy_bear", "wine glass": "wine_glass", "baseball bat": "baseball_bat",
	"baseball glove": "baseball_glove", "tennis racket": "tennis_racket",

	"blue sky": "blue_sky", "traffic light": "traffic_light", "water fall": "waterfall",
	"window blind": "window_blind", "street light": "streetlight",

	# ADE terms mapping to close COCO terms (used in heuristics)
	"aeroplane": "airplane",
	}

	def _normalize_token(s: str) -> str:
	    """Return the canonical, underscored class name for a free-form token.

	    The token is trimmed and lower-cased, runs of "_", "-" and whitespace are
	    collapsed to a single separator, and the result is mapped through
	    ``_ALIASES`` (e.g. "Cell-Phone" -> "cell_phone", "humans" -> "person").

	    Args:
	        s: Raw token such as a prompt word or dataset class name.

	    Returns:
	        The canonical name with "_" as the only word separator.
	    """
	    # One pass handles "_", "-" and whitespace; the trailing strip() keeps
	    # inputs like "_person_" from producing names with stray underscores.
	    spaced = re.sub(r"[_\-\s]+", " ", s.strip().lower()).strip()
	    underscored = spaced.replace(" ", "_")

	    # Alias keys exist in both spellings ("cell phone" and "tv_monitor"), so
	    # try the spaced form first and fall back to the underscored form.
	    canonical = _ALIASES.get(spaced)
	    if canonical is None:
	        canonical = _ALIASES.get(underscored, underscored)
	    return canonical.replace(" ", "_")

	def _resolve_prompt_item_to_names(item: str) -> List[str]:
	    """Turn one prompt item into one or more canonical class names.

	    The item is split on the standalone word "and" (case-insensitive) and on
	    the symbols "&", "/" and "+", each with optional surrounding whitespace.
	    Because "and" must sit on word boundaries, words that merely contain it
	    (e.g. "sand") are left intact. Every part is passed through
	    ``_normalize_token``; empty parts are dropped and "background" is folded
	    into "unlabeled".

	    Args:
	        item: One entry of the prompt class list, e.g. "human and horse".

	    Returns:
	        A non-empty list of canonical names; ``["unlabeled"]`` when the item
	        yields no usable token.
	    """
	    # Plain "|" alternation: an escaped pipe would be matched literally and
	    # the separators would never be recognized.
	    parts = re.split(r"\s*(?:\band\b|&|/|\+)\s*", item.strip(), flags=re.IGNORECASE)

	    out: List[str] = []
	    for part in parts:
	        tok = _normalize_token(part)
	        if not tok:
	            continue
	        if tok == "background":
	            tok = "unlabeled"
	        out.append(tok)
	    return out or ["unlabeled"]

	# =============================================================================
	# Color selection fallback (heuristics)
	# =============================================================================
	def _infer_color_from_name(name: str) -> Tuple[int, int, int]:
	"""Heuristic fallback: choose a sensible color by keyword."""
	n = name.lower().replace("_", " ")
	def c(r,g,b): return (r, g, b)

	# water/sky
	if "blue sky" in n or ("sky" in n and "blue" in n): return c(135,206,235)
	if "sky" in n: return c(135,206,235)
	if any(k in n for k in ["sea","ocean"]): return c(0,105,148)
	if any(k in n for k in ["river","lake","waterfall","pool"]): return c(64,164,223)
	if "water" in n: return c(64,164,223)

	# vegetation / land
	if any(k in n for k in ["tree","palm","flora","grass","plant","field","hill","land"]): return c(52,168,83)
	if any(k in n for k in ["sand","beach","desert"]): return c(194,178,128)
	if any(k in n for k in ["ground","dirt","soil","mud"]): return c(120,72,48)
	if any(k in n for k in ["rock","mountain","stone","skyscraper"]): return c(120,130,140)

	# man-made ground
	if any(k in n for k in ["road","street","sidewalk","path","runway","stairs","stair"]): return c(150,150,150)
	if "railroad" in n: return c(100,100,100)

	# humans & vehicles
	if any(k in n for k in ["person","people","human"]): return c(220,20,60)
	if any(k in n for k in ["car","truck","van","bus"]): return c(0,90,190)
	if any(k in n for k in ["bicycle","bike","motorcycle"]): return c(60,180,75)
	if any(k in n for k in ["boat","ship","ferry"]): return c(0,120,170)
	if any(k in n for k in ["aeroplane","airplane","aircraft"]): return c(120,120,255)

	# buildings / structures
	if any(k in n for k in ["building","house","wall","ceiling","door","window","bridge","tower"]): return c(170,170,170)

	# furniture
	if any(k in n for k in ["sofa","chair","stool","bench","table","desk","bed","cabinet","wardrobe","dresser","shelf"]): return c(181,101,29)

	# electronics / lighting
	if any(k in n for k in ["television","monitor","computer","screen","projector","tv"]): return c(70,100,160)
	if any(k in n for k in ["lamp","light","chandelier","sconce","streetlight"]): return c(255,230,140)

	# reflective / transparent
	if "mirror" in n or "glass" in n: return c(200, 220, 240)

	# decorative / misc
	if any(k in n for k in ["flower","vase","sculpture","poster","painting","flag"]): return c(186,85,211)

	# containers
	if any(k in n for k in ["bag","bottle","barrel","basket","box"]): return c(170,120,70)

	# kitchen / appliances
	if any(k in n for k in ["kitchen","sink","stove","oven","microwave","dishwasher","washer","refrigerator","counter","countertop"]):
	return c(175,185,195)

	# default neutral
	return c(128, 128, 128)

	def _color_for_name(name: str) -> Tuple[int, int, int]:
	    """Resolve a class name to an RGB color.

	    Lookup order:
	      1. ``NAMED_COLORS`` under the normalized (underscored) name,
	      2. ``NAMED_COLORS`` under the name as given (trimmed, lower-cased),
	      3. ``NAMED_COLORS`` under the normalized name with "_" turned into "-",
	      4. the ``_ALIASES`` target of the raw name,
	      5. the keyword heuristic in ``_infer_color_from_name``.

	    Steps 2 and 3 exist because ``_normalize_token`` rewrites hyphens to
	    underscores, while ``NAMED_COLORS`` stores hyphenated dataset names such
	    as "wall-brick" or "food-other"; without them those explicit colors
	    could never be selected.

	    Args:
	        name: Dataset class name or prompt token.

	    Returns:
	        An ``(r, g, b)`` tuple.
	    """
	    key = _normalize_token(name)
	    raw = name.strip().lower()

	    for candidate in (key, raw, key.replace("_", "-")):
	        if candidate in NAMED_COLORS:
	            return NAMED_COLORS[candidate]

	    # also try the alias target of the un-normalized spelling
	    alias_back = _ALIASES.get(raw)
	    if alias_back and alias_back in NAMED_COLORS:
	        return NAMED_COLORS[alias_back]

	    return _infer_color_from_name(key)

	# =============================================================================
	# Palettes (LUTs)
	# =============================================================================
	def _build_lut_for_names(names: List[str]) -> np.ndarray:
	    """Build a ``[len(names), 3]`` uint8 color table, one row per class name.

	    Row ``i`` holds the color ``_color_for_name`` returns for ``names[i]``.
	    """
	    rows = [_color_for_name(label) for label in names]
	    # reshape keeps the (0, 3) shape when ``names`` is empty
	    return np.array(rows, dtype=np.uint8).reshape(-1, 3)

	# Lazily built per-dataset palettes; filled on first use by
	# _palette_for_dataset and reused afterwards.
	_COCO_LUT: np.ndarray | None = None
	_ADE_LUT: np.ndarray | None = None

	def _palette_for_dataset(dataset: str) -> np.ndarray:
	    """Return the ``[N, 3]`` uint8 palette for a dataset.

	    Args:
	        dataset: ``"ade"`` selects the ADE palette; any other value selects
	            the COCO palette.

	    Returns:
	        The module-level cached array itself (not a copy), so callers must
	        treat it as read-only.
	    """
	    global _COCO_LUT, _ADE_LUT
	    if dataset == "ade":
	        if _ADE_LUT is None:
	            _ADE_LUT = _build_lut_for_names(ADE_151_CLASS_NAMES)
	        return _ADE_LUT
	    # default: coco
	    if _COCO_LUT is None:
	        _COCO_LUT = _build_lut_for_names(CLASS_NAMES)
	    return _COCO_LUT

	def _palette_for_prompt_classes(classes: List[str]) -> np.ndarray:
	    """Build a ``[len(classes), 3]`` uint8 palette for a prompt class list.

	    Slot 0 always receives the "unlabeled" color, whatever ``classes[0]``
	    says (background-slot convention). Every other slot gets the channel-wise
	    mean of the colors of the names its prompt item resolves to, so
	    "human and horse" becomes the average of the person and horse colors.
	    """
	    palette = np.zeros((len(classes), 3), dtype=np.uint8)

	    for slot, item in enumerate(classes):
	        if slot == 0:
	            # background slot convention
	            palette[slot] = np.array(NAMED_COLORS.get("unlabeled", (0, 0, 0)), dtype=np.uint8)
	            continue

	        # resolve the item, then push each token through the alias table
	        canonical: List[str] = []
	        for tok in _resolve_prompt_item_to_names(item):
	            alias = _ALIASES.get(tok.replace("_", " "), tok)
	            canonical.append(alias.replace(" ", "_"))

	        colors = [np.array(_color_for_name(c), dtype=np.float32) for c in canonical]
	        if colors:
	            mean_rgb = np.mean(colors, axis=0)
	        else:
	            mean_rgb = np.array((128, 128, 128), dtype=np.float32)
	        palette[slot] = np.clip(mean_rgb, 0, 255).astype(np.uint8)

	    return palette

	# Display name for legend in prompt mode
	def _display_name_for_prompt_item(item: str) -> str:
	    """Return the legend label for one prompt item.

	    The item is resolved to canonical names which are joined with "+",
	    e.g. "human and horse" -> "person+horse". Background-like names are
	    shown as "unlabeled".
	    """
	    tokens = _resolve_prompt_item_to_names(item)
	    if not tokens:
	        return "unlabeled"

	    def label(tok: str) -> str:
	        if tok in ("background", "unlabeled"):
	            return "unlabeled"
	        # show the canonical term (e.g., human -> person)
	        return _ALIASES.get(tok.replace("_", " "), tok).replace(" ", "_")

	    return "+".join(label(tok) for tok in tokens)

	# =============================================================================
	# Public API
	# =============================================================================
	def colorize_mask(mask_tensor, classes: List[str] | None = None, dataset: str | None = None) -> Image.Image:
	    """Colorize a ``[H, W]`` mask of class indices into an RGB image.

	    Args:
	        mask_tensor: 2-D array-like of integer class indices; it is converted
	            with ``np.array(..., dtype=np.int32)``.
	            NOTE(review): presumably a CPU tensor or ndarray - confirm that
	            callers move GPU tensors to host memory first.
	        classes: Prompt class list. When given (prompt mode) the palette is
	            built per request: index 0 is background ("unlabeled"), the
	            other indices use the (possibly averaged) color of their item.
	        dataset: Used only when ``classes`` is None: ``"ade"`` selects the
	            ADE palette, anything else the COCO palette.

	    Returns:
	        An RGB ``PIL.Image`` of size ``(W, H)``. Pixels whose index is
	        negative or not covered by the palette stay black.

	    Raises:
	        ValueError: If the mask is not 2-dimensional.
	    """
	    mask = np.array(mask_tensor, dtype=np.int32)
	    if mask.ndim != 2:
	        raise ValueError(f"mask must be 2-D [H, W], got shape {mask.shape}")
	    h, w = mask.shape

	    if classes is not None:
	        pal = _palette_for_prompt_classes(classes)
	    else:
	        pal = _palette_for_dataset("ade" if dataset == "ade" else "coco")

	    color = np.zeros((h, w, 3), dtype=np.uint8)
	    # only look up indices the palette actually covers
	    valid = (mask >= 0) & (mask < pal.shape[0])
	    color[valid] = pal[mask[valid]]
	    # An (H, W, 3) uint8 array is interpreted as RGB without an explicit mode.
	    return Image.fromarray(color)


	def overlay_mask(image: Image.Image, color: Image.Image, alpha: float = 0.5) -> Image.Image:
	    """Blend a colorized mask over an image.

	    ``color`` is resized to ``image``'s size with nearest-neighbour
	    resampling when the sizes differ; both images are converted to RGB and
	    blended with ``Image.blend`` using ``alpha`` as the weight of ``color``.
	    """
	    sizes_match = color.size == image.size
	    overlay = color if sizes_match else color.resize(image.size, resample=Image.NEAREST)
	    base_rgb = image.convert("RGB")
	    overlay_rgb = overlay.convert("RGB")
	    return Image.blend(base_rgb, overlay_rgb, alpha)


	def build_legend_from_mask(mask_tensor, classes: List[str] | None = None, dataset: str | None = None):
	    """Build a compact legend for the classes PRESENT in the mask.

	    Args:
	        mask_tensor: Array-like of integer class indices; negative values
	            are ignored.
	        classes: Prompt class list. When given (prompt mode), names are
	            derived from the prompt items, with "+" joining multi-term items.
	        dataset: Used only when ``classes`` is None: ``"ade"`` selects the
	            ADE names and palette, anything else COCO.

	    Returns:
	        A list of ``{'index': int, 'name': str, 'color': [r, g, b]}`` dicts,
	        sorted by index (so background index 0, when present, comes first).
	        Indices outside the class list are skipped.
	    """
	    mask = np.array(mask_tensor, dtype=np.int64)
	    present = [int(i) for i in np.unique(mask[mask >= 0])]

	    legend: List[Dict] = []
	    if classes is not None:
	        pal = _palette_for_prompt_classes(classes)
	        for idx in present:
	            # the palette has exactly one row per prompt item
	            if idx >= pal.shape[0]:
	                continue
	            raw_item = classes[idx]
	            try:
	                name = _display_name_for_prompt_item(raw_item)
	            except (AttributeError, TypeError):
	                # non-string prompt entry: fall back to its plain text form
	                name = str(raw_item)
	            legend.append({
	                "index": idx,
	                "name": name,
	                "color": [int(c) for c in pal[idx]],
	            })
	    else:
	        if dataset == "ade":
	            names = ADE_151_CLASS_NAMES
	            pal = _palette_for_dataset("ade")
	        else:
	            names = CLASS_NAMES
	            pal = _palette_for_dataset("coco")

	        for idx in present:
	            if idx >= len(names):
	                continue
	            legend.append({
	                "index": idx,
	                "name": names[idx],
	                "color": [int(c) for c in pal[idx]],
	            })

	    # Indices are non-negative, so ordering by index already puts 0 first.
	    legend.sort(key=lambda e: e["index"])
	    return legend