# Hugging Face Spaces status: Sleeping
from io import BytesIO | |
from PIL import Image | |
from models.vision import VisionModel | |
from utils.bg_removal import remove_background | |
# Single module-level detector, built once at import time so that
# detect_clothing() does not construct a new model on every call.
vision: VisionModel = VisionModel()
# Lower-case detector labels that count as fashion items. detect_clothing()
# lower-cases each detected label and keeps it only if it appears here.
FASHION_LABELS = {
    # tops and outerwear
    "shirt",
    "t-shirt",
    "blouse",
    "tank top",
    "sweater",
    "hoodie",
    "jacket",
    "coat",
    "overcoat",
    "raincoat",
    "windbreaker",
    "cardigan",
    "blazer",
    # bottoms, dresses and full outfits
    "pants",
    "jeans",
    "shorts",
    "leggings",
    "tights",
    "skirt",
    "dress",
    "suit",
    "jumpsuit",
    "romper",
    "vest",
    "sports bra",
    "tracksuit",
    # worn accessories
    "belt",
    "tie",
    "scarf",
    "hat",
    "cap",
    "gloves",
    "socks",
    # footwear
    "shoe",
    "sneakers",
    "boots",
    "sandals",
    "heels",
    # jewellery
    "watch",
    "necklace",
    "bracelet",
    "earrings",
    "ring",
    # bags
    "backpack",
    "handbag",
    "purse",
    "wallet",
}
def detect_clothing(image_input, do_bg_remove: bool = False):
    """Detect fashion items in an image, keeping the best hit per label.

    Args:
        image_input: Either a filesystem path (``str`` or ``os.PathLike``)
            that is opened with PIL, or an already-loaded image object that
            provides ``convert()`` (e.g. a ``PIL.Image.Image``).
        do_bg_remove: When true, the RGB image is encoded to JPEG bytes and
            passed through ``remove_background``; whatever that returns is
            what gets handed to the detector.

    Returns:
        A dict keyed by lower-cased label. Each value is a dict with the
        keys ``"label"``, ``"score"`` and ``"box"`` describing the
        highest-scoring detection for that label (the first one wins on a
        tie). Only labels present in ``FASHION_LABELS`` are kept. If nothing
        matches, a single placeholder entry under ``"outfit"`` is returned.
    """
    import os  # local import: only needed for the PathLike check below

    # 1) Load the image and normalise it to RGB (drops any alpha channel).
    #    Image.open() is lazy and keeps the file handle open, so the file is
    #    opened in a context manager; convert() forces the pixel data to be
    #    read and returns an independent image that outlives the handle.
    if isinstance(image_input, (str, os.PathLike)):
        with Image.open(image_input) as opened:
            img = opened.convert("RGB")
    else:
        img = image_input.convert("RGB")

    # 2) Optionally remove the background (remove_background is fed bytes).
    if do_bg_remove:
        buf = BytesIO()
        img.save(buf, format="JPEG")
        img = remove_background(buf.getvalue())

    # 3) Run detection.
    raw_detections = vision.detect(img)

    # 4) Filter to known fashion labels and keep one entry per label.
    best_by_label = {}
    for det in raw_detections:
        label = det["label"].lower()
        if label not in FASHION_LABELS:
            continue
        score = det["score"]
        current = best_by_label.get(label)
        # Strict ">" means the earliest detection is kept when scores tie.
        if current is None or score > current["score"]:
            best_by_label[label] = {
                "label": label,
                "score": score,
                "box": det.get("box", []),
            }

    # 5) Fall back to a generic placeholder when nothing was recognised.
    if not best_by_label:
        return {"outfit": {"label": "outfit", "score": 1.0, "box": []}}
    return best_by_label