Spaces:
Running
Running
File size: 1,936 Bytes
2759de1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import os
from io import BytesIO

from PIL import Image

from models.vision import VisionModel
from utils.bg_removal import remove_background
# Shared detector instance, constructed once when this module is imported
# and reused by every detect_clothing() call.
vision = VisionModel()
# Lower-case labels that count as fashion items. detect_clothing() lower-cases
# each detection's label and keeps it only if it is a member of this set.
FASHION_LABELS = {
    # Tops
    "shirt", "t-shirt", "blouse", "tank top", "sweater",
    "hoodie", "cardigan", "vest", "sports bra",
    # Outerwear
    "jacket", "coat", "overcoat", "raincoat", "windbreaker", "blazer",
    # Bottoms and legwear
    "pants", "jeans", "shorts", "leggings", "tights", "skirt",
    # One-piece garments and sets
    "dress", "suit", "jumpsuit", "romper", "tracksuit",
    # Worn accessories
    "belt", "tie", "scarf", "hat", "cap", "gloves", "socks",
    # Footwear
    "shoe", "sneakers", "boots", "sandals", "heels",
    # Jewellery and watches
    "watch", "necklace", "bracelet", "earrings", "ring",
    # Bags and wallets
    "backpack", "handbag", "purse", "wallet",
}
def detect_clothing(image_input, do_bg_remove: bool = False):
    """Detect fashion items in an image, keeping the best hit per label.

    Args:
        image_input: Either a filesystem path (``str`` or ``os.PathLike``)
            that is opened with ``PIL.Image.open``, or an already-loaded
            object exposing ``convert("RGB")`` (presumably a
            ``PIL.Image.Image`` -- confirm against callers).
        do_bg_remove: When true, the RGB image is JPEG-encoded in memory and
            the bytes are passed to ``remove_background``; its return value
            is what gets handed to the detector.

    Returns:
        A dict keyed by lower-cased label. Each value is a dict with keys
        ``"label"``, ``"score"`` and ``"box"`` (``[]`` when the detection
        carries no box). Only labels present in ``FASHION_LABELS`` are kept,
        and for repeated labels the highest-scoring detection wins (the
        first one on ties). If nothing matches, a single placeholder entry
        ``{"outfit": {"label": "outfit", "score": 1.0, "box": []}}`` is
        returned instead of an empty dict.

    Raises:
        KeyError: If a detection lacks ``"label"``, or a fashion-labelled
            detection lacks ``"score"``.
    """
    # 1) Normalise the input to an RGB image (this drops any alpha channel).
    if isinstance(image_input, (str, os.PathLike)):
        # Image.open is lazy and may keep the file handle open. convert()
        # returns an independent, fully loaded image, so the source file can
        # be closed as soon as the conversion is done.
        with Image.open(image_input) as source:
            img = source.convert("RGB")
    else:
        # Caller-owned image: convert to a new object, never close theirs.
        img = image_input.convert("RGB")

    # 2) Optionally remove the background (remove_background works on bytes).
    if do_bg_remove:
        buf = BytesIO()
        img.save(buf, format="JPEG")
        # NOTE(review): the result is passed to the detector as-is; if
        # remove_background returns an image with an alpha channel it is not
        # converted back to RGB here -- confirm the detector accepts that.
        img = remove_background(buf.getvalue())

    # 3) Run detection.
    raw_detections = vision.detect(img)

    # 4) Keep only fashion labels, one entry per label (highest score wins).
    best_by_label = {}
    for det in raw_detections:
        label = det["label"].lower()
        if label not in FASHION_LABELS:
            continue
        score = det["score"]
        current = best_by_label.get(label)
        if current is None or score > current["score"]:
            best_by_label[label] = {
                "label": label,
                "score": score,
                "box": det.get("box", []),
            }

    # 5) Fall back to a generic placeholder when nothing was recognised.
    if not best_by_label:
        return {"outfit": {"label": "outfit", "score": 1.0, "box": []}}
    return best_by_label
|