from io import BytesIO
from PIL import Image
from models.vision import VisionModel
from utils.bg_removal import remove_background

vision = VisionModel()
FASHION_LABELS = {
    "shirt", "t-shirt", "blouse", "tank top", "sweater", "hoodie", "jacket",
    "coat", "overcoat", "raincoat", "windbreaker", "cardigan", "blazer",
    "pants", "jeans", "shorts", "leggings", "tights", "skirt", "dress",
    "suit", "jumpsuit", "romper", "vest", "sports bra", "tracksuit",
    "belt", "tie", "scarf", "hat", "cap", "gloves", "socks",
    "shoe", "sneakers", "boots", "sandals", "heels",
    "watch", "necklace", "bracelet", "earrings", "ring",
    "backpack", "handbag", "purse", "wallet"
}

def detect_clothing(image_input, do_bg_remove: bool = False):
    # 1) Load into a PIL.Image if it's a filepath
    if isinstance(image_input, str):
        img = Image.open(image_input)
    else:
        img = image_input

    # 2) Optionally remove background (works on bytes)
    if do_bg_remove:
        buf = BytesIO()
        img.convert("RGB").save(buf, format="JPEG")
        img_bytes = buf.getvalue()
        img = remove_background(img_bytes)
    else:
        # ensure you drop any alpha channel
        img = img.convert("RGB")

    # 3) Run detection
    raw_detections = vision.detect(img)

    # 4) Filter and deduplicate
    filtered = {}
    for det in raw_detections:
        label = det["label"].lower()
        if label in FASHION_LABELS:
            # Only keep the first or highest score if multiple detected
            if label not in filtered or det["score"] > filtered[label]["score"]:
                filtered[label] = {
                    "label": label,
                    "score": det["score"],
                    "box": det.get("box", [])
                }

    # 5) Return dict or fallback if empty
    if not filtered:
        return {"outfit": {"label": "outfit", "score": 1.0, "box": []}}

    return filtered