File size: 1,195 Bytes
afb91b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

# models/vision.py -- Working

from transformers import pipeline
from PIL import Image

class VisionModel:
    """Object detector built on a Hugging Face ``object-detection`` pipeline.

    Detections that do not reach ``threshold`` are discarded, and each
    surviving box is returned as a flat ``[xmin, ymin, xmax, ymax]`` list of
    floats.
    """

    def __init__(
        self,
        model_name: str = "valentinafeve/yolos-fashionpedia",
        threshold: float = 0.7
    ):
        """Create the detection pipeline.

        Args:
            model_name: Model identifier passed to ``pipeline`` as ``model``.
            threshold: Minimum score a detection needs in order to be
                returned by ``detect``.
        """
        self.pipe = pipeline("object-detection", model=model_name)
        self.threshold = threshold

    def detect(self, image: Image.Image) -> list:
        """Run detection on ``image`` and return the confident detections.

        Args:
            image: Input image; converted to RGB first if it is in any other
                mode.

        Returns:
            A list of dicts, one per detection that passes
            ``self.threshold``, with keys ``"label"``, ``"score"`` (float)
            and ``"box"`` (``[xmin, ymin, xmax, ymax]`` as floats).
        """
        # 1) Ensure RGB
        if image.mode != "RGB":
            image = image.convert("RGB")

        # 2) Run detection
        # Forward the threshold to the pipeline: it applies its own cut-off
        # (0.5 unless told otherwise) before returning results, so without
        # this a configured threshold below that default would silently have
        # no effect.
        results = self.pipe(image, threshold=self.threshold)

        # 3) Process & filter
        processed = []
        for r in results:
            score = float(r["score"])
            # Local guard, so filtering does not rely on the pipeline alone.
            if score < self.threshold:
                continue

            # r["box"] is a dict: {"xmin":..., "ymin":..., "xmax":..., "ymax":...}
            box = r["box"]
            processed.append({
                "label": r["label"],
                "score": score,
                "box": [
                    float(box[key])
                    for key in ("xmin", "ymin", "xmax", "ymax")
                ],
            })

        return processed