Spaces:
Running
Running
File size: 1,195 Bytes
afb91b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# models/vision.py -- Working
from transformers import pipeline
from PIL import Image
class VisionModel:
    """Fashion-item detector backed by a HuggingFace object-detection pipeline."""

    def __init__(
        self,
        model_name: str = "valentinafeve/yolos-fashionpedia",
        threshold: float = 0.7
    ):
        # Constructing the pipeline downloads/loads model weights.
        self.pipe = pipeline("object-detection", model=model_name)
        # Minimum confidence a detection must reach to be returned.
        self.threshold = threshold

    def detect(self, image: Image.Image):
        """Run detection on *image* and return a list of dicts, each with
        "label" (str), "score" (float), and "box" ([xmin, ymin, xmax, ymax]
        as floats). Detections scoring below the threshold are dropped."""
        # The pipeline expects RGB input; convert other modes up front.
        if image.mode != "RGB":
            image = image.convert("RGB")

        raw = self.pipe(image)

        # Keep only confident detections, flattening each box dict
        # ({"xmin": ..., "ymin": ..., "xmax": ..., "ymax": ...})
        # into a plain coordinate list.
        return [
            {
                "label": det["label"],
                "score": float(det["score"]),
                "box": [
                    float(det["box"][side])
                    for side in ("xmin", "ymin", "xmax", "ymax")
                ],
            }
            for det in raw
            if float(det["score"]) >= self.threshold
        ]
|