Spaces:
Sleeping
Sleeping
# models/vision.py -- Working | |
from transformers import pipeline | |
from PIL import Image | |
class VisionModel: | |
def __init__( | |
self, | |
model_name: str = "valentinafeve/yolos-fashionpedia", | |
threshold: float = 0.7 | |
): | |
self.pipe = pipeline("object-detection", model=model_name) | |
self.threshold = threshold | |
def detect(self, image: Image.Image): | |
# 1) Ensure RGB | |
if image.mode != "RGB": | |
image = image.convert("RGB") | |
# 2) Run detection | |
results = self.pipe(image) | |
# 3) Process & filter | |
processed = [] | |
for r in results: | |
score = float(r["score"]) | |
if score < self.threshold: | |
continue | |
# r["box"] is a dict: {"xmin":..., "ymin":..., "xmax":..., "ymax":...} | |
box = r["box"] | |
coords = [ | |
float(box["xmin"]), | |
float(box["ymin"]), | |
float(box["xmax"]), | |
float(box["ymax"]), | |
] | |
processed.append({ | |
"label": r["label"], | |
"score": score, | |
"box": coords | |
}) | |
return processed | |