# aww / adetailer /ultralytics.py
# ddoc's picture
# Upload 31 files
# 13c5a27
from __future__ import annotations
from pathlib import Path
import cv2
from PIL import Image
from adetailer import PredictOutput
from adetailer.common import create_mask_from_bbox
def ultralytics_predict(
    model_path: str | Path,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
) -> PredictOutput:
    """
    Run a YOLO model on an image and collect boxes, masks and a preview.

    Parameters
    ----------
    model_path: str | Path
        Location of the model weights; converted to ``str`` before being
        handed to ``YOLO``.
    image: Image.Image
        Image to run the model on. Its ``size`` is used as the target size
        of the generated masks.
    confidence: float
        Forwarded to the model call as ``conf``.
    device: str
        Forwarded to the model call as ``device``.

    Returns
    -------
    PredictOutput
        A default-constructed ``PredictOutput`` when no boxes were found;
        otherwise one carrying ``bboxes`` (xyxy boxes as nested lists),
        ``masks`` and an RGB ``preview`` image.
    """
    # Imported lazily, as in the rest of this module's heavy dependencies.
    from ultralytics import YOLO

    detector = YOLO(str(model_path))
    # Only the first entry of the returned results is used.
    result = detector(image, conf=confidence, device=device)[0]

    boxes_array = result.boxes.xyxy.cpu().numpy()
    if boxes_array.size == 0:
        # Nothing detected: no masks or preview are produced.
        return PredictOutput()
    box_list = boxes_array.tolist()

    segmentation = result.masks
    if segmentation is not None:
        mask_images = mask_to_pil(segmentation.data, image.size)
    else:
        # No masks on the result: fall back to masks built from the boxes.
        mask_images = create_mask_from_bbox(box_list, image.size)

    # The plotted array is treated as BGR and converted to RGB for PIL.
    annotated_bgr = result.plot()
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    preview_image = Image.fromarray(annotated_rgb)

    return PredictOutput(bboxes=box_list, masks=mask_images, preview=preview_image)
def mask_to_pil(masks, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Convert a stack of masks into a list of resized grayscale PIL images.

    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        One mask per index along the first dimension. The tensor may live
        on a CUDA device; `to_pil_image` takes care of that.
    shape: tuple[int, int]
        (width, height) of the original image; every mask is resized to it.

    Returns
    -------
    list[Image.Image]
        One mode "L" image per mask, in the same order as `masks`.
    """
    from torchvision.transforms.functional import to_pil_image

    pil_masks = []
    for index in range(masks.shape[0]):
        grayscale = to_pil_image(masks[index], mode="L")
        pil_masks.append(grayscale.resize(shape))
    return pil_masks