|
from __future__ import annotations |
|
|
|
from pathlib import Path |
|
|
|
import cv2 |
|
from PIL import Image |
|
|
|
from adetailer import PredictOutput |
|
from adetailer.common import create_mask_from_bbox |
|
|
|
|
|
def ultralytics_predict(
    model_path: str | Path,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
) -> PredictOutput:
    """
    Run an Ultralytics YOLO model on an image and package the detections.

    Parameters
    ----------
    model_path: str | Path
        Location of the model weights; converted to ``str`` before being
        handed to ``YOLO``.

    image: Image.Image
        Image to run the model on. Its ``size`` is also used when
        building the masks.

    confidence: float
        Forwarded to the model call as ``conf``.

    device: str
        Forwarded to the model call as ``device``.

    Returns
    -------
    PredictOutput
        A default-constructed ``PredictOutput`` when no boxes were found.
        Otherwise one carrying the xyxy boxes as nested lists, one mask
        per detection, and a preview image.
    """
    # Imported lazily so the module can be loaded without ultralytics.
    from ultralytics import YOLO

    detector = YOLO(str(model_path))
    # Only the first entry of the prediction output is used.
    result = detector(image, conf=confidence, device=device)[0]

    boxes_xyxy = result.boxes.xyxy.cpu().numpy()
    if boxes_xyxy.size == 0:
        return PredictOutput()
    bboxes = boxes_xyxy.tolist()

    # Use the model's own masks when it produced any; otherwise derive
    # masks from the bounding boxes.
    if result.masks is not None:
        masks = mask_to_pil(result.masks.data, image.size)
    else:
        masks = create_mask_from_bbox(bboxes, image.size)

    # The plotted array is converted from BGR to RGB before wrapping it
    # in a PIL image.
    preview_rgb = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)
    preview = Image.fromarray(preview_rgb)

    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
|
|
|
|
|
def mask_to_pil(masks, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Convert a stack of masks into a list of resized grayscale PIL images.

    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        May live on a CUDA device; `to_pil_image` takes care of that.

    shape: tuple[int, int]
        (width, height) of the original image; every mask is resized
        to this size.

    Returns
    -------
    list[Image.Image]
        One mode "L" image per entry along the first axis of ``masks``.
    """
    # Imported lazily so the module can be loaded without torchvision.
    from torchvision.transforms.functional import to_pil_image

    pil_masks = []
    for idx in range(masks.shape[0]):
        grayscale = to_pil_image(masks[idx], mode="L")
        pil_masks.append(grayscale.resize(shape))
    return pil_masks
|
|