| import torch |
| from PIL import Image |
| import io |
| import torchvision.transforms as T |
| from ultralytics.nn.tasks import DetectionModel |
|
|
| |
# Build the detection network from its YAML definition, restore the trained
# weights on CPU, and switch the module to inference mode.
# NOTE(review): Ultralytics' bundled YOLO11 config is named "yolo11.yaml";
# confirm that "yolov11.yaml" resolves in the deployment environment.
model = DetectionModel(cfg="yolov11.yaml")
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while it is being loaded.
# The file is passed straight to load_state_dict, i.e. it is expected to hold
# a bare state_dict, which the restricted loader supports.
# NOTE(review): the flag requires torch >= 1.13 - confirm the pinned version.
model.load_state_dict(
    torch.load("yolo_safe.pth", map_location="cpu", weights_only=True)
)
model.eval()
|
|
def preprocess(image_bytes):
    """Decode encoded image bytes into a batched tensor for the model.

    Args:
        image_bytes: Bytes-like object holding an encoded image that PIL can
            open.

    Returns:
        The image converted to RGB, resized to 640x640, turned into a tensor
        with ``ToTensor`` and given a leading batch axis via ``unsqueeze(0)``.
    """
    rgb_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    # Apply the two transforms one after the other, in the same order a
    # Compose pipeline would run them.
    resized = T.Resize((640, 640))(rgb_image)
    tensor = T.ToTensor()(resized)
    return tensor.unsqueeze(0)
|
|
def postprocess(preds):
    """Flatten per-image detections into a list of JSON-friendly dicts.

    Args:
        preds: Iterable of per-image detection collections. Every detection
            is indexed as ``[xmin, ymin, xmax, ymax, score, class_id]``.

    Returns:
        One dict per detection, in iteration order, holding the box corners
        truncated to ``int``, the score as ``float`` and the class id
        rendered as a string.
    """
    return [
        {
            "box": {
                "xmin": int(det[0]),
                "ymin": int(det[1]),
                "xmax": int(det[2]),
                "ymax": int(det[3]),
            },
            "score": float(det[4]),
            "label": str(int(det[5])),
        }
        for detections in preds
        for det in detections
    ]
|
|
def inference(inputs):
    """Run object detection on a single encoded image.

    Args:
        inputs: Mapping whose ``"inputs"`` entry holds the encoded image
            bytes.

    Returns:
        The list of detection dicts built by ``postprocess``.
        NOTE(review): boxes are presumably expressed in the resized frame
        produced by ``preprocess``, not in original-image pixels - confirm
        what callers expect.

    Raises:
        KeyError: If ``inputs`` has no ``"inputs"`` entry.
    """
    # Function-scope import keeps this fix self-contained. The NMS helper
    # lives in utils.nms in recent Ultralytics releases and in utils.ops in
    # older ones, so try both locations.
    try:
        from ultralytics.utils.nms import non_max_suppression
    except ImportError:
        from ultralytics.utils.ops import non_max_suppression

    image_bytes = inputs["inputs"]
    image_tensor = preprocess(image_bytes)
    with torch.no_grad():
        # NOTE(review): assumes element 0 of the model output is the decoded
        # (batch, 4 + classes, anchors) prediction tensor - confirm for this
        # model build.
        raw_preds = model(image_tensor)[0]
        # The raw head output is not a list of boxes; postprocess indexes
        # each row as [x1, y1, x2, y2, score, class], which is the layout
        # non-max suppression returns (one (n, 6) tensor per image). Library
        # default thresholds are used.
        detections = non_max_suppression(raw_preds)
    return postprocess(detections)