import io
import base64

import cv2
import torch
import numpy as np
from PIL import Image
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

app = FastAPI(title="Roof Segmentation API")


@app.get("/")
def home():
    return {"status": "running"}


MODEL_PATH = "model_final (4).pth"

# ----------------------------- Detectron2 Config -----------------------------
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold for kept detections
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1          # single class: roof
cfg.MODEL.WEIGHTS = MODEL_PATH
cfg.MODEL.DEVICE = "cpu"  # or "cuda" if a GPU is available

predictor = DefaultPredictor(cfg)

# Elliptical kernel used to clean up the predicted masks
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))


# ----------------------------- Helper -----------------------------
def encode_image(img: np.ndarray) -> str:
    """Convert a BGR image to a base64-encoded PNG string."""
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(img_rgb)
    buf = io.BytesIO()
    pil_img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


# ----------------------------- API Endpoint -----------------------------
@app.post("/predict")
async def predict(
    file: UploadFile = File(...),
    output_type: str = Form("both"),  # 'predicted_output', 'polygon_overlay', or 'both'
):
    contents = await file.read()
    # Decode the upload as RGB, then flip channels to BGR for Detectron2/OpenCV
    image = np.array(Image.open(io.BytesIO(contents)).convert("RGB"))[:, :, ::-1]

    outputs = predictor(image)
    instances = outputs["instances"].to("cpu")

    # --- Polygon overlay ---
    polygon_overlay = image.copy()
    if instances.has("pred_masks"):
        masks = instances.pred_masks
        # Merge all instance masks into a single binary mask
        combined_mask = torch.any(masks, dim=0).numpy().astype("uint8") * 255

        # Morphological opening/closing to remove speckle and fill small holes
        clean_mask = cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel)
        clean_mask = cv2.morphologyEx(clean_mask, cv2.MORPH_CLOSE, kernel)

        # Blur and re-threshold to smooth the mask boundary
        smooth_mask = cv2.GaussianBlur(clean_mask, (5, 5), 0)
        _, smooth_mask = cv2.threshold(smooth_mask, 127, 255, cv2.THRESH_BINARY)

        contours, _ = cv2.findContours(smooth_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Simplify the largest contour and draw it as the roof polygon
            largest_contour = max(contours, key=cv2.contourArea)
            polygon = cv2.approxPolyDP(largest_contour, epsilon=10, closed=True)
            cv2.polylines(polygon_overlay, [polygon], isClosed=True, color=(0, 255, 0), thickness=2)

    # --- Detectron2 predicted output ---
    v = Visualizer(image[:, :, ::-1], MetadataCatalog.get("toproof_train"), scale=1.2)
    out = v.draw_instance_predictions(instances)
    predicted_output = out.get_image()[:, :, ::-1]  # back to BGR

    # Encode images
    predicted_b64 = encode_image(predicted_output)
    polygon_b64 = encode_image(polygon_overlay)

    # Build response based on output_type
    result = {}
    if output_type == "predicted_output":
        result["predicted_output"] = predicted_b64
    elif output_type == "polygon_overlay":
        result["polygon_overlay"] = polygon_b64
    else:  # both
        result["predicted_output"] = predicted_b64
        result["polygon_overlay"] = polygon_b64

    return JSONResponse(result)
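

# ----------------------------- Local run (sketch) -----------------------------
# A minimal sketch for running the API locally; it assumes uvicorn is installed
# and that port 8000 is free. Adjust host/port for your deployment.
# Example request (the file name "roof.jpg" is just a placeholder):
#   curl -X POST -F "file=@roof.jpg" -F "output_type=both" http://localhost:8000/predict
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)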