Upload 2 files
- app.py +170 -0
- inference.py +198 -0
app.py
ADDED
@@ -0,0 +1,170 @@
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+import torch
+import gradio as gr
+from gradio_image_prompter import ImagePrompter
+from torch.nn import DataParallel
+from models.counter_infer import build_model
+from utils.arg_parser import get_argparser
+from utils.data import resize_and_pad
+import torchvision.ops as ops
+from torchvision import transforms as T
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+
+# Load the model once, to avoid reloading on every request
+def load_model():
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    args = get_argparser().parse_args()
+    args.zero_shot = True
+    model = DataParallel(build_model(args).to(device))
+    model.load_state_dict(torch.load('CNTQG_multitrain_ca44.pth', weights_only=True)['model'], strict=False)
+    model.eval()
+    return model, device
+
+model, device = load_model()
+
+# Process the image once (run inference)
+def process_image_once(inputs, enable_mask):
+    model.module.return_masks = enable_mask
+
+    image = inputs['image']
+    drawn_boxes = inputs['points']
+    image_tensor = torch.tensor(image).to(device)
+    image_tensor = image_tensor.permute(2, 0, 1).float() / 255.0
+    image_tensor = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(image_tensor)
+
+    # each ImagePrompter box is laid out as [x1, y1, label, x2, y2, label]
+    bboxes_tensor = torch.tensor([[box[0], box[1], box[3], box[4]] for box in drawn_boxes], dtype=torch.float32).to(device)
+
+    img, bboxes, scale = resize_and_pad(image_tensor, bboxes_tensor, size=1024.0)
+    img = img.unsqueeze(0).to(device)
+    bboxes = bboxes.unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        outputs, _, _, _, masks = model(img, bboxes)
+
+    return image, outputs, masks, img, scale, drawn_boxes
+
+# Post-process and update the output
+def post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold):
+    idx = 0
+    threshold = 1 / threshold
+    keep = ops.nms(outputs[idx]['pred_boxes'][outputs[idx]['box_v'] > outputs[idx]['box_v'].max() / threshold],
+                   outputs[idx]['box_v'][outputs[idx]['box_v'] > outputs[idx]['box_v'].max() / threshold], 0.5)
+
+    pred_boxes = outputs[idx]['pred_boxes'][outputs[idx]['box_v'] > outputs[idx]['box_v'].max() / threshold][keep]
+    pred_boxes = torch.clamp(pred_boxes, 0, 1)
+
+    pred_boxes = (pred_boxes.cpu() / scale * img.shape[-1]).tolist()
+
+    image = Image.fromarray(image.astype(np.uint8))
+
+    if enable_mask:
+        from matplotlib import pyplot as plt
+        masks_ = masks[idx][(outputs[idx]['box_v'] > outputs[idx]['box_v'].max() / threshold)[0]]
+        N_masks = masks_.shape[0]
+        indices = torch.randint(1, N_masks + 1, (1, N_masks), device=masks_.device).view(-1, 1, 1)
+        masks = (masks_ * indices).sum(dim=0)
+        mask_display = (
+            T.Resize((int(img.shape[2] / scale), int(img.shape[3] / scale)), interpolation=T.InterpolationMode.NEAREST)(
+                masks.cpu().unsqueeze(0))[0])[:image.size[1], :image.size[0]]
+        cmap = plt.cm.tab20
+        norm = plt.Normalize(vmin=0, vmax=N_masks)
+        del masks
+        del masks_
+        del outputs
+        rgba_image = cmap(norm(mask_display))
+        rgba_image[mask_display == 0, -1] = 0
+        rgba_image[mask_display != 0, -1] = 0.5
+
+        overlay = Image.fromarray((rgba_image * 255).astype(np.uint8), mode="RGBA")
+        image = image.convert("RGBA")
+        image = Image.alpha_composite(image, overlay)
+
+    draw = ImageDraw.Draw(image)
+    for box in pred_boxes:
+        draw.rectangle([box[0], box[1], box[2], box[3]], outline="orange", width=5)
+    # for box in drawn_boxes:
+    #     draw.rectangle([box[0], box[1], box[3], box[4]], outline="red", width=3)
+
+    width, height = image.size
+    square_size = int(0.05 * width)
+    x1, y1 = 10, height - square_size - 10
+    x2, y2 = x1 + square_size, y1 + square_size
+
+    # draw.rectangle([x1, y1, x2, y2], outline="black", fill="black", width=1)
+    # font = ImageFont.load_default()
+    # txt = str(len(pred_boxes))
+    # w = draw.textlength(txt, font=font)
+    # text_x = x1 + (square_size - w) / 2
+    # text_y = y1 + (square_size - 10) / 2
+    # draw.text((text_x, text_y), txt, fill="white", font=font)
+
+    return image, len(pred_boxes)
+
+
+iface = gr.Blocks()
+
+with iface:
+    # Store intermediate states
+    image_input = gr.State()
+    outputs_state = gr.State()
+    masks_state = gr.State()
+    img_state = gr.State()
+    scale_state = gr.State()
+    drawn_boxes_state = gr.State()
+
+    # UI layout: input section
+    with gr.Row():
+        image_prompter = ImagePrompter()
+        image_output = gr.Image(type="pil")
+
+    # UI layout: output section
+    with gr.Row():
+        count_output = gr.Number(label="Total Count")
+        enable_mask = gr.Checkbox(label="Predict masks", value=True)  # mask enabled by default
+        threshold = gr.Slider(0.05, 0.95, value=0.33, step=0.01, label="Threshold")
+
+    # Create the 'Count' button
+    count_button = gr.Button("Count")
+
+    # Process the image once when the "Count" button is pressed
+    def initial_process(inputs, enable_mask, threshold):
+        # Perform inference once
+        image, outputs, masks, img, scale, drawn_boxes = process_image_once(inputs, enable_mask)
+
+        # Save intermediate states
+        return (
+            *post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold),  # processed outputs
+            image, outputs, masks, img, scale, drawn_boxes  # stored in states for later use
+        )
+
+    # Update the image and count when the threshold slider changes (post-process only)
+    def update_threshold(threshold, image, outputs, masks, img, scale, drawn_boxes, enable_mask):
+        return post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold)
+
+    # Run initial inference and post-processing when the "Count" button is clicked
+    count_button.click(
+        initial_process,
+        [image_prompter, enable_mask, threshold],  # inputs
+        [image_output, count_output, image_input, outputs_state, masks_state, img_state, scale_state, drawn_boxes_state]  # outputs + states
+    )
+
+    # Adjust the output dynamically based on the threshold slider (no re-inference)
+    threshold.change(
+        update_threshold,
+        [threshold, image_input, outputs_state, masks_state, img_state, scale_state, drawn_boxes_state, enable_mask],
+        [image_output, count_output]
+    )
+
+    enable_mask.change(
+        update_threshold,
+        [threshold, image_input, outputs_state, masks_state, img_state, scale_state, drawn_boxes_state, enable_mask],
+        [image_output, count_output]
+    )
+
+iface.launch(share=True)
+
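
Note: post_process applies a relative confidence cutoff rather than an absolute one. The slider value s is inverted (threshold = 1/s), so a box survives when box_v > box_v.max() / (1/s), i.e. when its score exceeds s times the best score; survivors are then deduplicated with IoU-0.5 NMS. A minimal, self-contained sketch of that scheme on dummy tensors (filter_detections is an illustrative name, not part of the app):

    import torch
    import torchvision.ops as ops

    def filter_detections(pred_boxes, box_v, slider=0.33):
        # Keep boxes whose score exceeds `slider` times the best score.
        keep_mask = box_v > box_v.max() * slider
        boxes, scores = pred_boxes[keep_mask], box_v[keep_mask]
        keep = ops.nms(boxes, scores, 0.5)  # drop overlapping duplicates
        return torch.clamp(boxes[keep], 0, 1), scores[keep]

    # Example: 50 random normalized xyxy boxes with random scores.
    xy = torch.rand(50, 2) * 0.8
    boxes, scores = filter_detections(torch.cat([xy, xy + 0.1], dim=1), torch.rand(50))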
inference.py
ADDED
@@ -0,0 +1,198 @@
+import argparse
+import json
+import math
+import os
+
+import numpy as np
+import skimage
+import torch
+from torch.nn import DataParallel
+from torch.utils.data import DataLoader
+from torchvision import ops
+from torchvision.transforms import Resize
+from tqdm import tqdm
+from models.counter_infer import build_model
+from models.matcher import build_matcher
+from utils.arg_parser import get_argparser
+from utils.box_ops import BoxList
+from utils.data import FSC147DATASET, pad_collate_test
+from utils.losses import SetCriterion
+
+
+@torch.no_grad()
+def evaluate(args):
+    gpu = 0
+    torch.cuda.set_device(gpu)
+    device = torch.device(gpu)
+
+    model = DataParallel(
+        build_model(args).to(device),
+        device_ids=[gpu],
+        output_device=gpu
+    )
+
+    state_dict = torch.load(os.path.join(args.model_path, f'{args.model_name}.pth'))['model']
+    state_dict = {k if 'module.' in k else 'module.' + k: v for k, v in state_dict.items()}
+    model.load_state_dict(state_dict, strict=False)
+
+    for split in ['val', 'test']:
+        test = FSC147DATASET(
+            args.data_path,
+            args.image_size,
+            split=split,
+            num_objects=args.num_objects,
+            tiling_p=args.tiling_p,
+            return_ids=True,
+            training=False
+        )
+        test_loader = DataLoader(
+            test,
+            batch_size=args.batch_size,
+            drop_last=False,
+            num_workers=args.num_workers,
+            collate_fn=pad_collate_test,
+        )
+        ae = torch.tensor(0.0).to(device)
+        se = torch.tensor(0.0).to(device)
+        model.eval()
+        matcher = build_matcher(args)
+        criterion = SetCriterion(0, matcher, {"loss_giou": args.giou_loss_coef}, ["bboxes", "ce"], focal_alpha=args.focal_alpha)
+        criterion.to(device)
+
+
+        predictions = dict()
+        predictions["categories"] = [{"name": "fg", "id": 1}]
+        predictions["images"] = list()
+        predictions["annotations"] = list()
+        anno_id = 1
+
+        for img, bboxes, density_map, ids, gt_bboxes, scaling_factor, padwh in test_loader:
+            img = img.to(device)
+            bboxes = bboxes.to(device)
+            gt_bboxes = gt_bboxes.to(device)
+
+            outputs, ref_points, _, _, masks = model(img, bboxes)
+
+            w, h = img.shape[-1], img.shape[-2]
+            losses = []
+            num_objects_gt = []
+            num_objects_pred = []
+            nms_bboxes = []
+            nms_scores = []
+            nms_masks = []
+            for idx in range(img.shape[0]):
+
+                thr = 1 / 0.11
+
+                if len(outputs[idx]['pred_boxes'][-1]) == 0:
+                    nms_bboxes.append(torch.zeros((0, 4)))
+                    nms_scores.append(torch.zeros((0)))
+                    num_objects_pred.append(0)
+
+                else:
+
+                    # threshold and NMS
+                    v = outputs[idx]["box_v"]
+                    v_thr = v.max() / thr
+                    mask = v > v_thr
+                    keep = ops.nms(
+                        outputs[idx]["pred_boxes"][mask],
+                        v[mask],
+                        0.5,
+                    )
+                    boxes = outputs[idx]["pred_boxes"][mask][keep]
+                    boxes = torch.clamp(boxes, 0, 1)
+                    scores = outputs[idx]["scores"][mask][keep]
+
+                    # remove bboxes in padded area
+                    maxw = (img.shape[-1] - padwh[idx][0]).to(device)
+                    maxh = (img.shape[-2] - padwh[idx][1]).to(device)
+                    center = (boxes[:, :2] + boxes[:, 2:]) / 2
+                    valid = (center[:, 0] * h < maxw) & (center[:, 1] * w < maxh)
+                    scores = scores[valid]
+                    boxes = boxes[valid]
+
+                    nms_bboxes.append(boxes)
+                    nms_scores.append(scores)
+                    num_objects_pred.append(len(boxes))
+
+                if False:  # disabled debug visualization; note that `centerness` below is undefined in this file
+                    from matplotlib import pyplot as plt
+                    fig1 = plt.figure(figsize=(8, 8))
+                    ((ax1_11, ax1_12), (ax1_21, ax1_22)) = fig1.subplots(2, 2)
+                    fig1.tight_layout(pad=2.5)
+                    img_ = np.array(img.cpu()[idx].permute(1, 2, 0))
+                    img_ = img_ - np.min(img_)
+                    img_ = img_ / np.max(img_)
+                    ax1_11.imshow(img_)
+                    ax1_11.set_title("Input", fontsize=8)
+                    bboxes_ = np.array(bboxes.cpu())[idx]
+                    for i in range(3):
+                        ax1_11.plot([bboxes_[i][0], bboxes_[i][0], bboxes_[i][2], bboxes_[i][2], bboxes_[i][0]],
+                                    [bboxes_[i][1], bboxes_[i][3], bboxes_[i][3], bboxes_[i][1], bboxes_[i][1]], c='r')
+                    ax1_12.imshow(img_)
+                    ax1_12.set_title("gt bboxes", fontsize=8)
+                    target_bboxes = gt_bboxes[idx][torch.logical_not((gt_bboxes[idx] == 0).all(dim=1))]
+                    bboxes_ = target_bboxes.detach().cpu()
+                    for i in range(len(bboxes_)):
+                        ax1_12.plot([bboxes_[i][0], bboxes_[i][0], bboxes_[i][2], bboxes_[i][2], bboxes_[i][0]],
+                                    [bboxes_[i][1], bboxes_[i][3], bboxes_[i][3], bboxes_[i][1], bboxes_[i][1]], c='g')
+                    ax1_21.imshow(img_)
+
+                    bboxes_pred = nms_bboxes[idx]
+                    bboxes_ = (bboxes_pred * img_.shape[0]).detach().cpu()
+                    for i in range(len(bboxes_)):
+                        ax1_21.plot([bboxes_[i][0], bboxes_[i][0], bboxes_[i][2], bboxes_[i][2], bboxes_[i][0]],
+                                    [bboxes_[i][1], bboxes_[i][3], bboxes_[i][3], bboxes_[i][1], bboxes_[i][1]],
+                                    c='orange', linewidth=0.5)
+                    ax1_21.set_title("#GT-#PRED=" + str(len(target_bboxes) - len(bboxes_pred)))
+                    from torchvision import transforms as T
+                    res = T.Resize((1024, 1024))
+                    ax1_21.imshow(res(centerness).detach().cpu()[idx][0], alpha=0.6)
+                    plt.savefig(test.image_names[ids[idx].item()], dpi=200)
+                    plt.close()
+
+            for idx in range(img.shape[0]):
+                img_info = {
+                    "id": test.map_img_name_to_ori_id()[test.image_names[ids[idx].item()]],
+                    "file_name": "None",
+                }
+                bboxes = ops.box_convert(nms_bboxes[idx], 'xyxy', 'xywh')
+                bboxes = bboxes * img.shape[-1] / scaling_factor[idx]
+                for idxi in range(len(nms_bboxes[idx])):
+                    box = bboxes[idxi].detach().cpu()
+                    anno = {
+                        "id": anno_id,
+                        "image_id": test.map_img_name_to_ori_id()[test.image_names[ids[idx].item()]],
+                        "area": int((box[2] * box[3]).item()),
+                        "bbox": [int(box[0].item()), int(box[1].item()), int(box[2].item()), int(box[3].item())],
+                        "category_id": 1,
+                        "score": float(nms_scores[idx][idxi].item()),
+                    }
+                    anno_id += 1
+                    predictions["annotations"].append(anno)
+                predictions["images"].append(img_info)
+            num_objects_gt = density_map.flatten(1).sum(dim=1)
+            num_objects_pred = torch.tensor(num_objects_pred)
+            ae += torch.abs(
+                num_objects_gt - num_objects_pred
+            ).sum()
+            se += torch.pow(
+                num_objects_gt - num_objects_pred, 2
+            ).sum()
+        print(
+            f"{split.capitalize()} set",
+            f"MAE: {ae.item() / len(test):.2f}",
+            f"RMSE: {torch.sqrt(se / len(test)).item():.2f}",
+        )
+
+        with open("geco2_" + split + ".json", "w") as handle:
+            json.dump(predictions, handle)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('GECO2', parents=[get_argparser()])
+    args = parser.parse_args()
+    print(args)
+    print("model_name: ", args.model_name)
+    evaluate(args)
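
Usage note: the flag names below are assumptions inferred from the args attributes referenced above; the actual interface is defined by get_argparser() in utils/arg_parser.py, which is not part of this commit.

    python inference.py --model_path ./checkpoints --model_name CNTQG_multitrain_ca44 --data_path ./FSC147

For each split the script prints MAE and RMSE and writes a COCO-style detection file (geco2_val.json, geco2_test.json).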