Added 30 pixels of padding to avoid cropping the table structure.

I also suggest adding NMS (non-maximum suppression) to reduce the number of bounding boxes.

def outputs_to_objects(outputs, img_size, id2label, iou_threshold=0.2):
    """Turn raw detection outputs into a list of NMS-filtered objects.

    Only the first element of the batch is processed (every tensor is
    indexed with ``[0]``).

    Args:
        outputs: Model output exposing ``.logits`` and ``["pred_boxes"]``.
            Presumably a DETR-style / Table Transformer output -- confirm
            against the caller.
        img_size: Image size forwarded unchanged to ``rescale_bboxes``.
        id2label: Mapping from integer class id to class name. Predictions
            whose name is ``"no object"`` are discarded.
        iou_threshold: IoU threshold passed to ``nms``.

    Returns:
        A list of dicts with keys ``"label"`` (str), ``"score"`` (float) and
        ``"bbox"`` (list of floats), in the order returned by ``nms``.
        The list is empty when every prediction is ``"no object"``.
    """
    # Most likely class and its probability for each prediction.
    best = outputs.logits.softmax(-1).max(-1)
    pred_labels = best.indices.detach().cpu().numpy()[0]
    pred_scores = best.values.detach().cpu().numpy()[0]
    pred_bboxes = outputs["pred_boxes"].detach().cpu()[0]

    pred_bboxes = [
        elem.tolist() for elem in rescale_bboxes(pred_bboxes, img_size)
    ]

    objects = []
    for label, score, bbox in zip(pred_labels, pred_scores, pred_bboxes):
        class_label = id2label[int(label)]
        if class_label != "no object":
            objects.append(
                {
                    "label": class_label,
                    "score": float(score),
                    "bbox": [float(elem) for elem in bbox],
                }
            )

    # Nothing detected: torch.tensor([]) would be a 1-D tensor of shape (0,),
    # not an (N, 4) box tensor, so skip NMS entirely.
    if not objects:
        return objects

    # Convert list of dictionaries to tensors for NMS
    boxes = torch.tensor([obj["bbox"] for obj in objects])
    scores = torch.tensor([obj["score"] for obj in objects])

    # Apply non-maximum suppression.
    # NOTE(review): this is class-agnostic, so overlapping boxes of different
    # classes can suppress each other -- confirm that is intended.
    keep = nms(boxes, scores, iou_threshold=iou_threshold)

    # Use only the detections that were kept after NMS; convert the index
    # tensor to plain ints before indexing the Python list.
    return [objects[i] for i in keep.tolist()]
Ready to merge
This branch is ready to get merged automatically.

Sign up or log in to comment