'''Encode object boxes and labels.'''
import math
import torch
import numpy as np
from detectors.retinanet.tools import meshgrid, box_iou, box_nms, change_box_order
class DataEncoder:
    def __init__(self, device):
        self.anchor_areas = [32*32., 64*64., 128*128., 256*256., 512*512.]  # p3 -> p7
        self.aspect_ratios = [1/3., 1/1., 3/1.]
        self.scale_ratios = [1., pow(2, 1/2.), 0.3]
        # 3 aspect ratios x 3 scale ratios = 9 anchors per feature-map cell
        self.anchor_wh = self._get_anchor_wh()
        self.device = device
    def _get_anchor_wh(self):
        '''Compute anchor width and height for each feature map.
        Returns:
          anchor_wh: (tensor) anchor wh, sized [#fm, #anchors_per_cell, 2].
        '''
        anchor_wh = []
        for s in self.anchor_areas:
            for ar in self.aspect_ratios:  # w/h = ar
                h = math.sqrt(s/ar)
                w = ar * h
                for sr in self.scale_ratios:  # scale
                    anchor_h = h*sr
                    anchor_w = w*sr
                    anchor_wh.append([anchor_w, anchor_h])
        num_fms = len(self.anchor_areas)
        return torch.Tensor(anchor_wh).view(num_fms, -1, 2)
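    # Worked example of the loop above (values rounded; the ratios are the
    # defaults set in __init__): for the p3 area 32*32 with aspect ratio 1/3,
    # h = sqrt(1024 / (1/3)) ~= 55.4 and w = h/3 ~= 18.5, so the sr=1.0 anchor
    # is roughly 18.5 x 55.4 px; the remaining scale_ratios scale both sides.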
    def _get_anchor_boxes(self, input_size):
        '''Compute anchor boxes for each feature map.
        Args:
          input_size: (tensor) model input size of (w,h).
        Returns:
          boxes: (tensor) anchor boxes for all feature maps, sized [#anchors, 4],
                 where #anchors = sum over feature maps of fm_w * fm_h * #anchors_per_cell.
        '''
        num_fms = len(self.anchor_areas)
        fm_sizes = [(input_size/pow(2., i+3)).ceil() for i in range(num_fms)]  # p3 -> p7 feature map sizes
        boxes = []
        for i in range(num_fms):
            fm_size = fm_sizes[i]
            grid_size = input_size / fm_size
            fm_w, fm_h = int(fm_size[0]), int(fm_size[1])
            xy = meshgrid(fm_w, fm_h) + 0.5  # [fm_h*fm_w, 2]
            xy = (xy.float()*grid_size).view(fm_h, fm_w, 1, 2).expand(fm_h, fm_w, 9, 2)
            wh = self.anchor_wh[i].view(1, 1, 9, 2).expand(fm_h, fm_w, 9, 2)
            box = torch.cat([xy, wh], 3)  # [x,y,w,h]
            boxes.append(box.view(-1, 4))
        return torch.cat(boxes, 0)
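    # Anchor-count arithmetic for the common 512x512 input (this is where the
    # 49104 constant used in encode() comes from): the p3-p7 grids are
    # 64, 32, 16, 8 and 4 cells per side, so 64^2+32^2+16^2+8^2+4^2 = 5456
    # cells, and 5456 cells * 9 anchors per cell = 49104 anchors in total.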
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.
        We follow the Faster R-CNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        if len(boxes.size()) == 1 or boxes.size()[1] != 4:
            # No valid ground-truth boxes: return all-background targets.
            # NOTE: 49104 is the total anchor count for a 512x512 input.
            return torch.zeros((49104, 4)).float(), torch.zeros(49104).long()
        boxes = change_box_order(boxes, 'xyxy2xywh')
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        try:
            max_ious, max_ids = ious.max(1)
        except Exception:
            return torch.zeros([0, 4]), torch.zeros([0, ])
        boxes = boxes[max_ids]
        loc_xy = (boxes[:, :2]-anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:]/anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = labels[max_ids]
        cls_targets[max_ious < 0.4] = 0               # IoU < 0.4: background for now
        ignore = (max_ious > 0.2) & (max_ious < 0.4)  # IoU in (0.2, 0.4): ignored
        cls_targets[ignore] = -1                      # mark ignored anchors with -1
        return loc_targets, cls_targets
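    # Minimal encode() usage sketch (the tensors and the 512 input size are
    # illustrative assumptions, not values fixed by this class):
    #   encoder = DataEncoder(device=torch.device('cpu'))
    #   gt_boxes = torch.tensor([[50., 60., 200., 220.]])  # one object, xyxy
    #   gt_labels = torch.tensor([1])
    #   loc_t, cls_t = encoder.encode(gt_boxes, gt_labels, input_size=512)
    #   # loc_t: [49104, 4] regression targets; cls_t: [49104] labels with
    #   # 0 = background, -1 = ignored, >0 = object class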
    def decode(self, loc_preds, cls_preds, input_size, CLS_THRESH, NMS_THRESH):
        '''Decode outputs back to bounding box locations and class labels.
        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class logits, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          boxes: (tensor) decoded box locations, sized [#obj, 4].
          labels: (tensor) class labels for each box, sized [#obj,].
          score: (tensor) confidence for each box, sized [#obj,].
        '''
        # CLS_THRESH = 0
        # NMS_THRESH = 0.15
        # loc_preds, cls_preds, input_size = loc_preds.data.squeeze(), cls_preds.data.squeeze(), (w, h)
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        anchor_boxes = anchor_boxes.to(self.device)
        # Invert the box coder: xy = txy * anchor_wh + anchor_xy, wh = exp(twh) * anchor_wh
        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], 1)  # [#anchors, 4], xyxy
        # Binary classification: every kept box gets label 1.
        labels = torch.ones(len(cls_preds)).long().to(self.device)
        score = cls_preds.sigmoid()  # [#anchors,]
        # score, labels = cls_preds.sigmoid().max(1)  # multi-class variant
        ids = score > CLS_THRESH
        if ids.sum() == 0:
            # Nothing clears the score threshold: return sentinel tensors of ones.
            return torch.ones(1), torch.ones(1), torch.ones(1)
        else:
            ids = ids.nonzero().squeeze()  # [#obj,]
            if len(ids.shape) == 0:
                # A single detection squeezes to a 0-d tensor; restore the
                # batch dimension before indexing and NMS.
                ids = ids.unsqueeze(0)
            keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
            keep = keep.to(self.device)
            return boxes[ids][keep], labels[ids][keep], score[ids][keep]
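    # Minimal decode() usage sketch (shapes and threshold values below are
    # illustrative assumptions): given squeezed single-image outputs
    # loc_preds [49104, 4] and binary logits cls_preds [49104],
    #   boxes, labels, scores = encoder.decode(loc_preds, cls_preds, 512,
    #                                          CLS_THRESH=0.5, NMS_THRESH=0.15)
    # returns three tensors of ones when no anchor clears CLS_THRESH.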
class DataEncoder_fusion:
    def __init__(self, anchor_wh, device, **kwargs):
        # e.g. anchor_wh = [[18, 19], [28, 26], [36, 38], [55, 56], [92, 91.]]
        self.anchor_wh = torch.tensor(anchor_wh).float()  # float so it concatenates with the xy grid
        self.num_anchors = len(self.anchor_wh)
        self.fm_size = 32
        self.device = device
    def _get_anchor_boxes(self, input_size):
        '''Compute anchor boxes for the single fused feature map.
        Args:
          input_size: (tensor) model input size of (w,h).
        Returns:
          boxes: (tensor) anchor boxes, sized [#anchors, 4],
                 where #anchors = fm_w * fm_h * #anchors_per_cell.
        '''
        boxes = []
        fm_size = self.fm_size
        grid_size = input_size / fm_size
        fm_w, fm_h = int(fm_size), int(fm_size)
        xy = meshgrid(fm_w, fm_h) + 0.5  # [fm_h*fm_w, 2]
        xy = (xy.float()*grid_size).view(fm_h, fm_w, 1, 2).expand(fm_h, fm_w, self.num_anchors, 2)
        wh = self.anchor_wh.view(1, 1, self.num_anchors, 2).expand(fm_h, fm_w, self.num_anchors, 2)
        box = torch.cat([xy, wh], 3)  # [x,y,w,h]
        boxes.append(box.view(-1, 4))
        return torch.cat(boxes, 0)
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.
        We follow the Faster R-CNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)
        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        if len(boxes.size()) == 1 or boxes.size()[1] != 4:
            # No valid ground-truth boxes: return all-background targets.
            num_box = self.fm_size**2 * self.num_anchors
            return torch.zeros((num_box, 4)).float(), torch.zeros(num_box).long()
        boxes = change_box_order(boxes, 'xyxy2xywh')
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        try:
            max_ious, max_ids = ious.max(1)
        except Exception:
            return torch.zeros([0, 4]), torch.zeros([0, ])
        boxes = boxes[max_ids]
        loc_xy = (boxes[:, :2]-anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:]/anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = labels[max_ids]
        cls_targets[max_ious < 0.4] = 0               # IoU < 0.4: background for now
        ignore = (max_ious > 0.2) & (max_ious < 0.4)  # IoU in (0.2, 0.4): ignored
        cls_targets[ignore] = -1                      # mark ignored anchors with -1
        return loc_targets, cls_targets
    def decode(self, loc_preds, cls_preds, input_size, CLS_THRESH, NMS_THRESH):
        '''Decode outputs back to bounding box locations and class labels.
        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class logits, sized [#anchors, #classes].
          input_size: (int/tuple) model input size of (w,h).
        Returns:
          boxes: (tensor) decoded box locations, sized [#obj, 4].
          labels: (tensor) class labels for each box, sized [#obj,].
          score: (tensor) confidence for each box, sized [#obj,].
        '''
        # CLS_THRESH = 0
        # NMS_THRESH = 0.15
        # loc_preds, cls_preds, input_size = loc_preds.data.squeeze(), cls_preds.data.squeeze(), (w, h)
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        anchor_boxes = anchor_boxes.to(self.device)
        # Invert the box coder: xy = txy * anchor_wh + anchor_xy, wh = exp(twh) * anchor_wh
        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy-wh/2, xy+wh/2], 1)  # [#anchors, 4], xyxy
        # Binary classification: every kept box gets label 1.
        labels = torch.ones(len(cls_preds)).long().to(self.device)
        score = cls_preds.sigmoid()  # [#anchors,]
        # score, labels = cls_preds.sigmoid().max(1)  # multi-class variant
        ids = score > CLS_THRESH
        if ids.sum() == 0:
            # Nothing clears the score threshold: return sentinel tensors of ones.
            return torch.ones(1), torch.ones(1), torch.ones(1)
        else:
            ids = ids.nonzero().squeeze()  # [#obj,]
            if len(ids.shape) == 0:
                # A single detection squeezes to a 0-d tensor; restore the
                # batch dimension before indexing and NMS.
                ids = ids.unsqueeze(0)
            keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
            keep = keep.to(self.device)
            return boxes[ids][keep], labels[ids][keep], score[ids][keep]
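
# Self-check sketch for DataEncoder_fusion. The anchor sizes below are the
# example values from the commented-out default in __init__; the 512 input
# size and the ground-truth boxes are illustrative assumptions, not values
# mandated by this module.
if __name__ == '__main__':
    device = torch.device('cpu')
    encoder = DataEncoder_fusion(
        anchor_wh=[[18., 19.], [28., 26.], [36., 38.], [55., 56.], [92., 91.]],
        device=device)
    gt_boxes = torch.tensor([[40., 40., 120., 150.], [200., 210., 320., 330.]])
    gt_labels = torch.tensor([1, 1])
    loc_t, cls_t = encoder.encode(gt_boxes, gt_labels, input_size=512)
    # 32*32 cells * 5 anchors per cell = 5120 anchors in total
    print(loc_t.shape, cls_t.shape, int((cls_t > 0).sum()), 'positive anchors')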