glenn-jocher committed on
Commit
93a6765
1 Parent(s): 77fb8ee

update mosaic border

Browse files
Files changed (1) hide show
  1. utils/datasets.py +17 -13
utils/datasets.py CHANGED
@@ -62,7 +62,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
62
 
63
 
64
  class LoadImages: # for inference
65
- def __init__(self, path, img_size=416):
66
  path = str(Path(path)) # os-agnostic
67
  files = []
68
  if os.path.isdir(path):
@@ -139,7 +139,7 @@ class LoadImages: # for inference
139
 
140
 
141
  class LoadWebcam: # for inference
142
- def __init__(self, pipe=0, img_size=416):
143
  self.img_size = img_size
144
 
145
  if pipe == '0':
@@ -204,7 +204,7 @@ class LoadWebcam: # for inference
204
 
205
 
206
  class LoadStreams: # multiple IP or RTSP cameras
207
- def __init__(self, sources='streams.txt', img_size=416):
208
  self.mode = 'images'
209
  self.img_size = img_size
210
 
@@ -277,7 +277,7 @@ class LoadStreams: # multiple IP or RTSP cameras
277
 
278
 
279
  class LoadImagesAndLabels(Dataset): # for training/testing
280
- def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
281
  cache_images=False, single_cls=False, stride=32, pad=0.0):
282
  try:
283
  path = str(Path(path)) # os-agnostic
@@ -307,6 +307,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
307
  self.image_weights = image_weights
308
  self.rect = False if image_weights else rect
309
  self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
 
 
 
310
 
311
  # Define labels
312
  self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
@@ -585,7 +588,8 @@ def load_mosaic(self, index):
585
 
586
  labels4 = []
587
  s = self.img_size
588
- xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y
 
589
  indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
590
  for i, index in enumerate(indices):
591
  # Load image
@@ -633,12 +637,12 @@ def load_mosaic(self, index):
633
  translate=self.hyp['translate'],
634
  scale=self.hyp['scale'],
635
  shear=self.hyp['shear'],
636
- border=-s // 2) # border to remove
637
 
638
  return img4, labels4
639
 
640
 
641
- def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
642
  # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
643
  shape = img.shape[:2] # current shape [height, width]
644
  if isinstance(new_shape, int):
@@ -671,13 +675,13 @@ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scale
671
  return img, ratio, (dw, dh)
672
 
673
 
674
- def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
675
  # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
676
  # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
677
  # targets = [cls, xyxy]
678
 
679
- height = img.shape[0] + border * 2
680
- width = img.shape[1] + border * 2
681
 
682
  # Rotation and Scale
683
  R = np.eye(3)
@@ -689,8 +693,8 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
689
 
690
  # Translation
691
  T = np.eye(3)
692
- T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels)
693
- T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels)
694
 
695
  # Shear
696
  S = np.eye(3)
@@ -699,7 +703,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
699
 
700
  # Combined rotation matrix
701
  M = S @ T @ R # ORDER IS IMPORTANT HERE!!
702
- if (border != 0) or (M != np.eye(3)).any(): # image changed
703
  img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
704
 
705
  # Transform label coordinates
 
62
 
63
 
64
  class LoadImages: # for inference
65
+ def __init__(self, path, img_size=640):
66
  path = str(Path(path)) # os-agnostic
67
  files = []
68
  if os.path.isdir(path):
 
139
 
140
 
141
  class LoadWebcam: # for inference
142
+ def __init__(self, pipe=0, img_size=640):
143
  self.img_size = img_size
144
 
145
  if pipe == '0':
 
204
 
205
 
206
  class LoadStreams: # multiple IP or RTSP cameras
207
+ def __init__(self, sources='streams.txt', img_size=640):
208
  self.mode = 'images'
209
  self.img_size = img_size
210
 
 
277
 
278
 
279
  class LoadImagesAndLabels(Dataset): # for training/testing
280
+ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
281
  cache_images=False, single_cls=False, stride=32, pad=0.0):
282
  try:
283
  path = str(Path(path)) # os-agnostic
 
307
  self.image_weights = image_weights
308
  self.rect = False if image_weights else rect
309
  self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
310
+ self.mosaic_border = None
311
+ self.stride = stride
312
+
313
 
314
  # Define labels
315
  self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
 
588
 
589
  labels4 = []
590
  s = self.img_size
591
+ border = [-s // 2, -s // 2] # self.mosaic_border
592
+ yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in border] # mosaic center y, x (border is ordered [height, width])
593
  indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
594
  for i, index in enumerate(indices):
595
  # Load image
 
637
  translate=self.hyp['translate'],
638
  scale=self.hyp['scale'],
639
  shear=self.hyp['shear'],
640
+ border=border) # border to remove
641
 
642
  return img4, labels4
643
 
644
 
645
+ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
646
  # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
647
  shape = img.shape[:2] # current shape [height, width]
648
  if isinstance(new_shape, int):
 
675
  return img, ratio, (dw, dh)
676
 
677
 
678
+ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
679
  # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
680
  # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
681
  # targets = [cls, xyxy]
682
 
683
+ height = img.shape[0] + border[0] * 2 # shape(h,w,c)
684
+ width = img.shape[1] + border[1] * 2
685
 
686
  # Rotation and Scale
687
  R = np.eye(3)
 
693
 
694
  # Translation
695
  T = np.eye(3)
696
+ T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1] # x translation (pixels)
697
+ T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0] # y translation (pixels)
698
 
699
  # Shear
700
  S = np.eye(3)
 
703
 
704
  # Combined rotation matrix
705
  M = S @ T @ R # ORDER IS IMPORTANT HERE!!
706
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
707
  img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
708
 
709
  # Transform label coordinates