glenn-jocher committed on
Commit f899417
1 Parent(s): ffb6e11

NGA xView 2018 Dataset Auto-Download (#3775)


* update clip_coords for numpy

* uncomment

* cleanup

* Add autosplits

* fix

* cleanup

Files changed (3)
  1. data/xView.yaml +101 -0
  2. utils/datasets.py +11 -10
  3. utils/general.py +13 -5
data/xView.yaml ADDED
@@ -0,0 +1,101 @@
+# xView 2018 dataset https://challenge.xviewdataset.org
+# ----> NOTE: DOWNLOAD DATA MANUALLY from URL above and unzip to /datasets/xView before running train command below
+# Train command: python train.py --data xView.yaml
+# Default dataset location is next to YOLOv5:
+#   /parent
+#     /datasets/xView
+#     /yolov5
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/xView  # dataset root dir
+train: images/autosplit_train.txt  # train images (relative to 'path') 90% of 847 train images
+val: images/autosplit_val.txt  # val images (relative to 'path') 10% of 847 train images
+
+# Classes
+nc: 60  # number of classes
+names: [ 'Fixed-wing Aircraft', 'Small Aircraft', 'Cargo Plane', 'Helicopter', 'Passenger Vehicle', 'Small Car', 'Bus',
+         'Pickup Truck', 'Utility Truck', 'Truck', 'Cargo Truck', 'Truck w/Box', 'Truck Tractor', 'Trailer',
+         'Truck w/Flatbed', 'Truck w/Liquid', 'Crane Truck', 'Railway Vehicle', 'Passenger Car', 'Cargo Car',
+         'Flat Car', 'Tank car', 'Locomotive', 'Maritime Vessel', 'Motorboat', 'Sailboat', 'Tugboat', 'Barge',
+         'Fishing Vessel', 'Ferry', 'Yacht', 'Container Ship', 'Oil Tanker', 'Engineering Vehicle', 'Tower crane',
+         'Container Crane', 'Reach Stacker', 'Straddle Carrier', 'Mobile Crane', 'Dump Truck', 'Haul Truck',
+         'Scraper/Tractor', 'Front loader/Bulldozer', 'Excavator', 'Cement Mixer', 'Ground Grader', 'Hut/Tent', 'Shed',
+         'Building', 'Aircraft Hangar', 'Damaged Building', 'Facility', 'Construction Site', 'Vehicle Lot', 'Helipad',
+         'Storage Tank', 'Shipping container lot', 'Shipping Container', 'Pylon', 'Tower' ]  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  import json
+  import os
+  from pathlib import Path
+
+  import numpy as np
+  from PIL import Image
+  from tqdm import tqdm
+
+  from utils.datasets import autosplit
+  from utils.general import download, xyxy2xywhn
+
+
+  def convert_labels(fname=Path('xView/xView_train.geojson')):
+      # Convert xView geoJSON labels to YOLO format
+      path = fname.parent
+      with open(fname) as f:
+          print(f'Loading {fname}...')
+          data = json.load(f)
+
+      # Make dirs
+      labels = Path(path / 'labels' / 'train')
+      os.system(f'rm -rf {labels}')
+      labels.mkdir(parents=True, exist_ok=True)
+
+      # xView classes 11-94 to 0-59
+      xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
+                           12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
+                           29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
+                           47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
+
+      shapes = {}
+      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
+          p = feature['properties']
+          if p['bounds_imcoords']:
+              id = p['image_id']
+              file = path / 'train_images' / id
+              if file.exists():  # 1395.tif missing
+                  try:
+                      box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
+                      assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
+                      cls = p['type_id']
+                      cls = xview_class2index[int(cls)]  # xView class to 0-59
+                      assert 59 >= cls >= 0, f'incorrect class index {cls}'
+
+                      # Write YOLO label
+                      if id not in shapes:
+                          shapes[id] = Image.open(file).size
+                      box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
+                      with open((labels / id).with_suffix('.txt'), 'a') as f:
+                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
+                  except Exception as e:
+                      print(f'WARNING: skipping one label for {file}: {e}')
+
+
+  # Download manually from https://challenge.xviewdataset.org
+  dir = Path(yaml['path'])  # dataset root dir
+  # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
+  #         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
+  #         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
+  # download(urls, dir=dir, delete=False)
+
+  # Convert labels
+  convert_labels(dir / 'xView_train.geojson')
+
+  # Move images
+  images = Path(dir / 'images')
+  images.mkdir(parents=True, exist_ok=True)
+  Path(dir / 'train_images').rename(dir / 'images' / 'train')
+  Path(dir / 'val_images').rename(dir / 'images' / 'val')
+
+  # Split
+  autosplit(dir / 'images' / 'train')
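
Note on how the download block runs: the script under download: is not a standalone file; YOLOv5 executes it when the paths listed in the YAML are missing. Below is a minimal sketch of that mechanism, assuming check_dataset() in utils/general.py exec's the string with the parsed YAML dict exposed as yaml (which is why the script above can read yaml['path']); the exact internals may differ, so treat this as illustrative only.

# Sketch (assumption): roughly how the 'download:' field above gets triggered
import yaml  # PyYAML

with open('data/xView.yaml', errors='ignore') as f:
    data = yaml.safe_load(f)  # parse the dataset YAML into a dict

script = data.get('download')
if isinstance(script, str) and not script.startswith('http'):  # inline Python script, not a URL
    exec(script, {'yaml': data})  # the script sees the parsed dict as 'yaml', hence yaml['path']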
utils/datasets.py CHANGED
@@ -985,7 +985,7 @@ def create_folder(path='./new'):
         os.makedirs(path)  # make new output folder
 
 
-def flatten_recursive(path='../coco128'):
+def flatten_recursive(path='../datasets/coco128'):
     # Flatten a recursive directory by bringing all files to top level
     new_path = Path(path + '_flat')
     create_folder(new_path)
@@ -993,7 +993,7 @@ def flatten_recursive(path='../coco128'):
         shutil.copyfile(file, new_path / Path(file).name)
 
 
-def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_boxes('../coco128')
+def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
     # Convert detection dataset into classification dataset, with one directory per class
 
     path = Path(path)  # images dir
@@ -1028,27 +1028,28 @@ def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_
             assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
 
 
-def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):
+def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
     """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
-    Usage: from utils.datasets import *; autosplit('../coco128')
+    Usage: from utils.datasets import *; autosplit()
     Arguments
-        path:           Path to images directory
-        weights:        Train, val, test weights (list)
-        annotated_only: Only use images with an annotated txt file
+        path:            Path to images directory
+        weights:         Train, val, test weights (list, tuple)
+        annotated_only:  Only use images with an annotated txt file
     """
     path = Path(path)  # images dir
     files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], [])  # image files only
     n = len(files)  # number of files
+    random.seed(0)  # for reproducibility
     indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
 
     txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
-    [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
+    [(path.parent / x).unlink(missing_ok=True) for x in txt]  # remove existing
 
     print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
     for i, img in tqdm(zip(indices, files), total=n):
         if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
-            with open(path / txt[i], 'a') as f:
-                f.write(str(img) + '\n')  # add image to txt file
+            with open(path.parent / txt[i], 'a') as f:
+                f.write('./' + img.relative_to(path.parent).as_posix() + '\n')  # add image to txt file
 
 
 def verify_image_label(args):
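
For reference, a short usage sketch of the updated autosplit() as the xView download script invokes it (the dataset path and example filename are assumptions matching the layout in xView.yaml): the split lists now land next to the images directory with './'-relative POSIX paths and a fixed seed, which is what lets xView.yaml point at images/autosplit_train.txt.

# Usage sketch; paths follow the /datasets/xView layout described in xView.yaml
from utils.datasets import autosplit

# Writes ../datasets/xView/images/autosplit_train.txt (~90%) and autosplit_val.txt (~10%),
# each line a path like './train/1036.tif' relative to the images dir; random.seed(0) makes the split reproducible
autosplit(path='../datasets/xView/images/train', weights=(0.9, 0.1, 0.0), annotated_only=False)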
utils/general.py CHANGED
@@ -393,8 +393,10 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
     return y
 
 
-def xyxy2xywhn(x, w=640, h=640):
+def xyxy2xywhn(x, w=640, h=640, clip=False):
     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
+    if clip:
+        clip_coords(x, (h, w))  # warning: inplace clip
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w  # x center
     y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h  # y center
@@ -455,10 +457,16 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
 
 def clip_coords(boxes, img_shape):
     # Clip xyxy bounding boxes to image shape (height, width)
-    boxes[:, 0].clamp_(0, img_shape[1])  # x1
-    boxes[:, 1].clamp_(0, img_shape[0])  # y1
-    boxes[:, 2].clamp_(0, img_shape[1])  # x2
-    boxes[:, 3].clamp_(0, img_shape[0])  # y2
+    if isinstance(boxes, torch.Tensor):
+        boxes[:, 0].clamp_(0, img_shape[1])  # x1
+        boxes[:, 1].clamp_(0, img_shape[0])  # y1
+        boxes[:, 2].clamp_(0, img_shape[1])  # x2
+        boxes[:, 3].clamp_(0, img_shape[0])  # y2
+    else:  # np.array
+        boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0])  # x1
+        boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1])  # y1
+        boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2])  # x2
+        boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3])  # y2
 
 
 def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
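
To see the two utils/general.py changes working together, here is a small sanity check with made-up numbers: a NumPy xyxy box partly outside a 640x480 image is clipped in place by the new NumPy branch of clip_coords() and then normalized by xyxy2xywhn(clip=True), mirroring how the xView label converter calls it.

# Illustrative check (values are made up)
import numpy as np
from utils.general import xyxy2xywhn

box = np.array([[-5.0, 10.0, 700.0, 500.0]])    # one xyxy box, partly outside a 640x480 image
out = xyxy2xywhn(box, w=640, h=480, clip=True)  # clips 'box' in place, then normalizes
print(out)  # ~[[0.5, 0.5104, 1.0, 0.9792]]  ->  x_center, y_center, width, height in [0, 1]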