Farley Lai glenn-jocher committed on
Commit
fc36064
1 Parent(s): 1c9f710

Update Objects365.yaml to include the official validation set (#5194)

Browse files

* Update Objects365.yaml

Download the official Objects365 validation set and convert the labels

* Enforce 4-space indent, reformat and clean up

* shorten list comprehension

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

Files changed (1) hide show
  1. data/Objects365.yaml +40 -33
data/Objects365.yaml CHANGED
@@ -62,43 +62,50 @@ names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gla
62
  download: |
63
  from pycocotools.coco import COCO
64
  from tqdm import tqdm
65
-
66
  from utils.general import download, Path
67
-
68
  # Make Directories
69
  dir = Path(yaml['path']) # dataset root dir
70
  for p in 'images', 'labels':
71
  (dir / p).mkdir(parents=True, exist_ok=True)
72
  for q in 'train', 'val':
73
  (dir / p / q).mkdir(parents=True, exist_ok=True)
74
-
75
- # Download
76
- url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
77
- download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
78
- download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
79
- curl=True, delete=False, threads=8)
80
-
81
- # Move
82
- train = dir / 'images' / 'train'
83
- for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
84
- f.rename(train / f.name) # move to /images/train
85
-
86
- # Labels
87
- coco = COCO(dir / 'zhiyuan_objv2_train.json')
88
- names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
89
- for cid, cat in enumerate(names):
90
- catIds = coco.getCatIds(catNms=[cat])
91
- imgIds = coco.getImgIds(catIds=catIds)
92
- for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
93
- width, height = im["width"], im["height"]
94
- path = Path(im["file_name"]) # image filename
95
- try:
96
- with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
97
- annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
98
- for a in coco.loadAnns(annIds):
99
- x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
100
- x, y = x + w / 2, y + h / 2 # xy to center
101
- file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n")
102
-
103
- except Exception as e:
104
- print(e)
 
 
 
 
 
 
 
 
62
  download: |
63
  from pycocotools.coco import COCO
64
  from tqdm import tqdm
65
+
66
  from utils.general import download, Path
67
+
68
  # Make Directories
69
  dir = Path(yaml['path']) # dataset root dir
70
  for p in 'images', 'labels':
71
  (dir / p).mkdir(parents=True, exist_ok=True)
72
  for q in 'train', 'val':
73
  (dir / p / q).mkdir(parents=True, exist_ok=True)
74
+
75
+ # Train, Val Splits
76
+ for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
77
+ print(f"Processing {split} in {patches} patches ...")
78
+ images, labels = dir / 'images' / split, dir / 'labels' / split
79
+
80
+ # Download
81
+ url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
82
+ if split == 'train':
83
+ download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json
84
+ download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
85
+ elif split == 'val':
86
+ download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json
87
+ download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
88
+ download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
89
+
90
+ # Move
91
+ for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
92
+ f.rename(images / f.name) # move to /images/{split}
93
+
94
+ # Labels
95
+ coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
96
+ names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
97
+ for cid, cat in enumerate(names):
98
+ catIds = coco.getCatIds(catNms=[cat])
99
+ imgIds = coco.getImgIds(catIds=catIds)
100
+ for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
101
+ width, height = im["width"], im["height"]
102
+ path = Path(im["file_name"]) # image filename
103
+ try:
104
+ with open(labels / path.with_suffix('.txt').name, 'a') as file:
105
+ annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
106
+ for a in coco.loadAnns(annIds):
107
+ x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
108
+ x, y = x + w / 2, y + h / 2 # xy to center
109
+ file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n")
110
+ except Exception as e:
111
+ print(e)