ydshieh committed on
Commit
bf29ca1
1 Parent(s): a651689
Files changed (1) hide show
  1. coco_dataset/coco_dataset.py +15 -6
coco_dataset/coco_dataset.py CHANGED
@@ -113,6 +113,15 @@ class COCODataset(datasets.GeneratorBasedBuilder):
113
  "This script is supposed to work with local (downloaded) COCO dataset. The argument `data_dir` in `load_dataset()` is required."
114
  )
115
 
 
 
 
 
 
 
 
 
 
116
  splits = []
117
  for split in self.config.splits:
118
  if split == 'train':
@@ -120,8 +129,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
120
  name=datasets.Split.TRAIN,
121
  # These kwargs will be passed to _generate_examples
122
  gen_kwargs={
123
- "json_path": os.path.join(data_dir, f"captions_train{self.config.name}.json"),
124
- "image_dir": os.path.join(data_dir, f'train{self.config.name}'),
125
  "split": "train",
126
  }
127
  )
@@ -130,8 +139,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
130
  name=datasets.Split.VALIDATION,
131
  # These kwargs will be passed to _generate_examples
132
  gen_kwargs={
133
- "json_path": os.path.join(data_dir, f"captions_val{self.config.name}.json"),
134
- "image_dir": os.path.join(data_dir, f'val{self.config.name}'),
135
  "split": "valid",
136
  },
137
  )
@@ -140,8 +149,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
140
  name=datasets.Split.TEST,
141
  # These kwargs will be passed to _generate_examples
142
  gen_kwargs={
143
- "json_path": os.path.join(data_dir, f'image_info_test{self.config.name}.json'),
144
- "image_dir": os.path.join(data_dir, f'test{self.config.name}'),
145
  "split": "test",
146
  },
147
  )
 
113
  "This script is supposed to work with local (downloaded) COCO dataset. The argument `data_dir` in `load_dataset()` is required."
114
  )
115
 
116
+ _DL_URLS = {
117
+ "train": os.path.join(data_dir, "train2017.zip"),
118
+ "val": os.path.join(data_dir, "val2017.zip"),
119
+ "test": os.path.join(data_dir, "test2017.zip"),
120
+ "annotations_trainval": os.path.join(data_dir, "annotations_trainval2017.zip"),
121
+ "image_info_test": os.path.join(data_dir, "image_info_test2017.zip"),
122
+ }
123
+ archive_path = dl_manager.download_and_extract(_DL_URLS)
124
+
125
  splits = []
126
  for split in self.config.splits:
127
  if split == 'train':
 
129
  name=datasets.Split.TRAIN,
130
  # These kwargs will be passed to _generate_examples
131
  gen_kwargs={
132
+ "json_path": os.path.join(archive_path["annotations_trainval"], "annotations", "captions_train2017.json"),
133
+ "image_dir": os.path.join(archive_path["train"], "train2017"),
134
  "split": "train",
135
  }
136
  )
 
139
  name=datasets.Split.VALIDATION,
140
  # These kwargs will be passed to _generate_examples
141
  gen_kwargs={
142
+ "json_path": os.path.join(archive_path["annotations_trainval"], "annotations", "captions_val2017.json"),
143
+ "image_dir": os.path.join(archive_path["val"], "val2017"),
144
  "split": "valid",
145
  },
146
  )
 
149
  name=datasets.Split.TEST,
150
  # These kwargs will be passed to _generate_examples
151
  gen_kwargs={
152
+ "json_path": os.path.join(archive_path["image_info_test"], "annotations", "image_info_test2017.json"),
153
+ "image_dir": os.path.join(archive_path["test"], "test2017"),
154
  "split": "test",
155
  },
156
  )