ydshieh
committed on
Commit
•
bf29ca1
1
Parent(s):
a651689
improve
Browse files
- coco_dataset/coco_dataset.py +15 -6
coco_dataset/coco_dataset.py
CHANGED
@@ -113,6 +113,15 @@ class COCODataset(datasets.GeneratorBasedBuilder):
|
|
113 |
"This script is supposed to work with local (downloaded) COCO dataset. The argument `data_dir` in `load_dataset()` is required."
|
114 |
)
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
splits = []
|
117 |
for split in self.config.splits:
|
118 |
if split == 'train':
|
@@ -120,8 +129,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
|
|
120 |
name=datasets.Split.TRAIN,
|
121 |
# These kwargs will be passed to _generate_examples
|
122 |
gen_kwargs={
|
123 |
-
"json_path": os.path.join(
|
124 |
-
"image_dir": os.path.join(
|
125 |
"split": "train",
|
126 |
}
|
127 |
)
|
@@ -130,8 +139,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
|
|
130 |
name=datasets.Split.VALIDATION,
|
131 |
# These kwargs will be passed to _generate_examples
|
132 |
gen_kwargs={
|
133 |
-
"json_path": os.path.join(
|
134 |
-
"image_dir": os.path.join(
|
135 |
"split": "valid",
|
136 |
},
|
137 |
)
|
@@ -140,8 +149,8 @@ class COCODataset(datasets.GeneratorBasedBuilder):
|
|
140 |
name=datasets.Split.TEST,
|
141 |
# These kwargs will be passed to _generate_examples
|
142 |
gen_kwargs={
|
143 |
-
"json_path": os.path.join(
|
144 |
-
"image_dir": os.path.join(
|
145 |
"split": "test",
|
146 |
},
|
147 |
)
|
|
|
113 |
"This script is supposed to work with local (downloaded) COCO dataset. The argument `data_dir` in `load_dataset()` is required."
|
114 |
)
|
115 |
|
116 |
+
_DL_URLS = {
|
117 |
+
"train": os.path.join(data_dir, "train2017.zip"),
|
118 |
+
"val": os.path.join(data_dir, "val2017.zip"),
|
119 |
+
"test": os.path.join(data_dir, "test2017.zip"),
|
120 |
+
"annotations_trainval": os.path.join(data_dir, "annotations_trainval2017.zip"),
|
121 |
+
"image_info_test": os.path.join(data_dir, "image_info_test2017.zip"),
|
122 |
+
}
|
123 |
+
archive_path = dl_manager.download_and_extract(_DL_URLS)
|
124 |
+
|
125 |
splits = []
|
126 |
for split in self.config.splits:
|
127 |
if split == 'train':
|
|
|
129 |
name=datasets.Split.TRAIN,
|
130 |
# These kwargs will be passed to _generate_examples
|
131 |
gen_kwargs={
|
132 |
+
"json_path": os.path.join(archive_path["annotations_trainval"], "annotations", "captions_train2017.json"),
|
133 |
+
"image_dir": os.path.join(archive_path["train"], "train2017"),
|
134 |
"split": "train",
|
135 |
}
|
136 |
)
|
|
|
139 |
name=datasets.Split.VALIDATION,
|
140 |
# These kwargs will be passed to _generate_examples
|
141 |
gen_kwargs={
|
142 |
+
"json_path": os.path.join(archive_path["annotations_trainval"], "annotations", "captions_val2017.json"),
|
143 |
+
"image_dir": os.path.join(archive_path["val"], "val2017"),
|
144 |
"split": "valid",
|
145 |
},
|
146 |
)
|
|
|
149 |
name=datasets.Split.TEST,
|
150 |
# These kwargs will be passed to _generate_examples
|
151 |
gen_kwargs={
|
152 |
+
"json_path": os.path.join(archive_path["image_info_test"], "annotations", "image_info_test2017.json"),
|
153 |
+
"image_dir": os.path.join(archive_path["test"], "test2017"),
|
154 |
"split": "test",
|
155 |
},
|
156 |
)
|