Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved. | |
from typing import List, Optional, Union | |
from mmengine import fileio | |
from mmengine.logging import MMLogger | |
from mmpretrain.registry import DATASETS | |
from .categories import IMAGENET_CATEGORIES | |
from .custom import CustomDataset | |
class ImageNet(CustomDataset):
    """`ImageNet <http://www.image-net.org>`_ Dataset.

    The dataset supports two kinds of directory format,

    ::

        imagenet
        ├── train
        │   ├── class_x
        │   │   ├── x1.jpg
        │   │   ├── x2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── y1.jpg
        │   │   ├── y2.jpg
        │   │   └── ...
        │   └── ...
        ├── val
        │   ├── class_x
        │   │   └── ...
        │   ├── class_y
        │   │   └── ...
        │   └── ...
        └── test
            ├── test1.jpg
            ├── test2.jpg
            └── ...

    or ::

        imagenet
        ├── train
        │   ├── x1.jpg
        │   ├── y1.jpg
        │   └── ...
        ├── val
        │   ├── x3.jpg
        │   ├── y3.jpg
        │   └── ...
        ├── test
        │   ├── test1.jpg
        │   ├── test2.jpg
        │   └── ...
        └── meta
            ├── train.txt
            └── val.txt

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to ''.
        split (str): The dataset split, supports "train", "val" and "test".
            When given, an empty ``data_prefix`` falls back to the split
            name, and an empty ``ann_file`` falls back to
            ``meta/{split}.txt`` if that file exists under ``data_root``.
            Defaults to ''.
        data_prefix (str | dict): Prefix for training data. Defaults to ''.
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        **kwargs: Other keyword arguments in :class:`CustomDataset` and
            :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import ImageNet
        >>> train_dataset = ImageNet(data_root='data/imagenet', split='train')
        >>> train_dataset
        Dataset ImageNet
            Number of samples:  1281167
            Number of categories:       1000
            Root of dataset:    data/imagenet
        >>> test_dataset = ImageNet(data_root='data/imagenet', split='val')
        >>> test_dataset
        Dataset ImageNet
            Number of samples:  50000
            Number of categories:       1000
            Root of dataset:    data/imagenet
    """  # noqa: E501

    # File extensions passed to the parent class as ``extensions``.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')
    # Default meta information: the ImageNet category names.
    METAINFO = {'classes': IMAGENET_CATEGORIES}

    def __init__(self,
                 data_root: str = '',
                 split: str = '',
                 data_prefix: Union[str, dict] = '',
                 ann_file: str = '',
                 metainfo: Optional[dict] = None,
                 **kwargs):
        # A caller-supplied ``extensions`` takes precedence over the default.
        kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}

        if split:
            splits = ['train', 'val', 'test']
            assert split in splits, \
                f"The split must be one of {splits}, but get '{split}'"

            if split == 'test':
                logger = MMLogger.get_current_instance()
                # The two literals are concatenated, so the first one must
                # end with a space to keep the words apart.
                logger.info(
                    'Since the ImageNet1k test set does not provide label '
                    'annotations, `with_label` is set to False')
                kwargs['with_label'] = False

            # Fall back to the split name as the image sub-directory.
            data_prefix = split if data_prefix == '' else data_prefix

            if ann_file == '':
                # Only use the conventional annotation file if it exists;
                # the stored path stays relative to ``data_root``.
                _ann_path = fileio.join_path(data_root, 'meta', f'{split}.txt')
                if fileio.exists(_ann_path):
                    ann_file = fileio.join_path('meta', f'{split}.txt')

        super().__init__(
            data_root=data_root,
            data_prefix=data_prefix,
            ann_file=ann_file,
            metainfo=metainfo,
            **kwargs)

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset.

        Returns:
            List[str]: A single line reporting the dataset root directory.
        """
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body
class ImageNet21k(CustomDataset):
    """ImageNet21k Dataset.

    Since the dataset ImageNet21k is extremely big, containing 21k+ classes
    and about 14M files, we won't provide the default categories list. Please
    specify it from the ``classes`` argument.

    The dataset directory structure is as follows,

    ImageNet21k dataset directory ::

        imagenet21k
        ├── train
        │   ├── class_x
        │   │   ├── x1.jpg
        │   │   ├── x2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── y1.jpg
        │   │   ├── y2.jpg
        │   │   └── ...
        │   └── ...
        └── meta
            └── train.txt

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to ''.
        split (str): The dataset split, only "train" is supported. When
            given, an empty ``data_prefix`` falls back to the split name,
            and an empty ``ann_file`` falls back to ``meta/{split}.txt`` if
            that file exists under ``data_root``. Leave it empty to use a
            custom validation or test set. Defaults to ''.
        data_prefix (str | dict): Prefix for training data. Defaults to ''.
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        multi_label (bool): Not implemented yet; passing True raises
            ``NotImplementedError``. Defaults to False.
        **kwargs: Other keyword arguments in :class:`CustomDataset` and
            :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import ImageNet21k
        >>> train_dataset = ImageNet21k(data_root='data/imagenet21k', split='train')
        >>> train_dataset
        Dataset ImageNet21k
            Number of samples:  14197088
            Annotation file:    data/imagenet21k/meta/train.txt
            Prefix of images:   data/imagenet21k/train
    """  # noqa: E501

    # File extensions passed to the parent class as ``extensions``.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')

    def __init__(self,
                 data_root: str = '',
                 split: str = '',
                 data_prefix: Union[str, dict] = '',
                 ann_file: str = '',
                 metainfo: Optional[dict] = None,
                 multi_label: bool = False,
                 **kwargs):
        if multi_label:
            raise NotImplementedError(
                'The `multi_label` option is not supported by now.')
        self.multi_label = multi_label

        if split:
            splits = ['train']
            # Implicit literal concatenation keeps the message free of the
            # stray whitespace / fused sentences that backslash
            # continuations inside a string literal produce.
            assert split in splits, (
                f"The split must be one of {splits}, but get '{split}'. "
                'If you want to specify your own validation set or test set, '
                'please set split to None.')
            self.split = split
            # Fall back to the split name as the image sub-directory.
            data_prefix = split if data_prefix == '' else data_prefix

            if not ann_file:
                # Only use the conventional annotation file if it exists;
                # the stored path stays relative to ``data_root``.
                _ann_path = fileio.join_path(data_root, 'meta', f'{split}.txt')
                if fileio.exists(_ann_path):
                    ann_file = fileio.join_path('meta', f'{split}.txt')

        logger = MMLogger.get_current_instance()

        if not ann_file:
            # No annotation file was given or found.
            logger.warning(
                'The ImageNet21k dataset is large, and scanning directory may '
                'consume long time. Considering to specify the `ann_file` to '
                'accelerate the initialization.')

        # A caller-supplied ``extensions`` takes precedence over the default.
        kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}
        super().__init__(
            data_root=data_root,
            data_prefix=data_prefix,
            ann_file=ann_file,
            metainfo=metainfo,
            **kwargs)

        if self.CLASSES is None:
            logger.warning(
                'The CLASSES is not stored in the `ImageNet21k` class. '
                'Considering to specify the `classes` argument if you need '
                'do inference on the ImageNet-21k dataset')