Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from typing import List, Optional, Union | |
| from mmengine import fileio | |
| from mmengine.logging import MMLogger | |
| from mmpretrain.registry import DATASETS | |
| from .categories import IMAGENET_CATEGORIES | |
| from .custom import CustomDataset | |
class ImageNet(CustomDataset):
    """`ImageNet <http://www.image-net.org>`_ Dataset.

    The dataset supports two kinds of directory format,

    ::

        imagenet
        ├── train
        │   ├── class_x
        │   │   ├── x1.jpg
        │   │   ├── x2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── y1.jpg
        │   │   ├── y2.jpg
        │   │   └── ...
        │   └── ...
        ├── val
        │   ├── class_x
        │   │   └── ...
        │   ├── class_y
        │   │   └── ...
        │   └── ...
        └── test
            ├── test1.jpg
            ├── test2.jpg
            └── ...

    or ::

        imagenet
        ├── train
        │   ├── x1.jpg
        │   ├── y1.jpg
        │   └── ...
        ├── val
        │   ├── x3.jpg
        │   ├── y3.jpg
        │   └── ...
        ├── test
        │   ├── test1.jpg
        │   ├── test2.jpg
        │   └── ...
        └── meta
            ├── train.txt
            └── val.txt

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to ''.
        split (str): The dataset split, supports "train", "val" and "test".
            When given and ``data_prefix`` is empty, the split name is used
            as the data prefix; when ``ann_file`` is empty and
            ``<data_root>/meta/<split>.txt`` exists, that file is used as
            the annotation file. Defaults to ''.
        data_prefix (str | dict): Prefix for training data. Defaults to ''.
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        **kwargs: Other keyword arguments in :class:`CustomDataset` and
            :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import ImageNet
        >>> train_dataset = ImageNet(data_root='data/imagenet', split='train')
        >>> train_dataset
        Dataset ImageNet
            Number of samples:     1281167
            Number of categories:  1000
            Root of dataset:       data/imagenet
        >>> test_dataset = ImageNet(data_root='data/imagenet', split='val')
        >>> test_dataset
        Dataset ImageNet
            Number of samples:     50000
            Number of categories:  1000
            Root of dataset:       data/imagenet
    """  # noqa: E501

    # File suffixes treated as images; passed to the parent as `extensions`
    # unless the caller overrides it through **kwargs.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')
    METAINFO = {'classes': IMAGENET_CATEGORIES}

    def __init__(self,
                 data_root: str = '',
                 split: str = '',
                 data_prefix: Union[str, dict] = '',
                 ann_file: str = '',
                 metainfo: Optional[dict] = None,
                 **kwargs):
        # Caller-supplied `extensions` wins because **kwargs is unpacked last.
        kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}

        if split:
            splits = ['train', 'val', 'test']
            assert split in splits, \
                f"The split must be one of {splits}, but get '{split}'"

            if split == 'test':
                logger = MMLogger.get_current_instance()
                # The two literals are concatenated, so the first one must
                # end with a space to keep "label annotations" two words.
                logger.info(
                    'Since the ImageNet1k test set does not provide label '
                    'annotations, `with_label` is set to False')
                kwargs['with_label'] = False

            # An explicit data_prefix takes precedence over the split name.
            data_prefix = split if data_prefix == '' else data_prefix

            if ann_file == '':
                # Existence is checked against the full path, but the value
                # handed to the parent stays relative to `data_root`.
                _ann_path = fileio.join_path(data_root, 'meta', f'{split}.txt')
                if fileio.exists(_ann_path):
                    ann_file = fileio.join_path('meta', f'{split}.txt')

        super().__init__(
            data_root=data_root,
            data_prefix=data_prefix,
            ann_file=ann_file,
            metainfo=metainfo,
            **kwargs)

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset.

        Returns:
            List[str]: A single line reporting ``self.data_root``.
        """
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body
class ImageNet21k(CustomDataset):
    """ImageNet21k Dataset.

    Since the dataset ImageNet21k is extremely big, containing 21k+ classes
    and about 14 million files, we won't provide the default categories list.
    Please specify it from the ``classes`` argument.

    The dataset directory structure is as follows,

    ImageNet21k dataset directory ::

        imagenet21k
        ├── train
        │   ├── class_x
        │   │   ├── x1.jpg
        │   │   ├── x2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── y1.jpg
        │   │   ├── y2.jpg
        │   │   └── ...
        │   └── ...
        └── meta
            └── train.txt

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to ''.
        split (str): The dataset split; only "train" is accepted. Leave it
            empty to use a custom validation or test set via ``data_prefix``
            and ``ann_file``. Defaults to ''.
        data_prefix (str | dict): Prefix for training data. Defaults to ''.
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        multi_label (bool): Not implemented yet. Use multi label or not.
            Passing True raises ``NotImplementedError``. Defaults to False.
        **kwargs: Other keyword arguments in :class:`CustomDataset` and
            :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import ImageNet21k
        >>> train_dataset = ImageNet21k(data_root='data/imagenet21k', split='train')
        >>> train_dataset
        Dataset ImageNet21k
            Number of samples:  14197088
            Annotation file:    data/imagenet21k/meta/train.txt
            Prefix of images:   data/imagenet21k/train
    """  # noqa: E501

    # File suffixes treated as images; passed to the parent as `extensions`
    # unless the caller overrides it through **kwargs.
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')

    def __init__(self,
                 data_root: str = '',
                 split: str = '',
                 data_prefix: Union[str, dict] = '',
                 ann_file: str = '',
                 metainfo: Optional[dict] = None,
                 multi_label: bool = False,
                 **kwargs):
        if multi_label:
            raise NotImplementedError(
                'The `multi_label` option is not supported by now.')
        self.multi_label = multi_label

        # Recorded unconditionally so the attribute always exists, even when
        # no split was requested.
        self.split = split

        if split:
            splits = ['train']
            # Implicit literal concatenation (instead of backslash
            # continuations inside the string) keeps the sentences separated
            # by single spaces regardless of source indentation.
            assert split in splits, (
                f"The split must be one of {splits}, but get '{split}'. "
                'If you want to specify your own validation set or test '
                'set, please set split to None.')

            # An explicit data_prefix takes precedence over the split name.
            data_prefix = split if data_prefix == '' else data_prefix

            if not ann_file:
                # Existence is checked against the full path, but the value
                # handed to the parent stays relative to `data_root`.
                _ann_path = fileio.join_path(data_root, 'meta', f'{split}.txt')
                if fileio.exists(_ann_path):
                    ann_file = fileio.join_path('meta', f'{split}.txt')

        logger = MMLogger.get_current_instance()

        if not ann_file:
            logger.warning(
                'The ImageNet21k dataset is large, and scanning directory may '
                'consume long time. Considering to specify the `ann_file` to '
                'accelerate the initialization.')

        # Caller-supplied `extensions` wins because **kwargs is unpacked last.
        kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}
        super().__init__(
            data_root=data_root,
            data_prefix=data_prefix,
            ann_file=ann_file,
            metainfo=metainfo,
            **kwargs)

        # No default category list ships with this class, so warn when the
        # caller did not provide one either.
        if self.CLASSES is None:
            logger.warning(
                'The CLASSES is not stored in the `ImageNet21k` class. '
                'Considering to specify the `classes` argument if you need '
                'do inference on the ImageNet-21k dataset')