TTP / mmpretrain /datasets /caltech101.py
KyanChen's picture
Upload 1861 files
3b96cb1
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
from mmengine import get_file_backend, list_from_file
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import CALTECH101_CATEGORIES
@DATASETS.register_module()
class Caltech101(BaseDataset):
    """The Caltech101 Dataset.

    Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Caltech101 dataset directory: ::

        caltech-101
        ├── 101_ObjectCategories
        │   ├── class_x
        │   │   ├── xx1.jpg
        │   │   ├── xx2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── yy1.jpg
        │   │   ├── yy2.jpg
        │   │   └── ...
        │   └── ...
        ├── Annotations
        │   ├── class_x
        │   │   ├── xx1.mat
        │   │   └── ...
        │   └── ...
        ├── meta
        │   ├── train.txt
        │   └── test.txt
        └── ....

    Please note that since there is no official splitting for training and
    test set, you can use the train.txt and test.txt provided by us or
    create your own annotation files. Here is the download
    `link <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
    for the annotations.

    Each non-empty line of an annotation file must have the form
    ``<image path> <integer label>``, where the image path is joined onto
    the image prefix (``101_ObjectCategories``) of the dataset.

    Args:
        data_root (str): The root directory for the Caltech101 dataset.
        split (str, optional): The dataset split, supports "train" and "test".
            Default to "train".
        **kwargs: Other keyword arguments forwarded unchanged to the
            ``BaseDataset`` constructor.

    Examples:
        >>> from mmpretrain.datasets import Caltech101
        >>> train_dataset = Caltech101(data_root='data/caltech-101', split='train')
        >>> train_dataset
        Dataset Caltech101
            Number of samples:  3060
            Number of categories:       102
            Root of dataset:    data/caltech-101
        >>> test_dataset = Caltech101(data_root='data/caltech-101', split='test')
        >>> test_dataset
        Dataset Caltech101
            Number of samples:  6728
            Number of categories:       102
            Root of dataset:    data/caltech-101
    """  # noqa: E501

    METAINFO = {'classes': CALTECH101_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        splits = ['train', 'test']
        # Kept as an ``assert`` so that callers relying on ``AssertionError``
        # for an unsupported split keep working.
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        # The backend is created before the parent constructor runs because
        # it is needed right here to build the annotation path and later in
        # ``load_data_list`` to build image paths.
        self.backend = get_file_backend(data_root, enable_singleton=True)

        # ``split`` was validated above, so this resolves to either
        # ``meta/train.txt`` or ``meta/test.txt``.
        ann_file = self.backend.join_path('meta', f'{split}.txt')

        super().__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix='101_ObjectCategories',
            test_mode=(split == 'test'),
            **kwargs)

    def load_data_list(self) -> List[dict]:
        """Load images and ground truth labels.

        Returns:
            List[dict]: One dict per annotated image, holding ``img_path``
            (image prefix joined with the annotated path) and ``gt_label``
            (the label parsed as ``int``).

        Raises:
            ValueError: If a non-empty annotation line is not of the form
                ``<image path> <integer label>``.
        """
        data_list = []
        for line_no, raw_line in enumerate(
                list_from_file(self.ann_file), start=1):
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines instead of failing on unpacking.
                continue

            try:
                # Split on the LAST run of whitespace only, so that an image
                # path which itself contains spaces is kept intact.
                path, gt_label = line.rsplit(maxsplit=1)
                label = int(gt_label)
            except ValueError as err:
                raise ValueError(
                    f'Malformed annotation in {self.ann_file} '
                    f'(line {line_no}): expected '
                    f"'<image path> <integer label>', got {raw_line!r}"
                ) from err

            img_path = self.backend.join_path(self.img_prefix, path)
            data_list.append(dict(img_path=img_path, gt_label=label))

        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset.

        Returns:
            List[str]: A single line reporting the dataset root directory.
        """
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body