Spaces:

KyanChen
/

TTP

Runtime error

App Files Files Community

TTP / mmpretrain /datasets /caltech101.py

KyanChen

Upload 1861 files

3b96cb1 about 1 year ago

raw

history blame contribute delete

3.77 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	from typing import List

	from mmengine import get_file_backend, list_from_file

	from mmpretrain.registry import DATASETS
	from .base_dataset import BaseDataset
	from .categories import CALTECH101_CATEGORIES


	@DATASETS.register_module()
	class Caltech101(BaseDataset):
	"""The Caltech101 Dataset.

	Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_ Dataset.
	After downloading and decompression, the dataset directory structure is as follows.

	Caltech101 dataset directory: ::

	caltech-101
	├── 101_ObjectCategories
	│ ├── class_x
	│ │ ├── xx1.jpg
	│ │ ├── xx2.jpg
	│ │ └── ...
	│ ├── class_y
	│ │ ├── yy1.jpg
	│ │ ├── yy2.jpg
	│ │ └── ...
	│ └── ...
	├── Annotations
	│ ├── class_x
	│ │ ├── xx1.mat
	│ │ └── ...
	│ └── ...
	├── meta
	│ ├── train.txt
	│ └── test.txt
	└── ....

	Please note that since there is no official splitting for training and
	test set, you can use the train.txt and text.txt provided by us or
	create your own annotation files. Here is the download
	`link <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
	for the annotations.

	Args:
	data_root (str): The root directory for the Caltech101 dataset.
	split (str, optional): The dataset split, supports "train" and "test".
	Default to "train".

	Examples:
	>>> from mmpretrain.datasets import Caltech101
	>>> train_dataset = Caltech101(data_root='data/caltech-101', split='train')
	>>> train_dataset
	Dataset Caltech101
	Number of samples: 3060
	Number of categories: 102
	Root of dataset: data/caltech-101
	>>> test_dataset = Caltech101(data_root='data/caltech-101', split='test')
	>>> test_dataset
	Dataset Caltech101
	Number of samples: 6728
	Number of categories: 102
	Root of dataset: data/caltech-101
	""" # noqa: E501

	METAINFO = {'classes': CALTECH101_CATEGORIES}

	def __init__(self, data_root: str, split: str = 'train', **kwargs):

	splits = ['train', 'test']
	assert split in splits, \
	f"The split must be one of {splits}, but get '{split}'"
	self.split = split

	self.backend = get_file_backend(data_root, enable_singleton=True)

	if split == 'train':
	ann_file = self.backend.join_path('meta', 'train.txt')
	else:
	ann_file = self.backend.join_path('meta', 'test.txt')

	data_prefix = '101_ObjectCategories'
	test_mode = split == 'test'

	super(Caltech101, self).__init__(
	ann_file=ann_file,
	data_root=data_root,
	data_prefix=data_prefix,
	test_mode=test_mode,
	**kwargs)

	def load_data_list(self):
	"""Load images and ground truth labels."""

	pairs = list_from_file(self.ann_file)
	data_list = []

	for pair in pairs:
	path, gt_label = pair.split()
	img_path = self.backend.join_path(self.img_prefix, path)
	info = dict(img_path=img_path, gt_label=int(gt_label))
	data_list.append(info)

	return data_list

	def extra_repr(self) -> List[str]:
	"""The extra repr information of the dataset."""
	body = [
	f'Root of dataset: \t{self.data_root}',
	]
	return body