import bisect
import warnings
from torch._utils import _accumulate
from torch import randperm


class Dataset(object):
    """An abstract class representing a Dataset.

    All other datasets should subclass it. All subclasses should override
    ``__len__``, which provides the size of the dataset, and ``__getitem__``,
    which supports integer indexing in the range from 0 to len(self) exclusive.
    """

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])
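

# --- Illustrative sketch (not part of the original file): a minimal Dataset
# subclass showing the two required overrides. ``SquaresDataset`` and the
# function name below are hypothetical, used only for this example.
def _example_dataset_subclass():
    class SquaresDataset(Dataset):
        def __init__(self, n):
            self.n = n

        def __len__(self):
            return self.n

        def __getitem__(self, index):
            return index, index ** 2

    ds = SquaresDataset(4)
    assert len(ds) == 4
    assert ds[2] == (2, 4)
    # __add__ chains two datasets into a ConcatDataset (defined below).
    assert len(ds + ds) == 8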


class TensorDataset(Dataset):
    """Dataset wrapping data and target tensors.

    Each sample will be retrieved by indexing both tensors along the first
    dimension.

    Arguments:
        data_tensor (Tensor): contains sample data.
        target_tensor (Tensor): contains sample targets (labels).
    """

    def __init__(self, data_tensor, target_tensor):
        assert data_tensor.size(0) == target_tensor.size(0)
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor

    def __getitem__(self, index):
        return self.data_tensor[index], self.target_tensor[index]

    def __len__(self):
        return self.data_tensor.size(0)
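

# --- Illustrative sketch (not part of the original file): wrapping two
# aligned tensors; both are indexed along the first dimension.
def _example_tensor_dataset():
    import torch
    data = torch.randn(5, 3)      # 5 samples, 3 features each
    targets = torch.zeros(5)      # one target per sample
    ds = TensorDataset(data, targets)
    assert len(ds) == 5
    x, y = ds[0]                  # x is data[0], y is targets[0]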


class ConcatDataset(Dataset):
    """Dataset to concatenate multiple datasets.

    This class is useful for assembling different existing datasets, possibly
    large-scale ones, since the concatenation is done on the fly.

    Arguments:
        datasets (iterable): List of datasets to be concatenated.
    """

    @staticmethod
    def cumsum(sequence):
        # Running total of dataset lengths, e.g. [3, 2] -> [3, 5].
        r, s = [], 0
        for e in sequence:
            l = len(e)
            r.append(l + s)
            s += l
        return r

    def __init__(self, datasets):
        super(ConcatDataset, self).__init__()
        assert len(datasets) > 0, 'datasets should not be an empty iterable'
        self.datasets = list(datasets)
        self.cumulative_sizes = self.cumsum(self.datasets)

    def __len__(self):
        return self.cumulative_sizes[-1]

    def __getitem__(self, idx):
        # Locate the dataset containing idx, then translate idx into a
        # local index within that dataset.
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        return self.datasets[dataset_idx][sample_idx]

    @property
    def cummulative_sizes(self):
        warnings.warn("cummulative_sizes attribute is renamed to "
                      "cumulative_sizes", DeprecationWarning, stacklevel=2)
        return self.cumulative_sizes
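

# --- Illustrative sketch (not part of the original file): concatenating two
# TensorDatasets and indexing across the boundary between them.
def _example_concat_dataset():
    import torch
    a = TensorDataset(torch.zeros(3, 2), torch.zeros(3))
    b = TensorDataset(torch.ones(2, 2), torch.ones(2))
    ds = ConcatDataset([a, b])
    assert len(ds) == 5           # cumulative_sizes == [3, 5]
    x, y = ds[3]                  # global index 3 maps to b[0] via bisect
    assert float(y) == 1.0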


class Subset(Dataset):
    """Subset of a dataset at specified indices.

    Arguments:
        dataset (Dataset): The whole dataset.
        indices (sequence): Indices in the whole set selected for the subset.
    """

    def __init__(self, dataset, indices):
        self.dataset = dataset
        self.indices = indices

    def __getitem__(self, idx):
        return self.dataset[self.indices[idx]]

    def __len__(self):
        return len(self.indices)
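

# --- Illustrative sketch (not part of the original file): a Subset remaps
# its local indices onto the wrapped dataset.
def _example_subset():
    import torch
    ds = TensorDataset(torch.randn(10, 2), torch.arange(10))
    sub = Subset(ds, [7, 2, 5])
    assert len(sub) == 3
    x, y = sub[0]                 # sub[0] is ds[7]
    assert int(y) == 7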


def random_split(dataset, lengths):
    """Randomly split a dataset into non-overlapping new datasets of given
    lengths.

    Arguments:
        dataset (Dataset): Dataset to be split.
        lengths (iterable): Lengths of the splits to be produced.
    """
    if sum(lengths) != len(dataset):
        raise ValueError("Sum of input lengths does not equal the length of "
                         "the input dataset!")
    # One shared permutation guarantees the resulting subsets are disjoint.
    indices = randperm(sum(lengths))
    return [Subset(dataset, indices[offset - length:offset])
            for offset, length in zip(_accumulate(lengths), lengths)]
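

# --- Illustrative sketch (not part of the original file): an 80/20 split of
# a ten-sample dataset into disjoint training and validation subsets.
def _example_random_split():
    import torch
    ds = TensorDataset(torch.randn(10, 4), torch.zeros(10))
    train, val = random_split(ds, [8, 2])
    assert len(train) == 8 and len(val) == 2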