"""
This file contains the definition of different heterogeneous datasets used for training.
"""
import torch
import numpy as np
from .base_dataset import BaseDataset
class MixedDataset(torch.utils.data.Dataset):
    """Heterogeneous training dataset that mixes several BaseDatasets.

    Each ``__getitem__`` call first samples a constituent dataset according
    to the mixing probabilities in ``dataset_mix_pdf``, then returns the
    item at ``index`` (wrapped modulo that dataset's length).

    Args:
        ds_list: names of the datasets to mix. ``'rich'``/``'prox'`` are
            loaded with ``model_type='smplx'``; ``'damon'`` with
            ``model_type='smpl'``. Any other name raises ``ValueError``.
        mode: split/mode string forwarded to ``BaseDataset``.
        dataset_mix_pdf: per-dataset sampling probabilities; must have one
            entry per dataset and sum to 1.0 (checked with a float
            tolerance).
        **kwargs: forwarded unchanged to each ``BaseDataset``.
    """

    def __init__(self, ds_list, mode, dataset_mix_pdf, **kwargs):
        self.dataset_list = ds_list
        print('Training Dataset list: ', self.dataset_list)
        self.num_datasets = len(self.dataset_list)
        self.datasets = []
        for ds in self.dataset_list:
            if ds in ['rich', 'prox']:
                self.datasets.append(BaseDataset(ds, mode, model_type='smplx', **kwargs))
            elif ds in ['damon']:
                self.datasets.append(BaseDataset(ds, mode, model_type='smpl', **kwargs))
            else:
                raise ValueError('Dataset not supported')
        # One "epoch" of the mixture is as long as the largest constituent
        # dataset; smaller datasets are revisited via the modulo in
        # __getitem__.
        self.length = max(len(ds) for ds in self.datasets)
        # Convert list of strings (e.g. from a config file) to floats.
        self.partition = [float(i) for i in dataset_mix_pdf]
        # Validate with explicit exceptions (asserts are stripped under -O)
        # and a float tolerance (exact == 1.0 fails for PDFs like 3 * 1/3).
        if len(self.partition) != self.num_datasets:
            raise ValueError("Number of partitions must be equal to number of datasets")
        if not np.isclose(sum(self.partition), 1.0):
            raise ValueError("Dataset Mix PDF must sum to 1.0 unless you want to weight by dataset size")
        # CDF used for inverse-transform sampling in __getitem__.
        self.partition = np.array(self.partition).cumsum()

    def __getitem__(self, index):
        """Sample a dataset per the mixing CDF, then index into it."""
        p = np.random.rand()
        for i in range(self.num_datasets):
            if p <= self.partition[i]:
                return self.datasets[i][index % len(self.datasets[i])]
        # Guard against float round-off leaving partition[-1] slightly
        # below p: fall back to the last dataset instead of returning None.
        return self.datasets[-1][index % len(self.datasets[-1])]

    def __len__(self):
        return self.length