import numpy as np
import h5py
import csv
import time
import logging

from utilities import int16_to_float32


def read_black_list(black_list_csv):
    """Read audio names from black list.

    Args:
      black_list_csv: str, path of a csv file; the first column of every row
        is used as the audio id.

    Returns:
      black_list_names: list of str, one 'Y<id>.wav' entry per non-empty row.
    """
    with open(black_list_csv, 'r') as fr:
        reader = csv.reader(fr)
        # csv.reader yields an empty list for a blank line; skip such rows
        # instead of failing on line[0].
        black_list_names = [
            'Y{}.wav'.format(line[0]) for line in reader if line]

    return black_list_names


class AudioSetDataset(object):
    # Slice step applied to the stored waveform for each supported sample
    # rate. 32000 maps to "return the waveform untouched".
    _RESAMPLE_STEPS = {32000: 1, 16000: 2, 8000: 4}

    def __init__(self, sample_rate=32000):
        """This class takes the meta of an audio clip as input, and return
        the waveform and target of the audio clip. This class is used by
        DataLoader.

        Args:
          sample_rate: int, one of 32000, 16000 or 8000. Other values are
            only rejected when resample() is called.
        """
        self.sample_rate = sample_rate

    def __getitem__(self, meta):
        """Load waveform and target of an audio clip.

        Args:
          meta: {
            'hdf5_path': str,
            'index_in_hdf5': int}

        Returns:
          data_dict: {
            'audio_name': str,
            'waveform': (clip_samples,),
            'target': (classes_num,)}
        """
        hdf5_path = meta['hdf5_path']
        index_in_hdf5 = meta['index_in_hdf5']

        with h5py.File(hdf5_path, 'r') as hf:
            audio_name = hf['audio_name'][index_in_hdf5].decode()
            waveform = int16_to_float32(hf['waveform'][index_in_hdf5])
            waveform = self.resample(waveform)
            target = hf['target'][index_in_hdf5].astype(np.float32)

        data_dict = {
            'audio_name': audio_name,
            'waveform': waveform,
            'target': target}

        return data_dict

    def resample(self, waveform):
        """Resample by keeping every n-th sample (no filtering is applied).

        Args:
          waveform: (clip_samples,)

        Returns:
          (resampled_clip_samples,)

        Raises:
          ValueError: if self.sample_rate is not 32000, 16000 or 8000.
        """
        try:
            step = self._RESAMPLE_STEPS[self.sample_rate]
        except (KeyError, TypeError):
            # ValueError is a subclass of Exception, so callers that caught
            # the previous bare Exception keep working.
            raise ValueError('Incorrect sample rate!')

        if step == 1:
            # Return the input object itself, not a sliced view.
            return waveform

        return waveform[0 :: step]


class Base(object):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv,
        random_seed):
        """Base class of train sampler.

        Args:
          indexes_hdf5_path: string
          batch_size: int
          black_list_csv: string
          random_seed: int
        """
        self.batch_size = batch_size
        self.random_state = np.random.RandomState(random_seed)

        # Black list
        if black_list_csv:
            self.black_list_names = read_black_list(black_list_csv)
        else:
            self.black_list_names = []

        # The list attribute is kept for existing callers. Membership tests
        # during sampling use this set, built once here, so each lookup is
        # O(1) instead of a scan over the whole black list.
        self._black_list_set = frozenset(self.black_list_names)

        logging.info('Black list samples: {}'.format(
            len(self.black_list_names)))

        # Load target
        load_time = time.time()

        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [
                audio_name.decode() for audio_name in hf['audio_name'][:]]
            self.hdf5_paths = [
                hdf5_path.decode() for hdf5_path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        (self.audios_num, self.classes_num) = self.targets.shape
        logging.info('Training number: {}'.format(self.audios_num))
        logging.info('Load target time: {:.3f} s'.format(
            time.time() - load_time))

    def _in_black_list(self, audio_name):
        """Return True if audio_name was black-listed at construction."""
        return audio_name in self._black_list_set

    def _make_meta(self, index):
        """Build the meta dict of the clip at position index."""
        return {
            'hdf5_path': self.hdf5_paths[index],
            'index_in_hdf5': self.indexes_in_hdf5[index]}


class TrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Sampler that walks through a shuffled list of all audio clips.
        Generate batch meta for training.

        Args:
          indexes_hdf5_path: string
          batch_size: int
          black_list_csv: string
          random_seed: int
        """
        super(TrainSampler, self).__init__(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.indexes = np.arange(self.audios_num)

        # Shuffle indexes
        self.random_state.shuffle(self.indexes)

        self.pointer = 0

    def _sample_batch(self, batch_size):
        """Collect the meta of the next batch_size clips that are not in
        the black list, advancing self.pointer.

        NOTE: this loops forever if every clip is black-listed.
        """
        batch_meta = []

        while len(batch_meta) < batch_size:
            index = self.indexes[self.pointer]
            self.pointer += 1

            # Shuffle indexes and reset pointer
            if self.pointer >= self.audios_num:
                self.pointer = 0
                self.random_state.shuffle(self.indexes)

            # If audio in black list then continue
            if self._in_black_list(self.audio_names[index]):
                continue

            batch_meta.append(self._make_meta(index))

        return batch_meta

    def __iter__(self):
        """Generate batch meta for training. The generator never ends.

        Returns:
          batch_meta: e.g.: [
            {'hdf5_path': string, 'index_in_hdf5': int},
            ...]
        """
        batch_size = self.batch_size

        while True:
            yield self._sample_batch(batch_size)

    def state_dict(self):
        """Return a snapshot of the sampling position.

        The index array is copied because the sampler shuffles it in place;
        without the copy a state held in memory would change afterwards.
        """
        state = {
            'indexes': self.indexes.copy(),
            'pointer': self.pointer}
        return state

    def load_state_dict(self, state):
        """Restore the sampling position produced by state_dict()."""
        # Copy so later in-place shuffles do not modify the caller's state.
        self.indexes = np.array(state['indexes'])
        self.pointer = state['pointer']


class BalancedTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Balanced sampler. Generate batch meta for training. Data are
        equally sampled from different sound classes.

        Args:
          indexes_hdf5_path: string
          batch_size: int
          black_list_csv: string
          random_seed: int
        """
        super(BalancedTrainSampler, self).__init__(indexes_hdf5_path,
            batch_size, black_list_csv, random_seed)

        self.samples_num_per_class = np.sum(self.targets, axis=0)
        logging.info('samples_num_per_class: {}'.format(
            self.samples_num_per_class.astype(np.int32)))

        # Training indexes of all sound classes. E.g.:
        # [[0, 11, 12, ...], [3, 4, 15, 16, ...], [7, 8, ...], ...]
        self.indexes_per_class = [
            np.where(self.targets[:, k] == 1)[0]
            for k in range(self.classes_num)]

        # Shuffle indexes
        for k in range(self.classes_num):
            self.random_state.shuffle(self.indexes_per_class[k])

        self.queue = []
        self.pointers_of_classes = [0] * self.classes_num

    def expand_queue(self, queue):
        """Append one shuffled pass over the sound classes to queue.

        Classes without any clip are left out, because drawing from them
        would index into an empty array.

        Args:
          queue: list of int, class ids waiting to be sampled

        Returns:
          queue: the same list object, extended in place

        Raises:
          ValueError: if no class has any clip.
        """
        classes_set = [
            k for k in range(self.classes_num)
            if len(self.indexes_per_class[k]) > 0]

        if not classes_set:
            raise ValueError('No sound class has any training sample!')

        self.random_state.shuffle(classes_set)
        queue += classes_set
        return queue

    def _sample_batch(self, batch_size):
        """Collect the meta of the next batch_size clips, taking one clip
        per class id popped from self.queue and skipping black-listed clips.
        """
        batch_meta = []

        while len(batch_meta) < batch_size:
            if len(self.queue) == 0:
                self.queue = self.expand_queue(self.queue)

            class_id = self.queue.pop(0)
            class_indexes = self.indexes_per_class[class_id]
            pointer = self.pointers_of_classes[class_id]
            self.pointers_of_classes[class_id] += 1
            index = class_indexes[pointer]

            # When finish one epoch of a sound class, then shuffle its
            # indexes and reset pointer. The length of the index list is
            # used as the bound so the pointer can never run past it.
            if self.pointers_of_classes[class_id] >= len(class_indexes):
                self.pointers_of_classes[class_id] = 0
                self.random_state.shuffle(class_indexes)

            # If audio in black list then continue
            if self._in_black_list(self.audio_names[index]):
                continue

            batch_meta.append(self._make_meta(index))

        return batch_meta

    def __iter__(self):
        """Generate batch meta for training. The generator never ends.

        Returns:
          batch_meta: e.g.: [
            {'hdf5_path': string, 'index_in_hdf5': int},
            ...]
        """
        batch_size = self.batch_size

        while True:
            yield self._sample_batch(batch_size)

    def state_dict(self):
        """Return a snapshot of the sampling position.

        Every container is copied because the sampler mutates all of them
        in place while iterating.
        """
        state = {
            'indexes_per_class': [
                indexes.copy() for indexes in self.indexes_per_class],
            'queue': list(self.queue),
            'pointers_of_classes': list(self.pointers_of_classes)}
        return state

    def load_state_dict(self, state):
        """Restore the sampling position produced by state_dict()."""
        # Copy so later in-place updates do not modify the caller's state.
        self.indexes_per_class = [
            np.array(indexes) for indexes in state['indexes_per_class']]
        self.queue = list(state['queue'])
        self.pointers_of_classes = list(state['pointers_of_classes'])


class AlternateTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """AlternateSampler is a combination of Sampler and Balanced Sampler.
        AlternateSampler alternately sample data from Sampler and Balanced
        Sampler.

        Base.__init__ is not called here; all data lives in the two wrapped
        samplers.

        Args:
          indexes_hdf5_path: string
          batch_size: int
          black_list_csv: string
          random_seed: int
        """
        self.sampler1 = TrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.sampler2 = BalancedTrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.batch_size = batch_size
        self.count = 0

    def __iter__(self):
        """Generate batch meta for training. The generator never ends.

        Odd batches (the first one included) come from the balanced
        sampler, even batches from the plain sampler.

        Returns:
          batch_meta: e.g.: [
            {'hdf5_path': string, 'index_in_hdf5': int},
            ...]
        """
        batch_size = self.batch_size

        while True:
            self.count += 1

            if self.count % 2 == 0:
                batch_meta = self.sampler1._sample_batch(batch_size)
            else:
                batch_meta = self.sampler2._sample_batch(batch_size)

            yield batch_meta

    def state_dict(self):
        """Return the state of both wrapped samplers and the batch counter."""
        state = {
            'sampler1': self.sampler1.state_dict(),
            'sampler2': self.sampler2.state_dict(),
            'count': self.count}
        return state

    def load_state_dict(self, state):
        """Restore the state produced by state_dict()."""
        self.sampler1.load_state_dict(state['sampler1'])
        self.sampler2.load_state_dict(state['sampler2'])
        # States saved before 'count' was stored restart the alternation.
        self.count = state.get('count', 0)


class EvaluateSampler(object):
    def __init__(self, indexes_hdf5_path, batch_size):
        """Evaluate sampler. Generate batch meta for evaluation.

        Args:
          indexes_hdf5_path: string
          batch_size: int
        """
        self.batch_size = batch_size

        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [
                audio_name.decode() for audio_name in hf['audio_name'][:]]
            self.hdf5_paths = [
                hdf5_path.decode() for hdf5_path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        self.audios_num = len(self.audio_names)

    def __iter__(self):
        """Generate batch meta for evaluation. Every clip is visited once,
        in stored order; the last batch may be smaller than batch_size.

        Returns:
          batch_meta: e.g.: [
            {'audio_name': str, 'hdf5_path': string, 'index_in_hdf5': int,
             'target': (classes_num,)},
            ...]
        """
        batch_size = self.batch_size
        pointer = 0

        while pointer < self.audios_num:
            batch_end = min(pointer + batch_size, self.audios_num)

            batch_meta = [{
                'audio_name': self.audio_names[index],
                'hdf5_path': self.hdf5_paths[index],
                'index_in_hdf5': self.indexes_in_hdf5[index],
                'target': self.targets[index]}
                for index in range(pointer, batch_end)]

            pointer += batch_size
            yield batch_meta


def collate_fn(list_data_dict):
    """Collate data.

    Args:
      list_data_dict, e.g., [{'audio_name': str, 'waveform': (clip_samples,), ...},
                             {'audio_name': str, 'waveform': (clip_samples,), ...},
                             ...]

    Returns:
      np_data_dict, dict, e.g.,
          {'audio_name': (batch_size,), 'waveform': (batch_size, clip_samples), ...}
      An empty input list gives an empty dict.
    """
    if not list_data_dict:
        return {}

    # The keys of the first item decide which fields are collated.
    np_data_dict = {
        key: np.array([data_dict[key] for data_dict in list_data_dict])
        for key in list_data_dict[0].keys()}

    return np_data_dict