import argparse
import os

import numpy as np
import torch
from torch.utils.data import DataLoader


class CustomDataset(torch.utils.data.Dataset):
    """Load per-segment embeddings (.npy files) from a sample directory."""

    def __init__(self, sample_dir, embed_dim=384, train=False, time=2):
        self.sample_dir = sample_dir
        self.n_segments = len(os.listdir(self.sample_dir))
        self.data = np.zeros((self.n_segments, embed_dim))

        # Sort segments by start time, assuming filenames of the form
        # <name>_<start_time>_<end_time>.npy
        self.sorted_segments = sorted(
            os.listdir(sample_dir), key=lambda x: float(x.split("_")[-2])
        )

        # Load each segment embedding into the data matrix
        for idx, segment_npy in enumerate(self.sorted_segments):
            segment_path = os.path.join(self.sample_dir, segment_npy)
            self.data[idx] = np.load(segment_path)

        if train:
            # Each pass doubles the data, so this yields 2**time copies of
            # every segment (simple oversampling for training). The loop
            # variable is renamed so it no longer shadows the parameter.
            for _ in range(time):
                self.data = np.concatenate((self.data, self.data), axis=0)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.from_numpy(self.data[idx]).float()


class AutoEncoderDataset(torch.utils.data.Dataset):
    """Create a dataset from a predefined tensor for each autoencoder in the MoE."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


parser = argparse.ArgumentParser(description='Deep Clustering Network')

# Dataset parameters
parser.add_argument('--dir', default='./datasets/spanish/',
                    help='dataset directory')
parser.add_argument('--input_dim', type=int, default=384,
                    help='input dimension')
parser.add_argument('--n-classes', type=int, default=2,
                    help='output dimension')

# Training parameters
parser.add_argument('--lr', type=float, default=1e-3,
                    help='learning rate (default: 1e-3)')
parser.add_argument('--wd', type=float, default=1e-4,
                    help='weight decay (default: 1e-4)')
parser.add_argument('--batch-size', type=int, default=16,
                    help='input batch size for training')
parser.add_argument('--epoch', type=int, default=50,
                    help='number of epochs to train')
parser.add_argument('--pre-epoch', type=int, default=100,
                    help='number of pre-train epochs')
# argparse's type=bool treats any non-empty string (including "False") as
# True, so booleans are parsed explicitly instead.
parser.add_argument('--pretrain',
                    type=lambda s: s.lower() in ('true', '1', 'yes'),
                    default=True, help='whether to use pre-training')

# Model parameters
parser.add_argument('--lamda', type=float, default=1,
                    help='coefficient of the reconstruction loss')
parser.add_argument('--beta', type=float, default=1,
                    help=('coefficient of the regularization term on '
                          'clustering'))
parser.add_argument('--hidden-dims', type=int, nargs='+',
                    default=[256, 128, 64, 32, 16],
                    help='hidden layer dimensions')
parser.add_argument('--latent_dim', type=int, default=2,
                    help='latent space dimension')
parser.add_argument('--n-clusters', type=int, default=2,
                    help='number of clusters in the latent space')
parser.add_argument('--n_1Dconv', type=int, default=4,
                    help='number of 1D convolution layers')
parser.add_argument('--kernel_size', type=int, nargs='+',
                    default=[7, 5, 3, 3],
                    help='kernel sizes')
parser.add_argument('--stride', type=int, default=1,
                    help='stride')
parser.add_argument('--num_blocks', type=int, default=4,
                    help='number of blocks')
parser.add_argument('--channels', type=int, nargs='+',
                    default=[128, 64, 32, 16],
                    help='channels')

# Utility parameters
parser.add_argument('--n-jobs', type=int, default=1,
                    help='number of jobs to run in parallel')
parser.add_argument('--log-interval', type=int, default=20,
                    help=('how many batches to wait before logging the '
                          'training status'))
parser.add_argument('--window_length', type=float, default=0.4,
                    help='window length')
parser.add_argument('--overlap', type=float, default=0,
                    help='overlap')

args = parser.parse_args()

if __name__ == "__main__":
    # Example usage:
    sample_dir = "datasets/spanish/segments/0096_[cut_193sec].wav"
    dataset = CustomDataset(sample_dir=sample_dir, train=False)
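    # A minimal sketch of how the datasets might be consumed, assuming the
    # batch size from the parsed arguments; the DataLoader import above is
    # otherwise unused. shuffle=False preserves the temporal order the
    # segments were sorted into.
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)
    for batch in loader:
        print(batch.shape)  # (batch_size, embed_dim), e.g. (16, 384)
        break

    # AutoEncoderDataset wraps an existing tensor; a hypothetical example
    # that feeds all segment embeddings to one expert:
    expert_data = torch.stack([dataset[i] for i in range(len(dataset))])
    ae_dataset = AutoEncoderDataset(expert_data)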