# Experiment configuration. Each component table names a `path` and may carry
# an `args` sub-table (and, for datasets, a `dataloader` sub-table).
# NOTE(review): `path` values look like "module.Class" import targets, with
# `args` passed to them - confirm against the code that loads this file.

# Array-of-tables: one validation set is defined here. A further
# [[validate_dataset]] element may be appended after this element's sub-tables.
[[validate_dataset]]
path = "dataloader.Dataset"

[validate_dataset.args]
# TOML performs no "~" expansion; presumably the consumer expands it - confirm.
librispeech_dir = "~/data/LibriSpeech/LibriSpeech"
# NOTE(review): same train-clean-100 metadata file as [train_dataset.args] -
# confirm that validating on the training metadata is intended.
# Machine-specific absolute path.
librispeech_metadata_fpath = "/home/xhao/proj/audiozen/recipes/librimix_sot/local/metadata/LibriSpeech/train-clean-100-24K.csv"
duration = 6.0  # presumably seconds - confirm
sr = 24000  # same value as [acoustics].sr
num_samples = 10

[validate_dataset.dataloader]
batch_size = 1
num_workers = 1

[meta]
save_dir = "exp"
# NOTE(review): mentions GANs, while [loss_function] is torch.nn.MSELoss -
# confirm the description fits this recipe.
description = "Train a model using Generative Adversarial Networks (GANs)"
seed = 20220815
# NOTE(review): the name says "LR1e-2" but [optimizer.args].lr is 0.001 (1e-3) -
# confirm which one is intended.
exp_id = "swin_default_LR1e-2_AR-NAR"
# Machine-specific absolute path; its root (/fred/oz325/xhao) differs from the
# root used by the metadata paths (/home/xhao).
config_path = "/fred/oz325/xhao/proj/audiozen/recipes/librimix_sot/tokenizer_separation/conf/swin_default_LR1e-2_AR-NAR.toml"

[trainer]
path = "trainer.Trainer"

[trainer.args]
debug = false
# NOTE(review): presumably 0 means "no step limit, stop on max_epochs" - confirm.
max_steps = 0
max_epochs = 1000
max_grad_norm = 1.0
save_max_score = true
# NOTE(review): units of the two intervals below (steps vs. epochs) are not
# stated in this file - confirm against the trainer.
save_ckpt_interval = 5
max_patience = 200
plot_norm = true
validation_interval = 200
max_num_checkpoints = 100
scheduler_name = "constant_schedule_with_warmup"
# NOTE(review): both warmup_steps and warmup_ratio are set; presumably a ratio
# of 0.0 defers to warmup_steps - confirm precedence in the trainer.
warmup_steps = 1000
warmup_ratio = 0.0
gradient_accumulation_steps = 1

[loss_function]
path = "torch.nn.MSELoss"

# Empty table: no arguments are set for the loss function.
[loss_function.args]

[optimizer]
path = "torch.optim.AdamW"

[optimizer.args]
lr = 0.001

[model]
path = "model_ar_nar.Model"

# Empty table: no arguments are set for the model.
[model.args]

[acoustics]
n_fft = 512
hop_length = 128
win_length = 512
sr = 24000

[train_dataset]
path = "dataloader.Dataset"

[train_dataset.args]
# TOML performs no "~" expansion; presumably the consumer expands it - confirm.
librispeech_dir = "~/data/LibriSpeech/LibriSpeech"
# Machine-specific absolute path.
librispeech_metadata_fpath = "/home/xhao/proj/audiozen/recipes/librimix_sot/local/metadata/LibriSpeech/train-clean-100-24K.csv"
duration = 6.0  # presumably seconds - confirm
sr = 24000  # same value as [acoustics].sr

[train_dataset.dataloader]
batch_size = 20
num_workers = 10
drop_last = true
pin_memory = true

[test_dataset]
path = "dataloader.Dataset"

[test_dataset.args]
# TOML performs no "~" expansion; presumably the consumer expands it - confirm.
librispeech_dir = "~/data/LibriSpeech/LibriSpeech"
# NOTE(review): same train-clean-100 metadata file as [train_dataset.args] -
# confirm that testing on the training metadata is intended.
# Machine-specific absolute path.
librispeech_metadata_fpath = "/home/xhao/proj/audiozen/recipes/librimix_sot/local/metadata/LibriSpeech/train-clean-100-24K.csv"
duration = 6.0  # presumably seconds - confirm
sr = 24000  # same value as [acoustics].sr
num_samples = 10

[test_dataset.dataloader]
batch_size = 1
num_workers = 1