{ "input_wavs_dir": "/private/home/adampolyak/datasets/LJ/LJSpeech-1.1/wavs_16khz_padded", "input_training_file": "/large_experiments/ust/annl/datasets/tts/TAT/filelist/Zh_Hokkien_pretrain_it3_alpha0.05_400k_it2_400k_it1_400k/layer12.km2500_frame_TAT-TTS-all_train.txt", "input_validation_file": "/large_experiments/ust/annl/datasets/tts/TAT/filelist/Zh_Hokkien_pretrain_it3_alpha0.05_400k_it2_400k_it1_400k/layer12.km2500_frame_TAT-TTS-all_dev.txt", "resblock": "1", "num_gpus": 0, "batch_size": 16, "learning_rate": 0.0002, "adam_b1": 0.8, "adam_b2": 0.99, "lr_decay": 0.999, "seed": 1234, "upsample_rates": [5,4,4,2,2], "upsample_kernel_sizes": [11,8,8,4,4], "upsample_initial_channel": 512, "resblock_kernel_sizes": [3,7,11], "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], "num_embeddings": 2500, "embedding_dim": 128, "model_in_dim": 256, "multispkr": "_", "segment_size": 8960, "code_hop_size": 320, "f0": false, "num_mels": 80, "num_freq": 1025, "n_fft": 1024, "hop_size": 256, "win_size": 1024, "dur_prediction_weight": 1.0, "dur_predictor_params": { "encoder_embed_dim": 128, "var_pred_hidden_dim": 128, "var_pred_kernel_size": 3, "var_pred_dropout": 0.5 }, "sampling_rate": 16000, "fmin": 0, "fmax": 8000, "fmax_for_loss": null, "num_workers": 4, "dist_config": { "dist_backend": "nccl", "dist_url": "env://" } }