{
  "text_encoder": {
    "input_size": 512,
    "encoder_layers": 8,
    "encoder_heads": 2,
    "encoder_hidden": 384,
    "encoder_conv_kernel_size": [
      9,
      1
    ],
    "encoder_dropout": 0.2,
    "vocab_size": 366,
    "num_langs": 2,
    "num_speakers": 2,
    "max_position": 3072,
    "output_size": 512
  },
  "duration_predictor": {
    "input_size": 512,
    "hidden_size": 256,
    "k_size": 5,
    "layers": 3,
    "dropout_rate": 0.2
  },
  "mel_encoder": {
    "mel_bins": 100,
    "hidden_size": 512,
    "num_layers": 6,
    "kernel_size": 5,
    "dropout_rate": 0.2
  },
  "mel_decoder": {
    "mel_bins": 100,
    "hidden_size": 512,
    "num_layers": 6,
    "kernel_size": 5,
    "dropout_rate": 0.2
  },
  "post_net": {
    "n_mel_channels": 100,
    "postnet_embedding_dim": 512,
    "postnet_kernel_size": 9,
    "postnet_n_convolutions": 5
  },
  "wav_encoder": {
    "mel_bins": 100,
    "filter_length": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "sampling_rate": 24000,
    "normalize": false,
    "power": 1,
    "norm": null,
    "center": true
  },
  "wav_decoder": {
    "vocos_model_id": "charactr/vocos-mel-24khz"
  },
  "delta": 0.2,
  "look_ahead": 3
}