{
  "train": {
    "segment_size": 16384
  },
  "data": {
    "sampling_rate": 44100,
    "filter_length": 2048,
    "hop_length": 512,
    "add_blank": true,
    "n_speakers": 256,
    "spk2id": {
      "ZH": 1
    }
  },
  "model": {
    "use_spk_conditioned_encoder": true,
    "use_noise_scaled_mas": true,
    "use_mel_posterior_encoder": false,
    "use_duration_discriminator": true,
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "n_layers_trans_flow": 3,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [
      3,
      7,
      11
    ],
    "resblock_dilation_sizes": [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    "upsample_rates": [
      8,
      8,
      2,
      2,
      2
    ],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [
      16,
      16,
      8,
      2,
      2
    ],
    "n_layers_q": 3,
    "use_spectral_norm": false,
    "gin_channels": 256
  },
  "symbols": [
    "_",
    "AA",
    "E",
    "EE",
    "En",
    "N",
    "OO",
    "V",
    "a",
    "a:",
    "aa",
    "ae",
    "ah",
    "ai",
    "an",
    "ang",
    "ao",
    "aw",
    "ay",
    "b",
    "by",
    "c",
    "ch",
    "d",
    "dh",
    "dy",
    "e",
    "e:",
    "eh",
    "ei",
    "en",
    "eng",
    "er",
    "ey",
    "f",
    "g",
    "gy",
    "h",
    "hh",
    "hy",
    "i",
    "i0",
    "i:",
    "ia",
    "ian",
    "iang",
    "iao",
    "ie",
    "ih",
    "in",
    "ing",
    "iong",
    "ir",
    "iu",
    "iy",
    "j",
    "jh",
    "k",
    "ky",
    "l",
    "m",
    "my",
    "n",
    "ng",
    "ny",
    "o",
    "o:",
    "ong",
    "ou",
    "ow",
    "oy",
    "p",
    "py",
    "q",
    "r",
    "ry",
    "s",
    "sh",
    "t",
    "th",
    "ts",
    "ty",
    "u",
    "u:",
    "ua",
    "uai",
    "uan",
    "uang",
    "uh",
    "ui",
    "un",
    "uo",
    "uw",
    "v",
    "van",
    "ve",
    "vn",
    "w",
    "x",
    "y",
    "z",
    "zh",
    "zy",
    "!",
    "?",
    "…",
    ",",
    ".",
    "'",
    "-",
    "SP",
    "UNK"
  ],
  "num_tones": 11,
  "num_languages": 4
}