Text-to-Speech
Fairseq
English
audio
File size: 620 Bytes
f1ccb1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7b3f8a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
features:
  # Feature pipeline identifier; interpreted by whatever loads this config.
  type: spectrogram+melscale+log
  # Sampling rate that the frame parameters below are consistent with.
  sample_rate: 22050
  # STFT framing. n_stft equals n_fft / 2 + 1.
  n_fft: 1024
  n_stft: 513
  win_length: 1024
  hop_length: 256
  window_fn: hann
  # win_len_t and hop_len_t equal win_length / sample_rate and
  # hop_length / sample_rate (1024 / 22050 and 256 / 22050).
  win_len_t: 0.046439909297052155
  hop_len_t: 0.011609977324263039
  # Mel band count and frequency range (NOTE(review): presumably Hz - confirm).
  n_mels: 80
  f_min: 0
  f_max: 8000
  # Small constant (NOTE(review): presumably guards the log step - confirm).
  eps: 1.0e-05
  # Pitch and energy value ranges. NOTE(review): the negative minimums suggest
  # these are in a normalized domain - confirm against the consumer.
  pitch_min: -4.660287183665281
  pitch_max: 5.733940816898645
  energy_min: -4.9544901847839355
  energy_max: 3.2244551181793213
# Settings for the `global_cmvn` entry listed under `transforms`: the .npz file
# holding its statistics. NOTE(review): presumably resolved relative to this
# config's directory - confirm with the loader.
global_cmvn:
  stats_npz_path: fbank_mfa_gcmvn_stats.npz
# Feature transforms, keyed by a selector. The '*' key must stay quoted: a bare
# * would be parsed as a YAML alias. NOTE(review): '*' presumably acts as a
# wildcard matching every data split - confirm with the loader.
transforms:
  '*':
    - global_cmvn
# Name of the vocabulary file. NOTE(review): presumably looked up next to this
# config - confirm with the loader.
vocab_filename: vocab.txt
# Vocoder selection: its type plus the config and checkpoint files it uses.
# NOTE(review): paths are bare filenames, so presumably resolved relative to
# this config - confirm with the loader.
vocoder:
  type: hifigan
  config: hifigan.json
  checkpoint: hifigan.pt
# Inference-time options. NOTE(review): `g2p` presumably selects
# grapheme-to-phoneme conversion of the input text - confirm with the consumer.
inference:
  tokenization: g2p