# Comparative-Analysis-of-Speech-Synthesis-Models/TensorFlowTTS/preprocess/synpaflex_preprocess.yaml
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# Parameters for silence trimming and mel-basis feature extraction.
sampling_rate: 22050      # Sampling rate.
fft_size: 1024            # FFT size.
hop_size: 256             # Hop size (fixed value; do not change).
win_length: null          # Window length.
                          # If set to null, it will be the same as fft_size.
window: "hann"            # Window function.
num_mels: 80              # Number of mel basis.
fmin: 80                  # Minimum frequency in mel basis calculation.
fmax: 7600                # Maximum frequency in mel basis calculation.
global_gain_scale: 1.0    # Multiplied into the whole waveform.
trim_silence: true        # Whether to trim silence at the start and end.
trim_threshold_in_db: 20  # Tune carefully if the recording quality is poor.
trim_frame_size: 2048     # Frame size in trimming.
trim_hop_size: 512        # Hop size in trimming.
format: "npy"             # Feature file format. Only "npy" is supported.