{
  "architectures": ["E2TTS"],
  "backbone": {
    "dim": 1024,
    "depth": 26,
    "heads": 16,
    "ff_mult": 4
  },
  "mel_spectrogram": {
    "n_mel_channels": 100,
    "n_fft": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "target_sample_rate": 24000,
    "mel_spec_type": "vocos"
  },
  "odeint": {
    "method": "euler"
  },
  "sampling": {
    "nfe_step": 32,
    "cfg_strength": 2.0,
    "sway_sampling_coef": -1.0,
    "target_rms": 0.1
  }
}