{
  "architectures": [
    "E2TTS"
  ],
  "backbone": {
    "dim": 1024,
    "depth": 26,
    "heads": 16,
    "ff_mult": 4
  },
  "mel_spectrogram": {
    "n_mel_channels": 100,
    "n_fft": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "target_sample_rate": 24000,
    "mel_spec_type": "vocos"
  },
  "odeint": {
    "method": "euler"
  },
  "sampling": {
    "nfe_step": 32,
    "cfg_strength": 2.0,
    "sway_sampling_coef": -1.0,
    "target_rms": 0.1
  }
}