Spaces:
Running
on
T4
Update tortoise/api.py
Browse files- tortoise/api.py +9 -4
tortoise/api.py
CHANGED
@@ -296,11 +296,16 @@ class TextToSpeech:
|
|
296 |
|
297 |
# Perform the crossfade if there is an overlap
|
298 |
if wav_overlap is not None:
|
299 |
-
|
|
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
|
|
|
|
|
|
|
|
|
304 |
|
305 |
# Save the last part of this chunk for overlapping with the next chunk
|
306 |
wav_overlap = wav_gen[-overlap_len:]
|
|
|
296 |
|
297 |
# Perform the crossfade if there is an overlap
|
298 |
if wav_overlap is not None:
|
299 |
+
# Using a Hann window for a smoother transition
|
300 |
+
crossfade_window = torch.hann_window(overlap_len).to(wav_gen.device)
|
301 |
|
302 |
+
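# Equal-power crossfade (NOTE(review): torch.hann_window(overlap_len) rises to 1 mid-overlap and falls back toward 0, so this is not a one-way fade-in; a rising half-window may be intended — confirm)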
|
303 |
+
crossfade_wav = torch.sqrt(crossfade_window) * wav_chunk[:overlap_len]
|
304 |
+
wav_overlap = torch.sqrt(1 - crossfade_window) * wav_overlap
|
305 |
+
|
306 |
+
# Overlap-Add (OLA) for merging audio chunks
|
307 |
+
wav_chunk[:overlap_len] = F.pad(wav_overlap, (0, wav_chunk[:overlap_len].shape[0] - wav_overlap.shape[0])) + \
|
308 |
+
F.pad(crossfade_wav, (0, wav_chunk[:overlap_len].shape[0] - crossfade_wav.shape[0]))
|
309 |
|
310 |
# Save the last part of this chunk for overlapping with the next chunk
|
311 |
wav_overlap = wav_gen[-overlap_len:]
|