Manmay committed on
Commit
b00c1f4
1 Parent(s): 7f2cd3b

Update tortoise/api.py

Browse files
Files changed (1) hide show
  1. tortoise/api.py +9 -4
tortoise/api.py CHANGED
@@ -296,11 +296,16 @@ class TextToSpeech:
296
 
297
  # Perform the crossfade if there is an overlap
298
  if wav_overlap is not None:
299
- crossfade_window = torch.linspace(0.0, 1.0, overlap_len).to(wav_gen.device)
 
300
 
301
- crossfade_wav = wav_chunk[:overlap_len] * crossfade_window
302
- wav_chunk[:overlap_len] = wav_overlap * (1 - crossfade_window)
303
- wav_chunk[:overlap_len] += crossfade_wav
 
 
 
 
304
 
305
  # Save the last part of this chunk for overlapping with the next chunk
306
  wav_overlap = wav_gen[-overlap_len:]
 
296
 
297
  # Perform the crossfade if there is an overlap
298
  if wav_overlap is not None:
299
+ # Using a Hanning window for smoother transition
300
+ crossfade_window = torch.hann_window(overlap_len).to(wav_gen.device)
301
 
302
+ # Equal-power crossfade
303
+ crossfade_wav = torch.sqrt(crossfade_window) * wav_chunk[:overlap_len]
304
+ wav_overlap = torch.sqrt(1 - crossfade_window) * wav_overlap
305
+
306
+ # Overlap-Add (OLA) for merging audio chunks
307
+ wav_chunk[:overlap_len] = F.pad(wav_overlap, (0, wav_chunk[:overlap_len].shape[0] - wav_overlap.shape[0])) + \
308
+ F.pad(crossfade_wav, (0, wav_chunk[:overlap_len].shape[0] - crossfade_wav.shape[0]))
309
 
310
  # Save the last part of this chunk for overlapping with the next chunk
311
  wav_overlap = wav_gen[-overlap_len:]