jbilcke-hf HF staff committed on
Commit
2b8e454
1 Parent(s): fbc5de6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -11
app.py CHANGED
@@ -588,29 +588,35 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
588
  else:
589
  # likely got a ' or " or some other text without alphanumeric in it
590
  audio_stream = None
591
-
 
 
592
  # XTTS is actually using streaming response but we are playing audio by sentence
593
  # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
594
  if audio_stream is not None:
595
  # frame_length = 0
596
  for chunk in audio_stream:
597
  try:
598
- wav_bytestream += chunk
 
599
  # frame_length += len(chunk)
600
  except:
601
  # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
602
  continue
603
 
604
- # Filter output for better voice
605
- filter_output=True
606
- if filter_output:
607
- data_s16 = np.frombuffer(wav_bytestream, dtype=np.int16, count=len(wav_bytestream)//2, offset=0)
608
- float_data = data_s16 * 0.5**15
609
- reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
610
- wav_bytestream = (reduced_noise * 32767).astype(np.int16)
611
- wav_bytestream = wav_bytestream.tobytes()
 
 
 
612
  # Directly encode the WAV bytestream to base64
613
- base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
614
 
615
  if audio_stream is not None:
616
  return (history, base64_audio)
 
588
  else:
589
  # likely got a ' or " or some other text without alphanumeric in it
590
  audio_stream = None
591
+
592
+ sentence_wav_bytestream = b""
593
+
594
  # XTTS is actually using streaming response but we are playing audio by sentence
595
  # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
596
  if audio_stream is not None:
597
  # frame_length = 0
598
  for chunk in audio_stream:
599
  try:
600
+ if chunk is not None:
601
+ sentence_wav_bytestream += chunk
602
  # frame_length += len(chunk)
603
  except:
604
  # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
605
  continue
606
 
607
+ # Filter output for better voice
608
+ filter_output=True
609
+ if filter_output:
610
+ data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(wav_bytestream)//2, offset=0)
611
+ float_data = data_s16 * 0.5**15
612
+ reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
613
+ sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
614
+ sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
615
+
616
+ total_wav_bytestream += sentence_wav_bytestream
617
+
618
  # Directly encode the WAV bytestream to base64
619
+ base64_audio = base64.b64encode(pcm_to_wav(total_wav_bytestream)).decode('utf8')
620
 
621
  if audio_stream is not None:
622
  return (history, base64_audio)