jbilcke-hf HF staff committed on
Commit
8df21b1
1 Parent(s): b967e55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -543,7 +543,7 @@ def generate_speech_from_history(history, chatbot_role, sentence):
543
  # sentence = sentence[:-1] + " " + sentence[-1]
544
 
545
  # regex does the job well
546
- sentence = re.sub("([^\x00-\x7F]|\w)(\.|\。|\?|\!)",r"\1 \2\2",sentence)
547
 
548
  print("Sentence for speech:", sentence)
549
 
@@ -577,7 +577,8 @@ def generate_speech_from_history(history, chatbot_role, sentence):
577
  )
578
  else:
579
  # likely got a ' or " or some other text without alphanumeric in it
580
- audio_stream = None
 
581
 
582
  # XTTS is actually using streaming response but we are playing audio by sentence
583
  # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
@@ -595,13 +596,16 @@ def generate_speech_from_history(history, chatbot_role, sentence):
595
  continue
596
 
597
  # Filter output for better voice
598
- filter_output=False
599
  if filter_output:
600
- data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(sentence_wav_bytestream)//2, offset=0)
601
- float_data = data_s16 * 0.5**15
602
- reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
603
- sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
604
- sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
 
 
 
605
 
606
  # Directly encode the WAV bytestream to base64
607
  base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')
 
543
  # sentence = sentence[:-1] + " " + sentence[-1]
544
 
545
  # regex does the job well
546
+ sentence = re.sub("([^\x00-\x7F]|\w)([\.。?!]+)",r"\1 \2",sentence)
547
 
548
  print("Sentence for speech:", sentence)
549
 
 
577
  )
578
  else:
579
  # likely got a ' or " or some other text without alphanumeric in it
580
+ audio_stream = None
581
+ continue
582
 
583
  # XTTS is actually using streaming response but we are playing audio by sentence
584
  # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
 
596
  continue
597
 
598
  # Filter output for better voice
599
+ filter_output=True
600
  if filter_output:
601
+ try:
602
+ data_s16 = np.frombuffer(sentence_wav_bytestream, dtype=np.int16, count=len(sentence_wav_bytestream)//2, offset=0)
603
+ float_data = data_s16 * 0.5**15
604
+ reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
605
+ sentence_wav_bytestream = (reduced_noise * 32767).astype(np.int16)
606
+ sentence_wav_bytestream = sentence_wav_bytestream.tobytes()
607
+ except:
608
+ print("failed to remove noise")
609
 
610
  # Directly encode the WAV bytestream to base64
611
  base64_audio = base64.b64encode(pcm_to_wav(sentence_wav_bytestream)).decode('utf8')