ai-story-server

Paused

App Files Files Community

jbilcke-hf HF staff committed on Nov 21, 2023

Commit

0bd106b

•

1 Parent(s): 4156639

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -19

app.py CHANGED Viewed

@@ -592,32 +592,31 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
             # XTTS is actually using streaming response but we are playing audio by sentence
             # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
             if audio_stream is not None:
-                frame_length = 0
                 for chunk in audio_stream:
                     try:
                         wav_bytestream += chunk
-                        frame_length += len(chunk)
                     except:
                         # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
                         continue
-            # Filter output for better voice
-            filter_output=True
-            if filter_output:
-                data_s16 = np.frombuffer(wav_bytestream, dtype=np.int16, count=len(wav_bytestream)//2, offset=0)
-                float_data = data_s16 * 0.5**15
-                reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
-                wav_bytestream = (reduced_noise * 32767).astype(np.int16)
-                wav_bytestream = wav_bytestream.tobytes()
-            # Directly encode the WAV bytestream to base64
-            base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
-            if audio_stream is not None:
-                return (history, base64_audio)
-            else:
-                # Handle the case where the audio stream is None (e.g., silent response)
-                return (history, None)
     except RuntimeError as e:

             # XTTS is actually using streaming response but we are playing audio by sentence
             # If you want direct XTTS voice streaming (send each chunk to voice ) you may set DIRECT_STREAM=1 environment variable
             if audio_stream is not None:
+                # frame_length = 0
                 for chunk in audio_stream:
                     try:
                         wav_bytestream += chunk
+                        # frame_length += len(chunk)
                     except:
                         # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
                         continue
+        # Filter output for better voice
+        filter_output=True
+        if filter_output:
+            data_s16 = np.frombuffer(wav_bytestream, dtype=np.int16, count=len(wav_bytestream)//2, offset=0)
+            float_data = data_s16 * 0.5**15
+            reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
+            wav_bytestream = (reduced_noise * 32767).astype(np.int16)
+            wav_bytestream = wav_bytestream.tobytes()
+        # Directly encode the WAV bytestream to base64
+        base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
+        if audio_stream is not None:
+            return (history, base64_audio)
+        else:
+            # Handle the case where the audio stream is None (e.g., silent response)
+            return (history, None)
     except RuntimeError as e: