Spaces:

anuj-exe
/

text2speech

Sleeping

anuj-exe committed on Sep 27

Commit

245735d

verified ·

1 Parent(s): 0f914d0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,20 +7,25 @@ import soundfile as sf
 app = FastAPI()
-# Load TTS pipeline
 tts = pipeline("text-to-speech", model="suno/bark-small")
 @app.get("/speak")
 def speak(text: str):
     # Generate audio (float32, -1..1)
     output = tts(text)
-    audio = output["audio"]
-    # Convert float32 [-1,1] -> int16
     audio_int16 = np.int16(audio * 32767)
-    # Write WAV to in-memory buffer
     buf = io.BytesIO()
     sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
     buf.seek(0)

 app = FastAPI()
+# Load Bark TTS pipeline
 tts = pipeline("text-to-speech", model="suno/bark-small")
 @app.get("/speak")
 def speak(text: str):
     # Generate audio (float32, -1..1)
     output = tts(text)
+    audio = output["audio"]  # shape: (num_samples,) or (num_samples, channels)
+    # Convert to mono if needed
+    if audio.ndim > 1:
+        audio = np.mean(audio, axis=1)
+    # Convert float32 -> int16
     audio_int16 = np.int16(audio * 32767)
+    # Prepare BytesIO with a fake name so soundfile detects WAV
     buf = io.BytesIO()
+    buf.name = "output.wav"
     sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
     buf.seek(0)