Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,20 +7,25 @@ import soundfile as sf
|
|
| 7 |
|
| 8 |
app = FastAPI()
|
| 9 |
|
| 10 |
-
# Load TTS pipeline
|
| 11 |
tts = pipeline("text-to-speech", model="suno/bark-small")
|
| 12 |
|
| 13 |
@app.get("/speak")
|
| 14 |
def speak(text: str):
|
| 15 |
# Generate audio (float32, -1..1)
|
| 16 |
output = tts(text)
|
| 17 |
-
audio = output["audio"]
|
| 18 |
|
| 19 |
-
# Convert
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
audio_int16 = np.int16(audio * 32767)
|
| 21 |
|
| 22 |
-
#
|
| 23 |
buf = io.BytesIO()
|
|
|
|
| 24 |
sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
|
| 25 |
buf.seek(0)
|
| 26 |
|
|
|
|
| 7 |
|
| 8 |
app = FastAPI()
|
| 9 |
|
| 10 |
+
# Load Bark TTS pipeline
|
| 11 |
tts = pipeline("text-to-speech", model="suno/bark-small")
|
| 12 |
|
| 13 |
@app.get("/speak")
|
| 14 |
def speak(text: str):
|
| 15 |
# Generate audio (float32, -1..1)
|
| 16 |
output = tts(text)
|
| 17 |
+
audio = output["audio"] # shape: (num_samples,) or (num_samples, channels)
|
| 18 |
|
| 19 |
+
# Convert to mono if needed
|
| 20 |
+
if audio.ndim > 1:
|
| 21 |
+
audio = np.mean(audio, axis=1)
|
| 22 |
+
|
| 23 |
+
# Convert float32 -> int16 (no clipping is applied; relies on samples staying within -1..1)
|
| 24 |
audio_int16 = np.int16(audio * 32767)
|
| 25 |
|
| 26 |
+
# Prepare an in-memory buffer; the ".wav" name is only a hint, since format="WAV" is also passed explicitly to sf.write
|
| 27 |
buf = io.BytesIO()
|
| 28 |
+
buf.name = "output.wav"
|
| 29 |
sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
|
| 30 |
buf.seek(0)
|
| 31 |
|