anuj-exe committed on
Commit
245735d
·
verified ·
1 Parent(s): 0f914d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -7,20 +7,25 @@ import soundfile as sf
7
 
8
  app = FastAPI()
9
 
10
- # Load TTS pipeline
11
  tts = pipeline("text-to-speech", model="suno/bark-small")
12
 
13
  @app.get("/speak")
14
  def speak(text: str):
15
  # Generate audio (float32, -1..1)
16
  output = tts(text)
17
- audio = output["audio"]
18
 
19
- # Convert float32 [-1,1] -> int16
 
 
 
 
20
  audio_int16 = np.int16(audio * 32767)
21
 
22
- # Write WAV to in-memory buffer
23
  buf = io.BytesIO()
 
24
  sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
25
  buf.seek(0)
26
 
 
7
 
8
  app = FastAPI()
9
 
10
+ # Load Bark TTS pipeline
11
  tts = pipeline("text-to-speech", model="suno/bark-small")
12
 
13
  @app.get("/speak")
14
  def speak(text: str):
15
  # Generate audio (float32, -1..1)
16
  output = tts(text)
17
+ audio = output["audio"] # shape: (num_samples,) or (num_samples, channels)
18
 
19
+ # Convert to mono if needed
20
+ if audio.ndim > 1:
21
+ audio = np.mean(audio, axis=1)
22
+
23
+ # Convert float32 -> int16
24
  audio_int16 = np.int16(audio * 32767)
25
 
26
+ # Prepare BytesIO with a fake name so soundfile detects WAV
27
  buf = io.BytesIO()
28
+ buf.name = "output.wav"
29
  sf.write(buf, audio_int16, 24000, format="WAV", subtype="PCM_16")
30
  buf.seek(0)
31