Spaces:

stevenhillis
/

intone_mvp

Sleeping

stevenhillis committed on Aug 31, 2023

Commit

ccbb9b9

•

1 Parent(s): 6a9e916

send encoded bytes not numpy

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,10 +10,16 @@ base_url = "https://api.sandbox.deepgram.com/nlu"
 token_str = os.environ['DG_TOKEN']
 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
-    prompt_audio = np.reshape(prompt_audio[1], (1, 1, -1)).astype(np.float32, order='C') / 32768.0
     response = requests.post(
         f'{base_url}',
-        files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio.tolist()), 'application/json'))],
         params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds},
         headers={
             'Authorization': f'Token {token_str}'

 token_str = os.environ['DG_TOKEN']
 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
+    sr = prompt_audio[0]
+    prompt_audio = np.reshape(prompt_audio[1], (1, -1)).astype(np.float32, order='C') / 32768.0
+    audio_bytes = bytes()
+    byte_io = io.BytesIO(audio_bytes)
+    wavfile.write(byte_io, sr, prompt_audio)
+    prompt_audio = [base64.b64encode(byte_io).decode('utf-8')]
     response = requests.post(
         f'{base_url}',
+        files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio), 'application/json'))],
         params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds},
         headers={
             'Authorization': f'Token {token_str}'