Spaces:
Sleeping
Sleeping
File size: 3,201 Bytes
6a6023c d86f106 6a6023c d86f106 60bea2a d86f106 f3e678b d86f106 cb2b2ed d86f106 ccbb9b9 0270007 f3e678b 0270007 f3e678b 2ae17a3 1a5fb77 2ae17a3 d86f106 6a9e916 d86f106 2ae17a3 d86f106 cb2b2ed 2ae17a3 cb2b2ed d86f106 cb2b2ed aaca925 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import io
import json
import os
import requests
import urllib
import gradio as gr
import numpy as np
from scipy.io import wavfile
# Deepgram sandbox text-to-speech endpoint; tts_fn POSTs every synthesis request here.
base_url = "https://api.sandbox.deepgram.com/tts"
# API token sent in the Authorization header. Read once at import time, so a
# missing DG_TOKEN environment variable raises KeyError before the UI starts.
token_str = os.environ['DG_TOKEN']
def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
    """Synthesize ``text`` with the TTS endpoint, conditioned on a prompt clip.

    Args:
        text: Sentence to synthesize; sent as a one-element JSON list.
        prompt_audio: ``(sample_rate, samples)`` pair as delivered by the
            audio input component, or ``None`` when nothing was provided.
            Samples are scaled by 1/32768, i.e. treated as 16-bit PCM.
        prompt_seconds: Forwarded as the ``prompt_seconds`` query parameter.
        inference_steps: Forwarded as the ``soundstorm_steps`` query parameter.
        inference_temperature: Forwarded as the ``temperature`` query parameter.
        pitch_steps: Converted to ``int`` and forwarded as ``pitch_steps``.

    Returns:
        ``(sample_rate, audio)`` where ``audio`` is an ``int16`` NumPy array.

    Raises:
        gr.Error: If no prompt audio was supplied, the HTTP request fails or
            times out, or the response does not contain a usable result.
    """
    if prompt_audio is None:
        raise gr.Error("Please upload or select a prompt audio clip first.")

    # NOTE(review): the prompt's own sample rate (prompt_audio[0]) is never
    # sent to the service -- confirm the API does not need it.
    samples = np.asarray(prompt_audio[1])
    if samples.ndim > 1:
        # Assumes the (samples, channels) layout Gradio documents for
        # multi-channel audio. Average down to mono, because reshaping the
        # 2-D array directly would interleave the channels sample by sample.
        samples = samples.mean(axis=1)
    prompt = np.reshape(samples, (1, 1, -1)).astype(np.float32, order='C') / 32768.0

    params = {
        'synthesize': 'true',
        'pitch_steps': int(pitch_steps),
        'soundstorm_steps': inference_steps,
        'temperature': inference_temperature,
        'prompt_seconds': prompt_seconds,
    }
    files = [
        ('texts', ('texts', json.dumps([text]), 'application/json')),
        ('prompt_audio', ('prompt_audio', json.dumps(prompt.tolist()), 'application/json')),
    ]

    try:
        # Without a timeout a stalled connection would block this worker forever.
        http_response = requests.post(
            base_url,
            files=files,
            params=params,
            headers={'Authorization': f'Token {token_str}'},
            timeout=120,
        )
        response = http_response.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException.
        raise gr.Error(f"TTS request failed: {err}") from err

    try:
        result = response['results'][0]
        sample_rate = int(result['sample_rate'])
        # NOTE(review): 1.414 presumably normalizes a sqrt(2) peak amplitude
        # before scaling to the 16-bit range -- confirm against the API.
        waveform = np.array(result['audio']).transpose() / 1.414 * 32767
    except (KeyError, IndexError, TypeError, ValueError) as err:
        # Keep the raw payload in the server log, then surface a real error
        # instead of falling through to a return of unassigned variables.
        print(response)
        raise gr.Error(
            "TTS service returned an unexpected response; see the server log for details."
        ) from err

    # Clip first so out-of-range samples saturate instead of wrapping around.
    audio = np.clip(waveform, -32768, 32767).astype(np.int16)
    return (sample_rate, audio)
# Bundled reference clips offered as selectable prompt speakers.
demo_files = [
    'demo_files/man.wav',
    'demo_files/woman.wav',
    'demo_files/man_2.wav',
    'demo_files/woman_2.wav',
    'demo_files/man_3.wav',
    'demo_files/woman_3.wav',
    'demo_files/woman_4.wav',
    'demo_files/meditation.wav',
]

# Sample sentences; `cherry` pre-fills the text box, `pangram` is kept as an
# alternative that is not currently wired to any component.
pangram = "The beige hue on the waters of the loch impressed all, including the French queen, before she heard that symphony again, just as young Arthur wanted."
cherry = "Your request has been processed and the audio is ready for playback."

app = gr.Blocks()

# Layout: one tab holding a single row with two columns -- inputs in the
# first, output and the trigger button in the second.
with app:
    with gr.Tab("TTS MVP"), gr.Row():
        with gr.Column():
            textbox = gr.TextArea(
                label="Text",
                placeholder="Type a sentence here",
                value=cherry,
            )
            prompt_audio = gr.Audio(label="Prompt Audio", source='upload')
            examples = gr.Examples(
                label='Sample Speakers',
                examples=demo_files,
                inputs=prompt_audio,
            )
            # Disabled controls, kept for reference:
            # speed = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Speed")
            # variability = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, label="Variability")
            inference_steps = gr.Slider(
                minimum=1,
                maximum=32,
                value=1,
                step=1,
                label="Inference Steps: quality vs latency tradeoff. Results are sometimes unstable for values >1.",
            )
            inference_temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Temperature: fidelity vs variability tradeoff",
            )
            prompt_seconds = gr.Slider(
                minimum=1.0,
                maximum=10.0,
                value=3.0,
                step=1.0,
                label="Use first N seconds of prompt audio",
            )
            pitch_steps = gr.Slider(
                minimum=-24,
                maximum=24,
                value=0,
                step=1,
                label="Pitch Steps: 12 to an octave",
            )
        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", elem_id='tts-audio')
            btn = gr.Button("Generate")
            # Input order must match tts_fn's positional parameters.
            btn.click(
                tts_fn,
                inputs=[
                    textbox,
                    prompt_audio,
                    prompt_seconds,
                    inference_steps,
                    inference_temperature,
                    pitch_steps,
                ],
                outputs=[audio_output],
            )

app.launch()