marcofrodl committed on
Commit
75673eb
1 Parent(s): 7067548
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import tempfile
4
+ from openai import OpenAI
5
+
6
+
7
def generate_tts(text, model, voice, api_key, response_format, speed):
    """Generate speech audio for *text* via the OpenAI API and save it to a temp file.

    Args:
        text: The input text passed to the speech endpoint.
        model: Model name (the UI offers ``tts-1`` and ``tts-1-hd``).
        voice: Voice name (alloy, echo, fable, onyx, nova, shimmer).
        api_key: OpenAI API key used to create the client for this request.
        response_format: Audio format (mp3, opus, aac, flac); also used as
            the suffix of the temporary output file.
        speed: Speed multiplier (0.25x - 4x).

    Returns:
        The filesystem path of the temporary file containing the audio.

    Raises:
        gr.Error: If no API key was supplied, or if creating the client or
            the speech request fails for any reason.
    """
    # Guard clause: without a key there is nothing useful we can do.
    if not api_key:
        raise gr.Error('Please enter your OpenAI API key to run this generator')

    try:
        client = OpenAI(api_key=api_key)

        response = client.audio.speech.create(
            model=model,  # tts-1, tts-1-hd
            voice=voice,  # alloy, echo, fable, onyx, nova, shimmer
            response_format=response_format,  # mp3, opus, aac, flac
            speed=speed,  # 0.25x - 4x
            input=text,
        )
    except Exception as error:
        # Log the underlying cause BEFORE raising; a statement placed after
        # the raise would never execute.
        print(str(error))
        raise gr.Error(
            "An error occurred while generating speech output. Please check your API key and try again."
        ) from error

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path remains readable by whoever consumes it.
    with tempfile.NamedTemporaryFile(suffix="." + response_format, delete=False) as temp_file:
        temp_file.write(response.content)

    return temp_file.name
36
+
37
# Custom CSS for the header row (logo image and title text).
css = """
.header-text p {line-height: 80px !important; text-align: left; font-size: 26px;}
.header-logo {text-align: left}
"""

# Build the Gradio UI: header, settings row, text input, trigger button and audio output.
with gr.Blocks(css=css) as tts_demo:
    # Header: logo on the left, title on the right.
    with gr.Row():
        with gr.Column(scale=1, min_width=80):
            gr.Image(
                "tt-logo.jpg",
                width=80,
                height=80,
                show_download_button=False,
                show_share_button=False,
                interactive=False,
                show_label=False,
                elem_id="thinktecture-logo",
                elem_classes="header-logo",
                container=False,
            )
        with gr.Column(scale=11):
            gr.Markdown("OpenAI Text-To-Speech Generator (TTS-1 Model)", elem_classes="header-text")

    # Request settings: API key, model, voice, output format and speed.
    with gr.Row(variant='panel'):
        api_key = gr.Textbox(type='password', label='OpenAI API Key', placeholder='Your OpenAI API key')
        model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1')
        voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='Voice', value='fable')
        response_format = gr.Dropdown(choices=['mp3', 'opus', 'aac', 'flac'], label='Output format', value='mp3')
        speed = gr.Slider(0.25, 4, value=1, step=.25, label="Speed", info="Speech speed (0.25x - 4x)")

    with gr.Row(variant='panel'):
        text = gr.Textbox(label="Input text", placeholder="Your input for Text-To-Speech - press 'Say it' or hit 'Enter' key when ready.")

    # Empty Markdown columns on both sides keep the button in the middle third.
    with gr.Row(variant='panel'):
        with gr.Column(scale=1):
            gr.Markdown(" ")
        with gr.Column(scale=1):
            btn = gr.Button("Say it")
        with gr.Column(scale=1):
            gr.Markdown(" ")

    output_audio = gr.Audio(label="Text-To-Speech Output", waveform_options={"waveform_progress_color": "#ff584f"})

    # Both triggers (Enter in the textbox, button click) call the same handler
    # with the same inputs; define the list once so they cannot drift apart.
    # Order must match the parameter order of generate_tts.
    tts_inputs = [text, model, voice, api_key, response_format, speed]
    text.submit(fn=generate_tts, inputs=tts_inputs, outputs=output_audio, api_name="tts_enter_key", concurrency_limit=None)
    btn.click(fn=generate_tts, inputs=tts_inputs, outputs=output_audio, api_name="tts_button", concurrency_limit=None)

tts_demo.launch()