|
import io |
|
import os |
|
|
|
import gradio as gr |
|
from openai import OpenAI |
|
from pydub import AudioSegment |
|
|
|
from TTSs.base_tts import Base_TTS |
|
|
|
|
|
class OpenAI_TTS(Base_TTS):
    """Text-to-speech backend that calls the OpenAI speech endpoint.

    Provides a Gradio configuration panel (API key, model, voice, speed) and
    decodes the MP3 bytes returned by the API into a pydub ``AudioSegment``.
    """

    def get_name(self):
        """Return the display name of this backend."""
        return 'OpenAI'

    def _get_config_page(self):
        """Build the Gradio configuration group for this backend.

        Returns:
            A ``(config, inputs)`` tuple. ``config`` is the ``gr.Group``
            (created with ``visible=False``) holding the widgets; ``inputs``
            lists the widgets in the same order as the parameters that follow
            ``text`` in ``_generate``.
        """
        with gr.Group(visible=False) as config:
            # Pre-fill the key from the environment so it need not be retyped.
            openai_api_key = gr.Textbox(
                label="openai的API Key(默认为环境变量值)",
                placeholder="请输入openai的API Key",
                type="password",
                interactive=True,
                value=os.environ.get('OPENAI_API_KEY', ''),
            )
            with gr.Row():
                openai_model_choices = ['tts-1', 'tts-1-hd']
                openai_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
                openai_model = gr.Dropdown(
                    choices=openai_model_choices,
                    value=openai_model_choices[0],
                    label="模型选择",
                    interactive=True,
                )
                openai_speaker = gr.Dropdown(
                    choices=openai_voices,
                    value=openai_voices[0],
                    label="音色选择",
                    interactive=True,
                )
                speed = gr.Slider(
                    minimum=0.25,
                    maximum=4.0,
                    value=1,
                    label='语速',
                    step=0.1,
                    interactive=True,
                )

        inputs = [
            openai_api_key, openai_model, openai_speaker, speed
        ]

        return config, inputs

    def _generate(self, text, openai_api_key, openai_model, openai_speaker, speed):
        """Synthesize ``text`` through the OpenAI speech API.

        Args:
            text: The text to be spoken.
            openai_api_key: API key from the config page. A blank or
                whitespace-only value means "use the ``OPENAI_API_KEY``
                environment variable".
            openai_model: Model identifier forwarded as ``model``.
            openai_speaker: Voice identifier forwarded as ``voice``.
            speed: Playback speed forwarded as ``speed``.

        Returns:
            The synthesized speech as a pydub ``AudioSegment``.
        """
        # A cleared textbox arrives as "" rather than None. Handing "" to the
        # client would bypass its own OPENAI_API_KEY lookup and fail on auth,
        # so blank values are normalized to None to let that fallback apply.
        api_key = (openai_api_key or '').strip() or None
        client = OpenAI(api_key=api_key)

        response = client.audio.speech.create(
            model=openai_model,
            voice=openai_speaker,
            speed=speed,
            input=text,
        )

        # No response_format is requested, so the payload is decoded as MP3
        # (NOTE(review): relies on the API's default output format being mp3).
        original_audio = AudioSegment.from_file(io.BytesIO(response.content), format="mp3")
        return original_audio
|
|