File size: 1,917 Bytes
446c342 14a6e16 446c342 14a6e16 446c342 14a6e16 446c342 14a6e16 446c342 14a6e16 446c342 14a6e16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import io
import os
import gradio as gr
from openai import OpenAI
from pydub import AudioSegment
from TTSs.base_tts import Base_TTS
class OpenAI_TTS(Base_TTS):
    """Text-to-speech backend that calls the OpenAI speech endpoint.

    NOTE(review): the contract of ``Base_TTS`` is not visible from this file;
    presumably it calls ``_get_config_page`` to build the settings UI and
    passes the resulting widget values to ``_generate`` in the same order.
    """

    def get_name(self):
        """Return the display name of this backend."""
        return 'OpenAI'

    def _get_config_page(self):
        """Build the Gradio configuration widgets for this backend.

        Returns:
            A ``(config, inputs)`` tuple: ``config`` is the (initially hidden)
            ``gr.Group`` containing the widgets, and ``inputs`` is the list of
            widgets in the order ``_generate`` expects their values
            (API key, model, voice, speed).
        """
        with gr.Group(visible=False) as config:
            # Pre-fill the key from the environment when it is available.
            openai_api_key = gr.Textbox(
                label="openai的API Key(默认为环境变量值)",
                placeholder="请输入openai的API Key",
                type="password",
                interactive=True,
                value=os.environ.get('OPENAI_API_KEY', ''),
            )
            with gr.Row():
                openai_model_choices = ['tts-1', 'tts-1-hd']
                openai_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
                openai_model = gr.Dropdown(
                    choices=openai_model_choices,
                    value=openai_model_choices[0],
                    label="模型选择",
                    interactive=True,
                )
                openai_speaker = gr.Dropdown(
                    choices=openai_voices,
                    value=openai_voices[0],
                    label="音色选择",
                    interactive=True,
                )
                # A step of 0.05 keeps both the default (1.0) and the maximum
                # (4.0) on the slider grid, which starts at 0.25; a step of
                # 0.1 would make neither of them selectable.
                speed = gr.Slider(
                    minimum=0.25,
                    maximum=4.0,
                    value=1,
                    label='语速',
                    step=0.05,
                    interactive=True,
                )
        inputs = [openai_api_key, openai_model, openai_speaker, speed]
        return config, inputs

    def _generate(self, text, openai_api_key, openai_model, openai_speaker, speed):
        """Synthesize ``text`` with the OpenAI speech API.

        Args:
            text: The text to be spoken.
            openai_api_key: API key entered in the UI. A blank value makes the
                client fall back to the ``OPENAI_API_KEY`` environment variable.
            openai_model: Model identifier (e.g. ``'tts-1'``).
            openai_speaker: Voice name (e.g. ``'alloy'``).
            speed: Playback speed multiplier sent to the API.

        Returns:
            A ``pydub.AudioSegment`` decoded from the MP3 bytes of the response.
        """
        # Passing an empty string would be used verbatim as the key and fail
        # authentication; ``None`` lets the SDK read OPENAI_API_KEY instead.
        # Stripping also guards against whitespace picked up when pasting.
        api_key = (openai_api_key or '').strip() or None
        client = OpenAI(api_key=api_key)
        response = client.audio.speech.create(
            model=openai_model,
            voice=openai_speaker,
            speed=speed,
            input=text,
        )
        # The code does not set response_format and decodes the payload as MP3.
        mp3_stream = io.BytesIO(response.content)
        return AudioSegment.from_file(mp3_stream, format="mp3")
|