import io
import os

import gradio as gr
from openai import OpenAI
from pydub import AudioSegment

from TTSs.base_tts import Base_TTS


class OpenAI_TTS(Base_TTS):
    """Text-to-speech backend that synthesizes audio through the OpenAI speech API."""

    def get_name(self):
        """Return the display name of this TTS backend."""
        return 'OpenAI'

    def _get_config_page(self):
        """Build the Gradio configuration panel for this backend.

        Returns:
            A ``(config, inputs)`` tuple: ``config`` is the (initially hidden)
            ``gr.Group`` holding the widgets, and ``inputs`` is the list of
            widgets in the order ``[api_key, model, voice, speed]``. That order
            mirrors the parameters of ``_generate`` after ``text``.
        """
        with gr.Group(visible=False) as config:
            # Pre-fill the key from the environment so users who already
            # exported OPENAI_API_KEY do not have to type it again.
            openai_api_key = gr.Textbox(
                label="openai的API Key(默认为环境变量值)",
                placeholder="请输入openai的API Key",
                type="password",
                interactive=True,
                value=os.environ.get('OPENAI_API_KEY', ''),
            )
            with gr.Row():
                openai_model_choices = ['tts-1', 'tts-1-hd']
                openai_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
                openai_model = gr.Dropdown(
                    choices=openai_model_choices,
                    value=openai_model_choices[0],
                    label="模型选择",
                    interactive=True,
                )
                openai_speaker = gr.Dropdown(
                    choices=openai_voices,
                    value=openai_voices[0],
                    label="音色选择",
                    interactive=True,
                )
                # NOTE(review): with minimum=0.25 and step=0.1 the default
                # value 1 is not on the step grid (0.25 + k * 0.1 never equals
                # 1.0) -- confirm the slider renders/snaps it as intended.
                speed = gr.Slider(
                    minimum=0.25,
                    maximum=4.0,
                    value=1,
                    label='语速',
                    step=0.1,
                    interactive=True,
                )
        inputs = [
            openai_api_key,
            openai_model,
            openai_speaker,
            speed,
        ]
        return config, inputs

    def _generate(self, text, openai_api_key, openai_model, openai_speaker, speed):
        """Synthesize ``text`` with the OpenAI speech endpoint.

        Args:
            text: The text to be spoken.
            openai_api_key: API key from the config page. A blank or
                whitespace-only value is treated as "not provided" so the
                OpenAI client falls back to the ``OPENAI_API_KEY``
                environment variable.
            openai_model: Name of the speech model to use.
            openai_speaker: Name of the voice to use.
            speed: Playback speed multiplier forwarded to the API.

        Returns:
            A ``pydub.AudioSegment`` decoded from the MP3 bytes in the response.
        """
        # An empty textbox yields '' which the client would send verbatim as
        # the key; normalising to None lets the client resolve the key from
        # the environment instead, matching what the textbox label promises.
        api_key = (openai_api_key or '').strip() or None
        client = OpenAI(api_key=api_key)

        response = client.audio.speech.create(
            model=openai_model,
            voice=openai_speaker,
            speed=speed,
            input=text,
            # Request MP3 explicitly because the bytes are decoded as MP3 below.
            response_format="mp3",
        )

        original_audio = AudioSegment.from_file(io.BytesIO(response.content), format="mp3")
        return original_audio