# Text_to_speech / app.py
# nekoniii3's picture
# Update app.py
# ec59055
import gradio as gr
from openai import OpenAI
import os
from datetime import datetime
from zoneinfo import ZoneInfo
# Maps the label chosen in the "Auto Play" dropdown to the boolean kept in
# the session state.
auto_play_bl = dict(ON=True, OFF=False)

# Voice names offered in the settings dropdown.
voice_list = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]
def set_state(state, openai_key, voice, auto_play, speed):
    """Copy the values of the settings tab into the session state.

    Args:
        state: Per-session dict; its "openai_key", "voice", "auto_play" and
            "speed" entries are overwritten in place.
        openai_key: API key typed by the user (may be empty or None).
        voice: Selected voice name.
        auto_play: Dropdown label, a key of ``auto_play_bl`` ("ON"/"OFF").
        speed: Selected reading speed.

    Returns:
        The same ``state`` dict, updated.
    """
    # Pasted keys often carry stray whitespace; a cleared box may be None.
    openai_key = (openai_key or "").strip()

    # The cached client was built with the previous key. Drop it when the key
    # changes, otherwise create_voice keeps using the old (possibly wrong) key.
    if state.get("openai_key") != openai_key:
        state["client"] = None

    state["openai_key"] = openai_key
    state["voice"] = voice
    state["auto_play"] = auto_play_bl[auto_play]
    state["speed"] = speed

    return state
def create_voice(state, text, file):
    """Turn the typed text, or an uploaded text file, into an MP3 file.

    Args:
        state: Per-session dict with the keys "openai_key", "client",
            "user_id", "voice" and "speed". "client" and "user_id" are
            filled in here on first use.
        text: Text typed in the UI; may be empty or None.
        file: Path of an uploaded text file, or None. When given, its
            content is used instead of ``text``.

    Returns:
        A ``(file_path, err_msg)`` tuple: the path of the generated audio
        and "" on success, or ``None`` and a user-facing message on failure.
    """
    # Fetch session info
    client = state["client"]
    user_id = state["user_id"]
    voice = state["voice"]
    speed = state["speed"]

    # OpenAI key check
    if state["openai_key"] == "":
        return None, "OpenAIキーを入力してください。(設定タブ)"

    # A cleared textbox may be delivered as None rather than "".
    text = text or ""

    # Input check
    if text.strip() == "" and file is None:
        return None, "画面から文章を入力するか、テキストファイルをアップして下さい。"

    if file:
        try:
            # utf-8-sig also accepts files saved with a BOM, so the marker
            # is not sent to the API as part of the text.
            with open(file, "r", encoding="utf-8-sig") as f:
                input_text = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(e)
            return None, "入力ファイルを読み込めませんでした。UTF-8のテキストファイルをアップして下さい。"
    else:
        input_text = text

    if client is None:
        # Hand the key to the client directly. Routing it through os.environ
        # is process-wide, so concurrent sessions could pick up each other's
        # key, and resetting it afterwards wipes a server-configured key.
        client = OpenAI(api_key=state["openai_key"])
        state["client"] = client

    if user_id == "":
        # Use the current time as the session ID
        # NOTE(review): second resolution, so two sessions started within the
        # same second would share a folder.
        dt = datetime.now(ZoneInfo("Asia/Tokyo"))
        user_id = dt.strftime("%Y%m%d%H%M%S")
        state["user_id"] = user_id

    # Output folder is named after the session ID; recreate it if it is gone.
    os.makedirs(user_id, exist_ok=True)

    # File name is the current time
    dt = datetime.now(ZoneInfo("Asia/Tokyo"))
    file_name = dt.strftime("%Y%m%d%H%M%S") + ".mp3"
    file_path = user_id + "/" + file_name

    # Convert to speech; a non-empty result is an error message
    err_msg = request_tts(client, voice, speed, file_path, input_text)
    if err_msg != "":
        return None, err_msg

    return file_path, ""
def request_tts(client, voice, speed, file_path, text):
    """Convert text to speech and write the audio to ``file_path``.

    Args:
        client: Object exposing ``audio.speech.create(...)``.
        voice: Voice name passed to the API.
        speed: Reading speed passed to the API.
        file_path: Destination path of the audio file.
        text: Text to be spoken.

    Returns:
        "" on success, otherwise a user-facing error message.
    """
    try:
        response = client.audio.speech.create(
            model="tts-1",  # alternative kept by the author: "tts-1-hd"
            voice=voice,
            input=text,
            speed=speed,
        )

        # Write the audio file
        response.stream_to_file(file_path)

    except Exception as e:
        # Details go to the server log; the user only gets a generic message.
        print(e)
        return "音声作成中にエラーが発生しました。"

    # Returned here, not from a `finally`: a return inside `finally` would
    # also swallow SystemExit / KeyboardInterrupt.
    return ""
# UI definition: a main tab that converts text to speech, a settings tab, and
# a tab with pre-recorded samples of each voice.
with gr.Blocks() as demo:

    title = "<h2>Text to Speechデモアプリ</h2>"
    message = "<h3>最初に[設定]タブからOpenAIキーを入力してください。"
    message += "</h3>"

    gr.Markdown(title + message)

    # Per-session state; defaults mirror the initial values of the settings tab.
    state = gr.State({
        "openai_key": "",
        "client": None,
        "user_id": "",
        "auto_play": True,
        "speed": 0.8,
        "voice": "nova",
    })

    with gr.Tab("音声にする") as main_tab:

        text = gr.Textbox(label="音声にするテキスト", lines=3, interactive=True)
        file = gr.File(label="入力ファイル", type="filepath", file_types=[".txt"], interactive=True)

        with gr.Row():
            btn = gr.Button("音声にする")
            clear = gr.ClearButton([text, file], value="クリア")

        sys_msg = gr.Text(label="システムメッセージ", interactive=False)
        # Named voice_out so it is not confused with the `voice` dropdown below.
        voice_out = gr.Audio(label="出力音声", type="filepath", interactive=False, autoplay=True)

        btn.click(create_voice, [state, text, file], [voice_out, sys_msg])

    with gr.Tab("設定") as set_tab:

        # Masked input: the API key is a secret and should not be shown on screen.
        openai_key = gr.Textbox(label="OpenAI API Key", type="password", interactive=True)
        voice = gr.Dropdown(choices=voice_list, value="nova", label="Voice", interactive=True)
        auto_play = gr.Dropdown(choices=["ON", "OFF"], value="ON", label="Auto Play", interactive=True)
        # Lower bound is 0.25: the OpenAI speech endpoint accepts 0.25 to 4.0.
        speed = gr.Slider(0.25, 1, value=0.8, label="Speed", info="1に近づけるほど読むスピードが速くなります。", interactive=True)

        # Apply the Auto Play choice to the output player; without this the
        # setting is stored in the state but never takes effect.
        auto_play.change(lambda choice: gr.update(autoplay=auto_play_bl[choice]), auto_play, voice_out)

    # Settings are copied into the state whenever the main tab is selected
    main_tab.select(set_state, [state, openai_key, voice, auto_play, speed], state)

    with gr.Tab("声サンプル") as voice_chk:

        gr.Markdown("<h3>Text to speechの声のサンプルです。(速度は0.8です)</h3>")

        # Two rows of three buttons, one button per pre-recorded sample.
        sample_buttons = []
        for row_voices in (("alloy", "echo", "fable"), ("onyx", "nova", "shimmer")):
            with gr.Row():
                for sample_name in row_voices:
                    sample_buttons.append((sample_name, gr.Button(value=sample_name)))

        sample_voice = gr.Audio(type="filepath", interactive=False, autoplay=True)

        for sample_name, sample_btn in sample_buttons:
            # Default argument binds the current name (avoids late binding).
            sample_btn.click(lambda name=sample_name: f"voice_sample/{name}.mp3", None, sample_voice)
# Enable request queuing, then start the server (debug mode off).
demo.queue().launch(debug=False)