|
import base64 |
|
import io |
|
import json |
|
import os |
|
import uuid |
|
from typing import Optional |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
import requests |
|
from pydub import AudioSegment |
|
|
|
from TTSs.base_tts import Base_TTS |
|
|
|
|
|
class avaliable_voice_type:
    """Metadata record describing one selectable voice.

    Instances are created empty and populated attribute by attribute.
    ``str()``/``repr()`` of an instance yields a human-readable label:
    the voice name followed by every non-empty descriptive field, joined
    with ``——``.
    """

    # Language of the voice.
    语言: Optional[str] = ""
    # Usage scenario of the voice.
    场景: Optional[str] = ""
    # Display name of the voice (no default; expected to be assigned).
    音色名称: str
    # Voice identifier (no default; expected to be assigned).
    voice_type: str
    # Timestamp-support flag.
    时间戳支持: bool = False
    # Supported emotion / style types.
    支持情感与风格类型: Optional[str] = ""
    # Supported language types.
    支持语言类型: Optional[str] = ""

    def __repr__(self):
        """Return the display label for this voice.

        Fields are read with ``getattr`` rather than ``self.__dict__``:
        the defaults above live on the class, not in the instance dict,
        so a partially populated instance would otherwise raise
        ``KeyError``. Unset fields are treated as empty.
        """
        text = f"{getattr(self, '音色名称', '')}"

        # Optional descriptive fields, appended in a fixed order and
        # skipped when empty/falsy.
        for field_name in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field_name, "")
            if value:
                text += f"——{value}"

        return text
|
|
|
|
|
class Volcengine_TTS(Base_TTS):
    """TTS backend that synthesizes speech through the Volcengine HTTP API.

    The list of selectable voices is read from a spreadsheet located next
    to this module and kept in ``self.useful_voice``, keyed by each voice's
    display label.
    """

    # Seconds to wait on the synthesis HTTP call, so that an unresponsive
    # endpoint cannot block the caller indefinitely.
    _REQUEST_TIMEOUT = 60

    def get_name(self):
        """Return the display name of this backend."""
        return '火山引擎'

    def __init__(self):
        """Load the voice catalogue into ``self.useful_voice``."""
        # NOTE(review): Base_TTS.__init__ is not called here — confirm that
        # the base class needs no initialization.
        self.useful_voice = self.get_data_map()

    def get_data_map(self, filename="voice_list.xlsx"):
        """Read the voice spreadsheet and return a label -> voice mapping.

        Args:
            filename: Spreadsheet file name, resolved relative to the
                directory containing this module.

        Returns:
            dict mapping ``str(voice)`` to its ``avaliable_voice_type``
            record, in spreadsheet row order. Rows that produce the same
            label overwrite earlier ones.
        """
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
        df = pd.read_excel(path)
        # Empty cells become '' so optional fields are falsy in the label.
        df.fillna('', inplace=True)

        useful_voice = {}
        for _, row in df.iterrows():
            data = avaliable_voice_type()
            data.语言 = row['语言']
            data.场景 = row['场景']
            data.音色名称 = row['音色名称']
            data.voice_type = row['voice_type']
            data.时间戳支持 = row['时间戳']
            data.支持情感与风格类型 = row['支持情感/风格类型']
            data.支持语言类型 = row['支持语言类型']
            useful_voice[str(data)] = data

        return useful_voice

    def _get_config_page(self):
        """Build the (initially hidden) Gradio configuration group.

        Returns:
            Tuple ``(group, inputs)`` where ``inputs`` lists the components
            in the order ``_generate`` expects them after ``text``: appid,
            access token, voice, speed, volume, pitch.
        """
        with gr.Group(visible=False) as config_volcengine:
            voices = list(self.useful_voice.keys())

            with gr.Row():
                volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
                                              placeholder="请输入volcengine的appid",
                                              type="password",
                                              interactive=True,
                                              value=os.environ.get('VOLCENGINE_APPID', ''))
                volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
                                                     placeholder="请输入volengine的access_token",
                                                     type="password",
                                                     interactive=True,
                                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))

            # Guard against an empty voice list: voices[0] would raise
            # IndexError and prevent the page from being built at all.
            default_voice = voices[0] if voices else None
            voice_type = gr.Dropdown(choices=voices, value=default_voice, label="音色选择", interactive=True)

            with gr.Row():
                speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
                                        interactive=True)
                volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
                                         interactive=True)
                pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
                                        interactive=True)

            # These two fields are rendered but intentionally not part of
            # `inputs`; their labels mark them as not yet supported.
            with gr.Row():
                emotion = gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
                language = gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)

        inputs = [
            volcengine_appid, volcengine_access_token, voice_type, speed_ratio,
            volume_ratio, pitch_ratio
        ]

        return config_volcengine, inputs

    def _generate(self, text, appid, access_token, voice, speed_ratio,
                  volume_ratio, pitch_ratio):
        """Synthesize ``text`` and return the decoded audio.

        Args:
            text: Text to synthesize.
            appid: Volcengine application id.
            access_token: Access token, sent in the Authorization header.
            voice: Display label of the voice; must be a key of
                ``self.useful_voice``.
            speed_ratio: Speech speed multiplier.
            volume_ratio: Volume multiplier.
            pitch_ratio: Pitch multiplier.

        Returns:
            ``AudioSegment`` decoded from the MP3 returned by the service.

        Raises:
            KeyError: If ``voice`` is not a known voice label.
            Exception: Carrying the parsed response when it has no
                ``"data"`` entry.
        """
        host = "openspeech.bytedance.com"
        api_url = f"https://{host}/api/v1/tts"

        header = {"Authorization": f"Bearer;{access_token}"}

        request_json = {
            "app": {
                "appid": appid,
                # Literal string, not the access_token variable; the real
                # token travels in the Authorization header above.
                # NOTE(review): presumably the API only needs a non-empty
                # placeholder here — confirm against the service docs.
                "token": "access_token",
                "cluster": "volcano_tts"
            },
            "user": {
                "uid": "388808087185088"
            },
            "audio": {
                "voice_type": self.useful_voice[voice].voice_type,
                "encoding": "mp3",
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": "plain",
                "operation": "query",
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }

        resp = requests.post(api_url, data=json.dumps(request_json), headers=header,
                             timeout=self._REQUEST_TIMEOUT)

        # Parse the body once and reuse it for both the check and the data.
        payload = resp.json()
        if "data" not in payload:
            raise Exception(payload)

        # The audio arrives base64-encoded inside the JSON response.
        mp3_file = base64.b64decode(payload["data"])

        return AudioSegment.from_mp3(io.BytesIO(mp3_file))
|
|