Spaces:
Runtime error
Runtime error
import base64 | |
import io | |
import json | |
import os | |
import uuid | |
from typing import Optional | |
import gradio as gr | |
import pandas as pd | |
import requests | |
from pydub import AudioSegment | |
from TTSs.base_tts import Base_TTS | |
class avaliable_voice_type:
    """Plain attribute container describing one selectable voice.

    Instances are created without arguments and have their fields assigned
    afterwards. The string form of an instance is used as a human-readable
    label, so ``__repr__`` must work even when only some fields were set.
    """

    语言: Optional[str] = ""  # language
    场景: Optional[str] = ""  # scenario
    音色名称: str  # display name of the voice (no class-level default)
    voice_type: str  # voice identifier (no class-level default)
    时间戳支持: bool = False  # timestamp support
    支持情感与风格类型: Optional[str] = ""  # supported emotion/style types
    支持语言类型: Optional[str] = ""  # supported language types

    def __repr__(self):
        """Return the voice name followed by every non-empty descriptor.

        Parts are joined with ``——`` in the order: name, language, scenario,
        emotion/style types, language types. Falsy descriptors are skipped.
        """
        # getattr (rather than self.__dict__) so class-level defaults are
        # honoured; reading __dict__ raised KeyError for any field that had
        # not been explicitly assigned on the instance.
        parts = [str(getattr(self, "音色名称", ""))]
        for field in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, field, "")
            if value:
                parts.append(str(value))
        return "——".join(parts)
class Volcengine_TTS(Base_TTS):
    """TTS backend that synthesises speech through the Volcengine HTTP API.

    The available voices are read from a spreadsheet located next to this
    module and stored in ``self.useful_voice``, keyed by each voice's display
    string.
    """

    # Seconds to wait on the TTS endpoint. Without a timeout a stalled
    # connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 60

    def get_name(self):
        """Return the display name of this backend."""
        return '火山引擎'

    def __init__(self):
        """Load the voice table used by the config page and by synthesis."""
        # NOTE(review): Base_TTS.__init__ is not invoked here; confirm that
        # the base class needs no initialisation of its own.
        self.useful_voice = self.get_data_map()

    def get_data_map(self, filename="voice_list.xlsx"):
        """Read the voice spreadsheet and return ``{label: voice}``.

        Args:
            filename: Spreadsheet file name, resolved relative to the
                directory containing this module.

        Returns:
            A dict mapping ``str(voice)`` to the ``avaliable_voice_type``
            instance built from each row. Rows that produce the same label
            overwrite earlier ones.
        """
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
        df = pd.read_excel(path)
        # Empty cells become '' so the label builder can skip them.
        df.fillna('', inplace=True)
        useful_voice = {}
        for _, row in df.iterrows():
            data = avaliable_voice_type()
            data.语言 = row['语言']
            data.场景 = row['场景']
            data.音色名称 = row['音色名称']
            data.voice_type = row['voice_type']
            data.时间戳支持 = row['时间戳']
            data.支持情感与风格类型 = row['支持情感/风格类型']
            data.支持语言类型 = row['支持语言类型']
            useful_voice[str(data)] = data
        return useful_voice

    def _get_config_page(self):
        """Build the (initially hidden) Gradio configuration group.

        Returns:
            A tuple ``(group, inputs)`` where ``inputs`` lists the components
            whose values are passed to ``_generate`` after the text, in
            order: appid, access token, voice, speed, volume, pitch.
        """
        with gr.Group(visible=False) as config_volcengine:
            voices = list(self.useful_voice.keys())
            # An empty voice table must not crash page construction.
            default_voice = voices[0] if voices else None
            with gr.Row():
                volcengine_appid = gr.Textbox(label="volcengine的appid(默认为环境变量值)",
                                              placeholder="请输入volcengine的appid",
                                              type="password",
                                              interactive=True,
                                              value=os.environ.get('VOLCENGINE_APPID', ''))
                volcengine_access_token = gr.Textbox(label="volcengine的access_token(默认为环境变量值)",
                                                     placeholder="请输入volengine的access_token",
                                                     type="password",
                                                     interactive=True,
                                                     value=os.environ.get('VOLCENGINE_ACCESS_TOKEN', ''))
                voice_type = gr.Dropdown(choices=voices, value=default_voice, label="音色选择",
                                         interactive=True)
            with gr.Row():
                speed_ratio = gr.Slider(minimum=0.2, maximum=3, value=1, step=0.1, label="语速",
                                        interactive=True)
                volume_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音量",
                                         interactive=True)
                pitch_ratio = gr.Slider(minimum=0.1, maximum=3, value=1, step=0.1, label="音高",
                                        interactive=True)
            with gr.Row():
                # Created for display only; their labels mark them as not yet
                # adapted and they are deliberately absent from ``inputs``.
                gr.Textbox(label="情感/风格(还未适配)", placeholder="请输入情感", interactive=True)
                gr.Textbox(label="语言类型(还未适配)", placeholder="请输入语言", interactive=True)
        inputs = [
            volcengine_appid, volcengine_access_token, voice_type, speed_ratio,
            volume_ratio, pitch_ratio
        ]
        return config_volcengine, inputs

    def _generate(self, text, appid, access_token, voice, speed_ratio,
                  volume_ratio, pitch_ratio):
        """Synthesise ``text`` and return it as a pydub ``AudioSegment``.

        Args:
            text: Plain text to synthesise.
            appid: Application id placed in the request body.
            access_token: Token sent in the ``Authorization`` header.
            voice: Key into ``self.useful_voice`` (a voice display label).
            speed_ratio: Value forwarded as ``audio.speed_ratio``.
            volume_ratio: Value forwarded as ``audio.volume_ratio``.
            pitch_ratio: Value forwarded as ``audio.pitch_ratio``.

        Returns:
            The decoded MP3 audio as an ``AudioSegment``.

        Raises:
            KeyError: If ``voice`` is not a known label.
            Exception: Carrying the parsed response body when it has no
                ``"data"`` entry.
        """
        host = "openspeech.bytedance.com"
        api_url = f"https://{host}/api/v1/tts"
        header = {"Authorization": f"Bearer;{access_token}"}
        request_json = {
            "app": {
                "appid": appid,
                # NOTE(review): this is a literal placeholder string, not the
                # access_token variable; the credential itself travels in the
                # Authorization header. Confirm against the service API docs.
                "token": "access_token",
                "cluster": "volcano_tts"
            },
            "user": {
                "uid": "388808087185088"
            },
            "audio": {
                "voice_type": self.useful_voice[voice].voice_type,
                "encoding": "mp3",
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": "plain",
                "operation": "query",
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }
        resp = requests.post(api_url, data=json.dumps(request_json), headers=header,
                             timeout=self.REQUEST_TIMEOUT)
        # Parse the body once and reuse it for both the check and the payload.
        payload = resp.json()
        if "data" not in payload:
            raise Exception(payload)
        mp3_bytes = base64.b64decode(payload["data"])
        return AudioSegment.from_mp3(io.BytesIO(mp3_bytes))