import base64
import io
import json
import os
import uuid
from typing import Optional

import pandas as pd
import requests
from pydub import AudioSegment

from ..tts_utils import mix_background_music

# Timeout (seconds) for the TTS HTTP request, so a stalled connection surfaces
# as an error instead of blocking the caller forever.
# NOTE(review): 60 s is a guess at a safe upper bound — tune if long texts
# legitimately take longer.
_REQUEST_TIMEOUT_SECONDS = 60


class avaliable_voice_type:
    """One voice entry, populated from a row of the voice-list spreadsheet.

    Instances are created empty and filled attribute by attribute (see
    ``get_data_map``). ``音色名称`` and ``voice_type`` have no class-level
    default and must be assigned before use.
    """

    语言: Optional[str] = ""
    场景: Optional[str] = ""
    音色名称: str
    voice_type: str
    时间戳支持: bool = False
    支持情感与风格类型: Optional[str] = ""
    支持语言类型: Optional[str] = ""

    def __repr__(self):
        """Return a label: the voice name followed by each non-empty descriptor.

        Attributes are read with ``getattr`` rather than ``self.__dict__`` so
        that fields still at their class-level default do not raise
        ``KeyError``.
        """
        parts = [f"音色: {getattr(self, '音色名称', '')}"]
        for attr in ("语言", "场景", "支持情感与风格类型", "支持语言类型"):
            value = getattr(self, attr, "")
            if value:
                parts.append(f"{value}")
        return "——".join(parts)


def get_data_map(filename="voice_list.xlsx"):
    """Load the voice spreadsheet and index its rows by display label.

    Args:
        filename: Name of the Excel file, resolved relative to the directory
            containing this module.

    Returns:
        A dict mapping ``str(entry)`` (see ``avaliable_voice_type.__repr__``)
        to the ``avaliable_voice_type`` entry. Rows producing the same label
        overwrite earlier ones.
    """
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
    df = pd.read_excel(path)
    # Empty cells become '' so the truthiness checks in __repr__ skip them.
    df.fillna('', inplace=True)

    voices = {}
    for _, row in df.iterrows():
        entry = avaliable_voice_type()
        entry.语言 = row['语言']
        entry.场景 = row['场景']
        entry.音色名称 = row['音色名称']
        entry.voice_type = row['voice_type']
        entry.时间戳支持 = row['时间戳']
        entry.支持情感与风格类型 = row['支持情感/风格类型']
        entry.支持语言类型 = row['支持语言类型']
        voices[str(entry)] = entry
    return voices


def tts(text, appid, access_token, voice, speed_ratio, volume_ratio, pitch_ratio, 背景音乐, speaker_up, back_up):
    """Synthesize ``text`` through the TTS HTTP API and mix in background music.

    Args:
        text: Plain text to synthesize.
        appid: Application id sent in the request body.
        access_token: Token sent in the ``Authorization`` header.
        voice: Key into the module-level ``useful_voice`` map (a voice label).
        speed_ratio: Forwarded as ``audio.speed_ratio``.
        volume_ratio: Forwarded as ``audio.volume_ratio``.
        pitch_ratio: Forwarded as ``audio.pitch_ratio``.
        背景音乐: Background-music argument passed to ``mix_background_music``.
        speaker_up: Passed through to ``mix_background_music``.
        back_up: Passed through to ``mix_background_music``.

    Returns:
        On failure, ``(error_message, None, None)``. On success, ``None``
        followed by the unpacked result of ``mix_background_music``
        (presumably two values, to match the failure shape — verify against
        that helper).
    """
    host = "openspeech.bytedance.com"
    api_url = f"https://{host}/api/v1/tts"
    header = {"Authorization": f"Bearer;{access_token}"}

    # An unknown voice is reported through the same error tuple as every other
    # failure instead of leaking a KeyError to the caller.
    try:
        voice_type = useful_voice[voice].voice_type
    except KeyError:
        return f"Unknown voice: {voice!r}", None, None

    request_json = {
        "app": {
            "appid": appid,
            # NOTE(review): the literal string is sent here, not the
            # access_token variable; the real token travels in the
            # Authorization header. Confirm this matches the API contract.
            "token": "access_token",
            "cluster": "volcano_tts"
        },
        "user": {
            "uid": "388808087185088"
        },
        "audio": {
            "voice_type": voice_type,
            "encoding": "mp3",
            "speed_ratio": speed_ratio,
            "volume_ratio": volume_ratio,
            "pitch_ratio": pitch_ratio,
        },
        "request": {
            "reqid": str(uuid.uuid4()),
            "text": text,
            "text_type": "plain",
            "operation": "query",
            "with_frontend": 1,
            "frontend_type": "unitTson"
        }
    }

    try:
        resp = requests.post(
            api_url,
            data=json.dumps(request_json),
            headers=header,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        # Parse the body once; it is needed for both the error and success paths.
        payload = resp.json()
        if "data" not in payload:
            # No audio payload: hand the whole response back as the error text.
            return str(payload), None, None

        mp3_file = base64.b64decode(payload["data"])
        original_audio = AudioSegment.from_mp3(io.BytesIO(mp3_file))
        return None, *mix_background_music(original_audio, 背景音乐, speaker_up, back_up)
    except Exception as e:
        # Best-effort boundary: any network, decode or mixing failure is
        # reported as a message rather than raised.
        return str(e), None, None


# Loaded once at import time; tts() looks voices up in this map.
useful_voice = get_data_map()