File size: 2,230 Bytes
446c342 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import io
import json
import requests
from elevenlabs import voices, generate, set_api_key, VoiceSettings, Voice
from pydub import AudioSegment
from .tts_utils import mix_background_music
def merge_audio(token, text, 背景音乐, 音色选择, stability, similarity_boost, style, use_speaker_boost, TTS_up, bg_up):
    """Synthesize ``text`` with ElevenLabs and mix it with background music.

    Args:
        token: API key handed to ``set_api_key``.
        text: Text to synthesize.
        背景音乐: Background-music argument, forwarded unchanged to
            ``mix_background_music``.
        音色选择: Key into the module-level ``speakers_eleven`` mapping; the
            mapped value indexes ``voices_eleven`` to pick the voice.
        stability: Forwarded to ``VoiceSettings``.
        similarity_boost: Forwarded to ``VoiceSettings``.
        style: Forwarded to ``VoiceSettings``.
        use_speaker_boost: Forwarded to ``VoiceSettings``.
        TTS_up: Forwarded unchanged to ``mix_background_music``
            (presumably a gain for the speech track -- confirm in tts_utils).
        bg_up: Forwarded unchanged to ``mix_background_music``
            (presumably a gain for the music track -- confirm in tts_utils).

    Returns:
        On success, ``None`` followed by the unpacked result of
        ``mix_background_music``. On any exception, the tuple
        ``(str(error), None, None)``.
    """
    try:
        set_api_key(token)

        # Resolve the UI selection to a concrete ElevenLabs voice id.
        voice_index = speakers_eleven[音色选择]
        voice_id = voices_eleven[voice_index].voice_id
        voice_settings = VoiceSettings(
            stability=stability,
            similarity_boost=similarity_boost,
            style=style,
            use_speaker_boost=use_speaker_boost,
        )
        chosen_voice = Voice(voice_id=voice_id, settings=voice_settings)

        mp3_bytes = generate(
            text=text,
            voice=chosen_voice,
            model="eleven_multilingual_v2",
        )

        # Convert the raw byte string into an AudioSegment object.
        speech = AudioSegment.from_file(io.BytesIO(mp3_bytes), format="mp3")
        mixed = mix_background_music(speech, 背景音乐, TTS_up, bg_up)
        return (None, *mixed)
    except Exception as err:
        # Errors are reported to the caller as text rather than raised.
        return str(err), None, None
def get_eleven_spk():
    """Fetch the available voices and build a display-key index for them.

    Every voice gets a display key of the form ``"<name>-<label dict>"``,
    where the label dict holds the accent, description (if any), age, gender
    and use case (if any) taken from the voice's ``labels`` metadata.

    Voices with incomplete metadata are tolerated: a missing ``accent``,
    ``age`` or ``gender`` is recorded as ``None``, and a missing description
    or use case is simply left out of the label dict. A single sparsely
    labelled voice therefore no longer makes the whole listing fail.

    Returns:
        A 3-tuple ``(voices_eleven, speakers_eleven, select_key)``:

        * ``voices_eleven`` -- the object returned by ``voices()``;
        * ``speakers_eleven`` -- dict mapping each display key to the
          position of its voice in ``voices_eleven``;
        * ``select_key`` -- list of the display keys, in listing order.

        If anything raises, the error is printed and the placeholder triple
        ``([], {'获取音色失败': '获取音色失败'}, ['获取音色失败'])`` is returned.
    """
    try:
        voices_eleven = voices()
        speakers_eleven = {}
        for index, voice in enumerate(voices_eleven):
            # Labels are optional metadata; treat a missing dict as empty.
            labels = voice.labels or {}

            # Insertion order is kept stable because it is part of the
            # display key (the dict is rendered with str()).
            label = {'口音': labels.get('accent')}
            # The description key is looked up with and without a trailing
            # space; the trailing-space spelling takes precedence.
            for name in ('description ', 'description'):
                if name in labels:
                    label['描述'] = labels[name]
                    break
            label['年龄'] = labels.get('age')
            label['性别'] = labels.get('gender')
            # The use case is looked up under both spellings; when neither is
            # present the entry is omitted instead of aborting the listing.
            for name in ('use case', 'usecase'):
                if name in labels:
                    label['用例'] = labels[name]
                    break

            speakers_eleven[f"{voice.name}-{label}"] = index
        select_key = list(speakers_eleven)
        return voices_eleven, speakers_eleven, select_key
    except Exception as e:
        # Best effort: report the problem and hand back a placeholder so the
        # module can still be imported.
        print(e)
        return [], {'获取音色失败': '获取音色失败'}, ['获取音色失败']
# Build the voice catalogue once when the module is imported. merge_audio
# reads voices_eleven and speakers_eleven to resolve the selected voice;
# select_key is the list of display keys. On failure these hold the
# placeholder values returned by get_eleven_spk.
voices_eleven, speakers_eleven, select_key = get_eleven_spk()
|