import io
import json

import requests
from elevenlabs import voices, generate, set_api_key, VoiceSettings, Voice
from pydub import AudioSegment

from .tts_utils import mix_background_music


def merge_audio(token, text, 背景音乐, 音色选择, stability, similarity_boost, style, use_speaker_boost, TTS_up, bg_up):
    """Synthesize ``text`` with ElevenLabs and mix it with background music.

    Args:
        token: ElevenLabs API key, registered through ``set_api_key``.
        text: Text to synthesize.
        背景音乐: Background-music argument, forwarded unchanged to
            ``mix_background_music``.
        音色选择: Voice key; must be a key of the module-level
            ``speakers_eleven`` mapping (one of the ``select_key`` entries).
        stability: Forwarded to ``VoiceSettings``.
        similarity_boost: Forwarded to ``VoiceSettings``.
        style: Forwarded to ``VoiceSettings``.
        use_speaker_boost: Forwarded to ``VoiceSettings``.
        TTS_up: Forwarded to ``mix_background_music`` (presumably a level
            adjustment for the speech track -- confirm in ``tts_utils``).
        bg_up: Forwarded to ``mix_background_music`` (presumably a level
            adjustment for the music track -- confirm in ``tts_utils``).

    Returns:
        On success, ``(None, *mix_background_music(...))``.  On any failure,
        ``(str(error), None, None)``; the first element is therefore the
        error message, or ``None`` when there was no error.
        NOTE(review): the three-element failure tuple suggests that
        ``mix_background_music`` returns two values -- confirm.
    """
    try:
        set_api_key(token)
        # speakers_eleven maps the display key to an index into voices_eleven.
        # If the voice list failed to load, this lookup raises and the error
        # is reported through the generic handler below.
        voice_index = speakers_eleven[音色选择]
        voice_settings = VoiceSettings(
            stability=stability,
            similarity_boost=similarity_boost,
            style=style,
            use_speaker_boost=use_speaker_boost,
        )
        original_audio_bytes = generate(
            text=text,
            voice=Voice(
                voice_id=voices_eleven[voice_index].voice_id,
                settings=voice_settings,
            ),
            model="eleven_multilingual_v2",
        )
        # Convert the byte string into an AudioSegment object.
        original_audio = AudioSegment.from_file(io.BytesIO(original_audio_bytes), format="mp3")
        return None, *mix_background_music(original_audio, 背景音乐, TTS_up, bg_up)
    except Exception as e:
        # Errors are returned as text rather than raised.
        return str(e), None, None


def _summarize_labels(labels):
    """Build the display-label dict for one voice from its raw ``labels``.

    Missing labels never raise: accent, age and gender fall back to ``None``,
    while description and use case are simply omitted when absent.  Insertion
    order is fixed because the dict's ``str()`` becomes part of the voice key.
    """
    labels = labels or {}
    summary = {'口音': labels.get('accent')}
    # Both spellings are checked; the one with a trailing space wins.
    for description_key in ('description ', 'description'):
        if description_key in labels:
            summary['描述'] = labels[description_key]
            break
    summary['年龄'] = labels.get('age')
    summary['性别'] = labels.get('gender')
    for use_case_key in ('use case', 'usecase'):
        if use_case_key in labels:
            summary['用例'] = labels[use_case_key]
            break
    return summary


def get_eleven_spk():
    """Fetch the ElevenLabs voices and build the selection tables.

    Each voice gets a key of the form ``"<name>-<label dict>"``.  A voice with
    incomplete labels is still listed (with the missing fields left out or set
    to ``None``) instead of making the whole listing fail.

    Returns:
        A tuple ``(voices_eleven, speakers_eleven, select_key)``:
        the object returned by ``voices()``, a dict mapping each key to the
        voice's index in that object, and the list of keys.  If anything
        raises, the error is printed and the placeholder triple
        ``([], {'获取音色失败': '获取音色失败'}, ['获取音色失败'])`` is returned.
    """
    try:
        voices_eleven = voices()
        speakers_eleven = {}
        for i, v in enumerate(voices_eleven):
            label = _summarize_labels(v.labels)
            key = v.name + '-' + str(label)
            speakers_eleven[key] = i
        select_key = list(speakers_eleven.keys())
        return voices_eleven, speakers_eleven, select_key
    except Exception as e:
        print(e)
        return [], {'获取音色失败': '获取音色失败'}, ['获取音色失败']


# Loaded once at import time; merge_audio reads these module-level tables.
voices_eleven, speakers_eleven, select_key = get_eleven_spk()