|
import warnings |
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
import re |
|
import requests |
|
import argparse |
|
import json |
|
import os |
|
import re |
|
import tempfile |
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
|
|
# Sample sentence pre-filled into the "Text" box of every tab.
all_example: str = "Today is a wonderful day to build something people love!"
|
|
|
# Voice names offered in the Microsoft tab's dropdown; the selected entry is
# passed to microsoft() as `name`.
microsoft_model_list = [
    "en-US-JennyMultilingualNeural",
    "en-US-RyanMultilingualNeural",
    "en-US-AndrewMultilingualNeural",
    "en-US-AvaMultilingualNeural",
    "en-US-BrianMultilingualNeural",
    "en-US-EmmaMultilingualNeural",
    "en-US-AlloyMultilingualNeural",
    "en-US-EchoMultilingualNeural",
    "en-US-FableMultilingualNeural",
    "en-US-OnyxMultilingualNeural",
    "en-US-NovaMultilingualNeural",
    "en-US-ShimmerMultilingualNeural",
    "en-US-AlloyMultilingualNeuralHD",
    "en-US-EchoMultilingualNeuralHD",
    "en-US-FableMultilingualNeuralHD",
    "en-US-OnyxMultilingualNeuralHD",
    # Previously ended in "HD4"; the stray digit did not match any sibling
    # entry (looks like a footnote marker copied along with the name).
    "en-US-NovaMultilingualNeuralHD",
    "en-US-ShimmerMultilingualNeuralHD",
]
|
|
|
# Voice presets offered in the "openai" tab's dropdown; the selected entry is
# sent as the `voice` field of the request built in openai().
openai_model_list = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
|
|
|
# (display name, voice id) pairs for the "11Labs" tab, in dropdown order.
# Keeping each name next to its id makes it impossible for the two derived
# lists below to drift out of alignment.
_ELEVEN_VOICES = (
    ("Rachel", "21m00Tcm4TlvDq8ikWAM"),
    ("Drew", "29vD33N1CtxCmqQRPOHJ"),
    ("Clyde", "2EiwWnXFnvU5JabPnv8n"),
    ("Paul", "5Q0t7uMcjvnagumLfvZi"),
    ("Domi", "AZnzlk1XvdvUeBnXmlld"),
    ("Dave", "CYw3kZ02Hs0563khs1Fj"),
    ("Fin", "D38z5RcWu1voky8WS1ja"),
    ("Sarah", "EXAVITQu4vr4xnSDxMaL"),
    ("Antoni", "ErXwobaYiN019PkySvjV"),
    ("Thomas", "GBv7mTt0atIp3Br8iCZE"),
    ("Charlie", "IKne3meq5aSn9XLyUdCD"),
    ("George", "JBFqnCBsd6RMkjVDRZzb"),
    ("Emily", "LcfcDJNUP1GQjkzn1xUU"),
    ("Elli", "MF3mGyEYCl7XYWbV9V6O"),
    ("Callum", "N2lVS1w4EtoT3dr4eOWO"),
    ("Patrick", "ODq5zmih8GrVes37Dizd"),
    ("Harry", "SOYHLrjzK2X1ezoPC6cr"),
    ("Liam", "TX3LPaxmHKxFdv7VOQHJ"),
    ("Dorothy", "ThT5KcBeYPX3keUQqHPh"),
    ("Josh", "TxGEqnHWrfWFTfGW9XjX"),
    ("Arnold", "VR6AewLTigWG4xSOukaG"),
    ("Charlotte", "XB0fDUnXU5powFXDhCwa"),
    ("Alice", "Xb7hH8MSUJpSbSDYk0k2"),
    ("Matilda", "XrExE9yKIg1WjnnlVkGX"),
    ("James", "ZQe5CZNOzWyzPSCn5a3c"),
    ("Joseph", "Zlb1dXrM653N07WRdFW3"),
    ("Jeremy", "bVMeCyTHy58xNoL34h3p"),
    ("Michael", "flq6f7yk4E4fJM5XTYuZ"),
    ("Ethan", "g5CIjZEefAph4nQFvHAz"),
    ("Chris", "iP95p4xoKVk53GoZ742B"),
    ("Gigi", "jBpfuIE2acCO8z3wKNLl"),
    ("Freya", "jsCqWAovK2LkecY7zXl4"),
    ("Brian", "nPczCjzI2devNBz1zQrb"),
    ("Grace", "oWAxZDx7w5VEj9dCyTzz"),
    ("Daniel", "onwK4e9ZLuTAKqWW03F9"),
    ("Lily", "pFZP5JQG7iQjIQuC4Bku"),
    ("Serena", "pMsXgVXv3BLzUgSXRplE"),
    ("Adam", "pNInz6obpgDQGcFmaJgB"),
    ("Nicole", "piTKgcLEGmPE4e6mEKli"),
    ("Bill", "pqHfZKP75CvOlQylNhV4"),
    ("Jessie", "t0jbNlBVZ17f02VDIeMI"),
    ("Sam", "yoZ06aMxZJJ28mfd3POQ"),
    ("Glinda", "z9fAnlkpzviPz146aGWa"),
    ("Giovanni", "zcAOhNBS3c14rBihAFp1"),
    ("Mimi", "zrHiDhphv9ZnVXBqCLjz"),
)

# Parallel lists kept for existing callers: ids and names share indices.
eleven_voice_id = [voice_id for _, voice_id in _ELEVEN_VOICES]
eleven_name = [voice_name for voice_name, _ in _ELEVEN_VOICES]

# Display name -> voice id; elevenlabs() uses it to build the request URL.
eleven_id_model_name_dict = dict(_ELEVEN_VOICES)
|
|
|
def openai(text, name):
    """Synthesize ``text`` with OpenAI's speech endpoint and save it as MP3.

    The API key is read from the ``OPENAI_API_KEY`` environment variable so
    that no secret is stored in the source file.

    :param text: text to speak; must contain non-whitespace characters.
    :param name: voice preset sent as the ``voice`` field of the request.
    :return: ``(message, audio_path)``. On success ``message`` is
        ``"Success"`` and ``audio_path`` is the path of a temporary ``.mp3``
        file holding the response body. On any failure ``message`` starts
        with ``"Error:"`` and ``audio_path`` is ``None``.
    """
    if not text or not text.strip():
        return "Error: text is empty", None
    if not name:
        return "Error: no voice selected", None

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        return "Error: the OPENAI_API_KEY environment variable is not set", None

    headers = {
        'Authorization': 'Bearer ' + api_key,
        'Content-Type': 'application/json',
    }
    json_data = {
        'model': 'tts-1-hd',
        'input': text,
        'voice': name,
        # Requested explicitly so the payload matches the ".mp3" suffix below.
        'response_format': 'mp3',
    }

    try:
        response = requests.post(
            'https://api.openai.com/v1/audio/speech',
            headers=headers,
            json=json_data,
            timeout=60,
        )
        # Without this an HTTP 4xx/5xx body would be reported as "Success".
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Error: {exc}", None

    # Return a file path rather than the Response object: the second value is
    # wired to an audio output, which cannot consume a Response.
    # delete=False because the file must outlive this call to be read back.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(response.content)
    return "Success", audio_file.name
|
|
|
def elevenlabs(text, name):
    """Synthesize ``text`` with the ElevenLabs text-to-speech API as MP3.

    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable
    so that no secret is stored in the source file.

    :param text: text to speak; must contain non-whitespace characters.
    :param name: display name of the voice; it is looked up in
        ``eleven_id_model_name_dict`` to obtain the voice id used in the URL.
    :return: ``(message, audio_path)``. On success ``message`` is
        ``"Success"`` and ``audio_path`` is the path of a temporary ``.mp3``
        file holding the response body. On any failure ``message`` starts
        with ``"Error:"`` and ``audio_path`` is ``None``.
    """
    if not text or not text.strip():
        return "Error: text is empty", None

    api_key = os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        return "Error: the ELEVENLABS_API_KEY environment variable is not set", None

    # .get() instead of indexing: an unselected dropdown yields None, which
    # would otherwise surface as an unhandled KeyError.
    voice_id = eleven_id_model_name_dict.get(name)
    if voice_id is None:
        return f"Error: unknown voice {name!r}", None

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }
    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=60)
        # Without this an HTTP 4xx/5xx body would be reported as "Success".
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Error: {exc}", None

    # Return a file path rather than the Response object: the second value is
    # wired to an audio output, which cannot consume a Response.
    # delete=False because the file must outlive this call to be read back.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(response.content)
    return "Success", audio_file.name
|
|
|
def microsoft(text, name, style="Neural"):
    """Synthesize ``text`` with the Azure Speech text-to-speech REST API.

    Credentials come from the environment instead of the source file:
    ``SPEECH_KEY`` holds the subscription key and ``SPEECH_REGION`` the
    resource region used to build the endpoint host name.

    ``text`` is treated as plain text and XML-escaped before being embedded
    in the SSML document, so characters such as ``&`` or ``<`` cannot break
    the request body.

    :param text: plain text to speak; must contain non-whitespace characters.
    :param name: voice name placed in the SSML ``<voice name=...>`` attribute.
    :param style: accepted for backward compatibility; currently unused.
    :return: ``(message, audio_path)``, both strings. On success ``message``
        is ``"Success"`` and ``audio_path`` is the path of a temporary
        ``.mp3`` file. On any failure ``message`` starts with ``"Error:"``
        and ``audio_path`` is ``""``.
    """
    # Local import: this module is not imported at the top of the file.
    from xml.sax.saxutils import escape

    if not text or not text.strip():
        return "Error: text is empty", ""
    if not name:
        return "Error: no voice selected", ""

    subscription_key = os.environ.get("SPEECH_KEY")
    region = os.environ.get("SPEECH_REGION")
    if not subscription_key or not region:
        return (
            "Error: the SPEECH_KEY and SPEECH_REGION environment variables "
            "must be set",
            "",
        )

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }

    # The attribute is single-quoted, so quotes must be escaped as well.
    voice_attr = escape(name, {"'": "&apos;", '"': "&quot;"})
    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{voice_attr}'>"
            f"{escape(text)}"
            "</voice>"
            "</speak>")

    url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
    try:
        # Encode explicitly: a str body would be sent as Latin-1 and fail on
        # any character outside that range.
        response = requests.post(
            url, headers=headers, data=data.encode("utf-8"), timeout=60
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Error: {exc}", ""

    # delete=False because the file must outlive this call to be read back.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(response.content)
    return "Success", audio_file.name
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse options, build one tab per TTS
    # provider, then start the Gradio server.
    parser = argparse.ArgumentParser()
    # --device and --model_info_path are parsed but not read anywhere in this
    # block; they are kept so existing launch commands keep working.
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument("--share", action="store_true", default=True, help="share gradio app")
    # --share defaults to True, so on its own it could never be switched off.
    parser.add_argument("--no-share", dest="share", action="store_false",
                        help="do not create a public gradio share link")
    parser.add_argument("--port", type=int, default=8081, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()

    # (tab title, voice choices, click handler, component for the 2nd output)
    tab_specs = [
        ("11Labs", eleven_name, elevenlabs, gr.Audio),
        ("微软", microsoft_model_list, microsoft, gr.Textbox),
        ("openai", openai_model_list, openai, gr.Audio),
    ]

    app = gr.Blocks()
    with app:
        gr.Markdown("## English TTS Demo")
        with gr.Tabs():
            for title, voices, handler, result_component in tab_specs:
                with gr.TabItem(title):
                    text_box = gr.TextArea(label="Text", value=all_example)
                    # Pre-select the first voice so a handler is never
                    # called with name=None.
                    voice_box = gr.Dropdown(voices, value=voices[0], label="name")
                    submit = gr.Button("Generate", variant="primary")
                    message_box = gr.Textbox(label="Output Message")
                    result_box = result_component(label="Output Audio")
                    submit.click(handler, [text_box, voice_box],
                                 [message_box, result_box])

    # Honour the parsed options instead of a hard-coded share=True and the
    # library's default port.
    app.launch(share=args.share, server_port=args.port)
|
|
|
|
|
|
|
|
|
|
|
|