# -*- coding: utf-8 -*-
"""Gradio front end for Azure Cognitive Services text-to-speech.

The user types text and picks a language; the text is synthesized with the
Azure Speech SDK and the resulting audio is written to a WAV file that the
UI offers for playback/download.

Credentials are read from the ``subscription_key`` and ``region``
environment variables.
"""
import io
import os
from typing import Optional, Tuple

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

# Display name shown in the dropdown -> locale code handed to the Speech SDK.
# NOTE(review): not every locale listed here necessarily has an Azure neural
# voice; an unsupported one surfaces as a cancellation message in the UI.
LANGUAGE_CODES = {
    "中文": "zh-CN",
    "英语": "en-US",
    "法语": "fr-FR",
    "西班牙语": "es-ES",
    "阿拉伯语": "ar-SA",
    "葡萄牙语": "pt-PT",
    "泰语": "th-TH",
    "越南语": "vi-VN",
    "俄语": "ru-RU",
    "日语": "ja-JP",
    "德语": "de-DE",
    "印度尼西亚语": "id-ID",
    "韩语": "ko-KR",
    "菲律宾语": "fil-PH",
    "意大利语": "it-IT",
    "荷兰语": "nl-NL",
    "波兰语": "pl-PL",
    "瑞典语": "sv-SE",
    "希伯来语": "he-IL",
    "土耳其语": "tr-TR",
    "马来语": "ms-MY",
    "匈牙利语": "hu-HU",
    "希腊语": "el-GR",
    "捷克语": "cs-CZ",
    "丹麦语": "da-DK",
    "挪威语": "nb-NO",
    "芬兰语": "fi-FI",
    "斯洛文尼亚语": "sl-SI",
    "爱沙尼亚语": "et-EE",
    "拉脱维亚语": "lv-LV",
    "立陶宛语": "lt-LT",
    "克罗地亚语": "hr-HR",
    "罗马尼亚语": "ro-RO",
    "斯洛伐克语": "sk-SK",
    "保加利亚语": "bg-BG",
    "塞尔维亚语": "sr-RS",
    "乌克兰语": "uk-UA",
    "繁体中文": "zh-TW",
    "印地语": "hi-IN",
    "挪威博克马尔语": "nb-NO",
    "波斯语": "fa-IR",
    "罗马语": "rm-CH",
    "斯瓦希里语": "sw-KE",
    "孟加拉语": "bn-BD",
    "波斯尼亚语": "bs-BA",
    "加泰罗尼亚语": "ca-ES",
    "克里奥尔语": "ht-HT",
    "爱尔兰语": "ga-IE",
    "卡纳达语": "kn-IN",
    "哈萨克语": "kk-KZ",
    "马其顿语": "mk-MK",
    "马拉雅拉姆语": "ml-IN",
    "毛利语": "mi-NZ",
    "尼泊尔语": "ne-NP",
    "普什图语": "ps-AF",
    "旁遮普语": "pa-IN",
    "萨摩亚语": "sm-WS",
    "索马里语": "so-SO",
    "塔加洛语": "tl-PH",
    "塔吉克语": "tg-TJ",
    "泰米尔语": "ta-IN",
    "泰卢固语": "te-IN",
    "图库尔语": "tk-TM",
    "乌尔都语": "ur-PK",
    "乌兹别克语": "uz-UZ",
    "威尔士语": "cy-GB",
    "科西嘉语": "co-FR",
    "弗里西语": "fy-NL",
    "加利西亚语": "gl-ES",
}


def text_to_speech(text, language_code) -> Tuple[str, Optional[str]]:
    """Synthesize *text* with Azure Speech and save the audio to disk.

    Args:
        text: The text to speak.
        language_code: A display-name key of ``LANGUAGE_CODES`` (the value
            selected in the language dropdown).

    Returns:
        A ``(status_message, audio_file_path)`` pair. ``audio_file_path`` is
        ``None`` whenever no audio was produced, so the UI always receives
        exactly two outputs.
    """
    if not text or not text.strip():
        return "No text was provided to synthesize.", None

    # The dropdown yields None when nothing is selected; avoid a KeyError.
    locale = LANGUAGE_CODES.get(language_code)
    if locale is None:
        return "Unsupported or missing language: {}".format(language_code), None

    # Subscription key and region identifier come from the environment.
    subscription_key = os.getenv('subscription_key')
    region = os.getenv('region')
    if not subscription_key or not region:
        return "Did you update the subscription info?", None

    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)
    # The synthesis language (not the recognition language) is what selects
    # the voice used for text-to-speech.
    speech_config.speech_synthesis_language = locale

    # Creates a speech synthesizer using the default speaker as audio output.
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
    result = speech_synthesizer.speak_text_async(text).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        file_path = save_audio(io.BytesIO(result.audio_data))
        return "Speech synthesized to speaker for text [{}]".format(text), file_path

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        message = "Speech synthesis canceled: {}".format(cancellation_details.reason)
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            if cancellation_details.error_details:
                message += "\nError details: {}".format(cancellation_details.error_details)
            message += "\nDid you update the subscription info?"
        return message, None

    # Any other reason still has to yield two values for the two UI outputs.
    return "Speech synthesis ended with unexpected result: {}".format(result.reason), None


def save_audio(audio_stream, file_path="speech.wav"):
    """Write the remaining bytes of *audio_stream* to *file_path*.

    Args:
        audio_stream: A readable binary stream holding the audio data.
        file_path: Destination file; defaults to ``speech.wav`` in the
            current working directory and is overwritten if it exists.

    Returns:
        The path the audio was written to.
    """
    with open(file_path, "wb") as f:
        f.write(audio_stream.read())
    return file_path


# NOTE(review): ``gr.inputs`` / ``gr.outputs`` appear to be Gradio's legacy
# component namespaces — confirm against the pinned Gradio version before
# upgrading.
input_text = gr.inputs.Textbox(lines=5, label="Input Text")
output_text = gr.outputs.Textbox(label="Output Text")
output_audio = gr.outputs.Audio(type="filepath", label="导出文件")
language = gr.inputs.Dropdown(choices=list(LANGUAGE_CODES), label="Language")

interface = gr.Interface(
    fn=text_to_speech,
    inputs=[input_text, language],
    outputs=[output_text, output_audio],
    title="微软文字转语音",
)
interface.launch()