Spaces:

hhz520
/

webchat

Configuration error

App Files Files Community

webchat / voice /azure /azure_voice.py

hhz520

Upload 170 files

61517de 8 months ago

raw

history blame contribute delete

No virus

4.66 kB

	"""
	azure voice service
	"""
	import json
	import os
	import time

	import azure.cognitiveservices.speech as speechsdk
	from langid import classify

	from bridge.reply import Reply, ReplyType
	from common.log import logger
	from common.tmp_dir import TmpDir
	from config import conf
	from voice.voice import Voice

	"""
	Azure voice
	Set azure_voice_api_key and azure_voice_region in the configuration file in the project's main directory.

	Browse the available voices: https://speech.microsoft.com/portal/voicegallery

	"""


	class AzureVoice(Voice):
	    """Speech recognition and speech synthesis through the Azure Speech SDK.

	    Voice settings are read from a ``config.json`` file located next to this
	    module; the file is created with default values the first time it is
	    missing. The subscription key and region are read from the global
	    configuration keys ``azure_voice_api_key`` and ``azure_voice_region``.
	    """

	    def __init__(self):
	        """Load (or create) the local voice config and build the SDK config.

	        Any failure is logged and swallowed on purpose, so construction
	        never raises. After a failed init the instance attributes set
	        below may be missing.
	        """
	        try:
	            curdir = os.path.dirname(__file__)
	            config_path = os.path.join(curdir, "config.json")
	            config = None
	            if not os.path.exists(config_path):
	                # No config file yet: create a local one with defaults.
	                config = {
	                    # Default voice when the language cannot be identified.
	                    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
	                    # Whether to auto-detect the language of the text.
	                    "auto_detect": True,
	                    "speech_synthesis_zh": "zh-CN-XiaozhenNeural",
	                    "speech_synthesis_en": "en-US-JacobNeural",
	                    "speech_synthesis_ja": "ja-JP-AoiNeural",
	                    "speech_synthesis_ko": "ko-KR-SoonBokNeural",
	                    "speech_synthesis_de": "de-DE-LouisaNeural",
	                    "speech_synthesis_fr": "fr-FR-BrigitteNeural",
	                    "speech_synthesis_es": "es-ES-LaiaNeural",
	                    "speech_recognition_language": "zh-CN",
	                }
	                with open(config_path, "w") as fw:
	                    json.dump(config, fw, indent=4)
	            else:
	                with open(config_path, "r") as fr:
	                    config = json.load(fr)
	            self.config = config
	            self.api_key = conf().get("azure_voice_api_key")
	            self.api_region = conf().get("azure_voice_region")
	            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
	            self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
	            self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
	        except Exception as e:
	            # Deliberate best-effort: report the problem but keep going.
	            # ``Logger.warn`` was removed in Python 3.13; use ``warning``.
	            logger.warning("AzureVoice init failed: %s, ignore ", e)

	    @staticmethod
	    def _error_details(result):
	        """Return the error text of a failed SDK result, or None.

	        ``cancellation_details`` is only populated when the result was
	        cancelled; for other failure reasons it is None, so it must not be
	        dereferenced unconditionally.
	        """
	        cancel_details = result.cancellation_details
	        return getattr(cancel_details, "error_details", None)

	    def _select_voice(self, text):
	        """Pick the synthesis voice name for *text*.

	        With ``auto_detect`` enabled, the language code returned by
	        ``classify`` is looked up as ``speech_synthesis_<lang>`` in the
	        config. Otherwise, or when no such key exists, the default
	        ``speech_synthesis_voice_name`` is used.
	        """
	        if self.config.get("auto_detect"):
	            lang = classify(text)[0]
	            key = "speech_synthesis_" + lang
	            if key in self.config:
	                logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
	                return self.config[key]
	        return self.config["speech_synthesis_voice_name"]

	    def voiceToText(self, voice_file):
	        """Recognize the speech in *voice_file* and return it as a Reply.

	        Returns a ``ReplyType.TEXT`` reply holding the recognized text, or
	        a ``ReplyType.ERROR`` reply when the SDK reports anything other
	        than ``RecognizedSpeech``.
	        """
	        audio_config = speechsdk.AudioConfig(filename=voice_file)
	        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
	        result = speech_recognizer.recognize_once()
	        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
	            logger.info("[Azure] voiceToText voice file name={} text={}".format(voice_file, result.text))
	            return Reply(ReplyType.TEXT, result.text)
	        logger.error("[Azure] voiceToText error, result={}, errordetails={}".format(result, self._error_details(result)))
	        return Reply(ReplyType.ERROR, "抱歉，语音识别失败")

	    def textToVoice(self, text):
	        """Synthesize *text* into a wav file and return it as a Reply.

	        Returns a ``ReplyType.VOICE`` reply holding the path of the
	        generated file, or a ``ReplyType.ERROR`` reply when the SDK reports
	        anything other than ``SynthesizingAudioCompleted``.
	        """
	        self.speech_config.speech_synthesis_voice_name = self._select_voice(text)
	        # Avoid the same filename under multithreading
	        fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".wav"
	        audio_config = speechsdk.AudioConfig(filename=fileName)
	        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
	        result = speech_synthesizer.speak_text(text)
	        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
	            logger.info("[Azure] textToVoice text={} voice file name={}".format(text, fileName))
	            return Reply(ReplyType.VOICE, fileName)
	        logger.error("[Azure] textToVoice error, result={}, errordetails={}".format(result, self._error_details(result)))
	        return Reply(ReplyType.ERROR, "抱歉，语音合成失败")