rogerxavier committed on
Commit
b8202f6
1 Parent(s): 8654962

Update 3mergeDialogToVideo.py

Browse files
Files changed (1) hide show
  1. 3mergeDialogToVideo.py +37 -28
3mergeDialogToVideo.py CHANGED
@@ -13,7 +13,6 @@ import time
13
  from moviepy.audio.AudioClip import AudioArrayClip
14
  from moviepy.editor import *
15
  import cv2
16
- import azure.cognitiveservices.speech as speechsdk
17
  import numpy as np
18
  import io
19
  import base64
@@ -25,7 +24,7 @@ import os
25
  azure_speech_key = os.getenv('azure_speech_key')
26
  azure_service_region = os.getenv('azure_service_region')
27
  my_openai_key = os.getenv('my_openai_key')
28
-
29
  print("azure key是",azure_speech_key)
30
  print("azure_service_region是",azure_service_region)
31
  print("my_openai_key",my_openai_key)
@@ -123,32 +122,42 @@ def get_audio_data(text:str)-> "返回audio data io句柄, duration":
123
  speech_key = azure_speech_key
124
  service_region = azure_service_region
125
 
126
- speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
127
- # Note: the voice setting will not overwrite the voice element in input SSML.
128
- speech_config.speech_synthesis_voice_name = "zh-CN-YunxiNeural" ##云希
129
-
130
- # use the default speaker as audio output.
131
- speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
132
-
133
- result = speech_synthesizer.speak_text_async(text).get()
134
- # Check result
135
- if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
136
- print("Speech synthesized for text [{}]".format(text))
137
- elif result.reason == speechsdk.ResultReason.Canceled:
138
- cancellation_details = result.cancellation_details
139
- print("Speech synthesis canceled: {}".format(cancellation_details.reason))
140
- if cancellation_details.reason == speechsdk.CancellationReason.Error:
141
- print("Error details: {}".format(cancellation_details.error_details))
142
-
143
- # print("音频持续时间是",result.audio_duration)
144
- # print("音频数据是",result.audio_data)
145
- # 创建临时文件 -当前路径下面
146
- with tempfile.NamedTemporaryFile(dir='/',delete=False) as temp_file:
147
- temp_file.write(result.audio_data)
148
- temp_file.close()
149
- # 在这里完成您对文件的操作,比如返回文件名
150
- file_name = temp_file.name
151
- return file_name, str(result.audio_duration)
 
 
 
 
 
 
 
 
 
 
152
 
153
 
154
  # 补零函数,将数字部分补齐为指定长度
 
13
  from moviepy.audio.AudioClip import AudioArrayClip
14
  from moviepy.editor import *
15
  import cv2
 
16
  import numpy as np
17
  import io
18
  import base64
 
24
  azure_speech_key = os.getenv('azure_speech_key')
25
  azure_service_region = os.getenv('azure_service_region')
26
  my_openai_key = os.getenv('my_openai_key')
27
+ speech_synthesis_voice_name = "zh-CN-YunhaoNeural" ##云皓
28
  print("azure key是",azure_speech_key)
29
  print("azure_service_region是",azure_service_region)
30
  print("my_openai_key",my_openai_key)
 
122
  speech_key = azure_speech_key
123
  service_region = azure_service_region
124
 
125
+ voiceText = text
126
+ url = f"https://{service_region}.tts.speech.microsoft.com/cognitiveservices/v1"
127
+
128
+ headers = {
129
+ "Ocp-Apim-Subscription-Key": speech_key,
130
+ "Content-Type": "application/ssml+xml",
131
+ "X-Microsoft-OutputFormat": "audio-16khz-128kbitrate-mono-mp3",
132
+ "User-Agent": "curl"
133
+ }
134
+
135
+ ssml_text = '''
136
+ <speak version='1.0' xml:lang='zh-CN'>
137
+ <voice xml:lang='zh-CN' xml:gender='male' name='{voiceName}'>
138
+ {voiceText}
139
+ </voice>
140
+ </speak>
141
+ '''.format(voiceName=speech_synthesis_voice_name,voiceText = voiceText)
142
+
143
+ response = requests.post(url, headers=headers, data=ssml_text.encode('utf-8'))
144
+
145
+ if response.status_code == 200:
146
+ # print("音频持续时间是",response.audio_duration)
147
+ # print("音频数据是",response.content)
148
+ # 创建临时文件 -当前路径下面
149
+ with tempfile.NamedTemporaryFile(dir='/',delete=False) as temp_file:
150
+ temp_file.write(response.content)
151
+ temp_file.close()
152
+ # 在这里完成您对文件的操作,比如返回文件名
153
+ file_name = temp_file.name
154
+ return file_name, str(response.audio_duration)
155
+
156
+ else:
157
+ print("Error: Failed to synthesize audio. Status code:", response.status_code)
158
+
159
+
160
+
161
 
162
 
163
  # 补零函数,将数字部分补齐为指定长度