SnJForever committed on
Commit
586fb1f
1 Parent(s): d76590a

add the azure tts

Browse files
Files changed (2) hide show
  1. app.py +48 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -39,7 +39,8 @@ from langchain.text_splitter import CharacterTextSplitter
39
  from langchain.vectorstores.faiss import FAISS
40
  from langchain.docstore.document import Document
41
  from langchain.chains.question_answering import load_qa_chain
42
-
 
43
 
44
  news_api_key = os.environ["NEWS_API_KEY"]
45
 
@@ -434,9 +435,9 @@ class ChatWrapper:
434
  else:
435
  temp_file = LOOPING_TALKING_HEAD
436
  html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
437
- html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
438
  else:
439
- html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
440
  else:
441
  if talking_head:
442
  temp_file = LOOPING_TALKING_HEAD
@@ -456,6 +457,50 @@ class ChatWrapper:
456
 
457
  chat = ChatWrapper()
458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
 
460
  def do_html_audio_speak(words_to_speak, polly_language):
461
  polly_client = boto3.Session(
 
39
  from langchain.vectorstores.faiss import FAISS
40
  from langchain.docstore.document import Document
41
  from langchain.chains.question_answering import load_qa_chain
42
+ import azure.cognitiveservices.speech as speechsdk
43
+
44
 
45
  news_api_key = os.environ["NEWS_API_KEY"]
46
 
 
435
  else:
436
  temp_file = LOOPING_TALKING_HEAD
437
  html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
438
+ html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
439
  else:
440
+ html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
441
  else:
442
  if talking_head:
443
  temp_file = LOOPING_TALKING_HEAD
 
457
 
458
  chat = ChatWrapper()
459
 
460
def do_html_audio_speak_azure(words_to_speak, axure_language):
    """Synthesize speech with Azure Speech and build an autoplay audio tag.

    The synthesized audio is written to ``audios/tempfile.mp3`` and an HTML
    ``<audio>`` element pointing at that file is returned.

    Args:
        words_to_speak: Text to convert to speech.
        axure_language: Requested output language. Currently unused: the
            voice is fixed to ``zh-CN-XiaoxiaoNeural``. The parameter is kept
            so existing callers keep working.

    Returns:
        A ``(html_audio, audio_path)`` tuple. ``html_audio`` is the
        ``<audio>`` tag on success, or ``'<pre>no audio</pre>'`` if synthesis
        finished in a state that is neither completed nor canceled.
        ``(None, None)`` is returned when synthesis was canceled or the audio
        file could not be written.

    Raises:
        KeyError: If ``SPEECH_KEY`` or ``SERVICE_REGION`` is not set in the
            environment.
    """
    audio_path = "audios/tempfile.mp3"

    # No trailing commas: the original assignments ended with "," which turned
    # both values into 1-tuples instead of the strings SpeechConfig expects.
    speech_key = os.environ["SPEECH_KEY"]
    service_region = os.environ["SERVICE_REGION"]

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Note: the voice setting will not overwrite the voice element in input SSML.
    speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
    # The SDK emits WAV by default; request MP3 so the bytes match the
    # ".mp3" file name and the "audio/mp3" MIME type used below.
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # audio_config=None keeps the audio in memory (result.audio_data) instead
    # of playing it or writing it from inside the SDK, so this function fully
    # controls when the output file is written and closed.
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
    result = speech_synthesizer.speak_text_async(words_to_speak).get()

    # Fallback markup; must be set BEFORE the branches so a successful
    # synthesis is not overwritten afterwards.
    html_audio = '<pre>no audio</pre>'

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(words_to_speak))
        try:
            os.makedirs(os.path.dirname(audio_path), exist_ok=True)
            with open(audio_path, 'wb') as f:
                f.write(result.audio_data)
            temp_aud_file = gr.File(audio_path)
            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
        except IOError as error:
            # Could not write to file, exit gracefully
            print(error)
            return None, None
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None

    return html_audio, audio_path
503
+
504
 
505
  def do_html_audio_speak(words_to_speak, polly_language):
506
  polly_client = boto3.Session(
requirements.txt CHANGED
@@ -8,3 +8,4 @@ requests==2.28.2
8
  git+https://github.com/openai/whisper.git
9
  boto3==1.26.93
10
  faiss-cpu
 
 
8
  git+https://github.com/openai/whisper.git
9
  boto3==1.26.93
10
  faiss-cpu
11
+ azure-cognitiveservices-speech==1.27.0