Spaces:
Running
Running
SnJForever
committed on
Commit
•
586fb1f
1
Parent(s):
d76590a
add the azure tts
Browse files- app.py +48 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -39,7 +39,8 @@ from langchain.text_splitter import CharacterTextSplitter
|
|
39 |
from langchain.vectorstores.faiss import FAISS
|
40 |
from langchain.docstore.document import Document
|
41 |
from langchain.chains.question_answering import load_qa_chain
|
42 |
-
|
|
|
43 |
|
44 |
news_api_key = os.environ["NEWS_API_KEY"]
|
45 |
|
@@ -434,9 +435,9 @@ class ChatWrapper:
|
|
434 |
else:
|
435 |
temp_file = LOOPING_TALKING_HEAD
|
436 |
html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
|
437 |
-
html_audio, temp_aud_file =
|
438 |
else:
|
439 |
-
html_audio, temp_aud_file =
|
440 |
else:
|
441 |
if talking_head:
|
442 |
temp_file = LOOPING_TALKING_HEAD
|
@@ -456,6 +457,50 @@ class ChatWrapper:
|
|
456 |
|
457 |
chat = ChatWrapper()
|
458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
460 |
def do_html_audio_speak(words_to_speak, polly_language):
|
461 |
polly_client = boto3.Session(
|
|
|
39 |
from langchain.vectorstores.faiss import FAISS
|
40 |
from langchain.docstore.document import Document
|
41 |
from langchain.chains.question_answering import load_qa_chain
|
42 |
+
import azure.cognitiveservices.speech as speechsdk
|
43 |
+
|
44 |
|
45 |
news_api_key = os.environ["NEWS_API_KEY"]
|
46 |
|
|
|
435 |
else:
|
436 |
temp_file = LOOPING_TALKING_HEAD
|
437 |
html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
|
438 |
+
html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
|
439 |
else:
|
440 |
+
html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
|
441 |
else:
|
442 |
if talking_head:
|
443 |
temp_file = LOOPING_TALKING_HEAD
|
|
|
457 |
|
458 |
chat = ChatWrapper()
|
459 |
|
460 |
+
def do_html_audio_speak_azure(words_to_speak, axure_language):
    """Synthesize ``words_to_speak`` with Azure Speech and return an HTML audio tag.

    The audio is written to ``audios/tempfile.mp3`` and wrapped in an
    autoplaying ``<audio>`` element that points at that file.

    Args:
        words_to_speak: Text to synthesize.
        axure_language: Currently unused; the voice is fixed to
            ``zh-CN-XiaoxiaoNeural``. The parameter name (including its
            spelling) is kept so existing callers keep working.
            NOTE(review): map this value to an Azure voice name once the
            caller's language identifiers are confirmed.

    Returns:
        A ``(html_audio, audio_path)`` tuple. On success ``html_audio`` is the
        ``<audio>`` tag; if synthesis ends in an unexpected state it is
        ``'<pre>no audio</pre>'``. Returns ``(None, None)`` when synthesis is
        canceled or the audio file cannot be wrapped for serving.

    Raises:
        KeyError: If ``SPEECH_KEY`` or ``SERVICE_REGION`` is missing from the
            environment.
    """
    audio_path = "audios/tempfile.mp3"

    # No trailing commas here: a trailing comma would turn each value into a
    # 1-tuple, and SpeechConfig expects plain strings.
    speech_key = os.environ["SPEECH_KEY"]
    service_region = os.environ["SERVICE_REGION"]

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Note: the voice setting will not overwrite the voice element in input SSML.
    speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
    # Request MP3 explicitly so the file contents match the ".mp3" name and
    # the "audio/mp3" MIME type used in the HTML below.
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # Set the output audio file path and name; make sure the folder exists so
    # the synthesizer can create the file.
    os.makedirs(os.path.dirname(audio_path), exist_ok=True)
    audio_config = speechsdk.audio.AudioOutputConfig(filename=audio_path)

    text = words_to_speak

    # The synthesizer writes its output to the file configured above.
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # Fallback markup, used only if synthesis ends in an unexpected state.
    html_audio = '<pre>no audio</pre>'

    result = speech_synthesizer.speak_text_async(text).get()
    # Check result
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}]".format(text))
        try:
            # The synthesizer has already written the audio file, so it is
            # only wrapped for serving here; reopening it with 'wb' would
            # truncate the freshly written audio.
            temp_aud_file = gr.File(audio_path)
            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
        except IOError as error:
            # Could not access the file, exit gracefully
            print(error)
            return None, None
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None

    return html_audio, audio_path
|
503 |
+
|
504 |
|
505 |
def do_html_audio_speak(words_to_speak, polly_language):
|
506 |
polly_client = boto3.Session(
|
requirements.txt
CHANGED
@@ -8,3 +8,4 @@ requests==2.28.2
|
|
8 |
git+https://github.com/openai/whisper.git
|
9 |
boto3==1.26.93
|
10 |
faiss-cpu
|
|
|
|
8 |
git+https://github.com/openai/whisper.git
|
9 |
boto3==1.26.93
|
10 |
faiss-cpu
|
11 |
+
azure-cognitiveservices-speech==1.27.0
|