Spaces:

youngtsai
/

dialogue_generator

Runtime error

App Files Community

youngtsai committed on Oct 28, 2023

Commit

16e04ca

•

1 Parent(s): 7fac4f2

speech_config = speechsdk.SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)

Browse files

Files changed (2) hide show

app.py +49 -22
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,14 +1,16 @@
 import gradio as gr
-from gtts import gTTS
 import json
 import os
 import openai
 import re
-import pyttsx3
 PASSWORD = os.environ['PASSWORD']
 OPEN_AI_KEY = os.environ['OPEN_AI_KEY']
 def validate_and_correct_chat(data, roles=["A", "B"], rounds=2):
@@ -118,30 +120,55 @@ def main_function(password: str, theme: str, language: str, method: str, rounds:
     return chatbot_dialogue, audio_path, file_name
 def dialogue_to_audio(dialogue, role1_gender, role2_gender):
-    engine = pyttsx3.init()
-    # Fetch the list of available voices
-    voices = engine.getProperty('voices')
-    print(voices)
-    # Get voice IDs for male and female voices (you might need to adjust these based on available voices on your system)
-    male_voice_id = "com.apple.speech.synthesis.voice.alex"  # Example ID for a male voice
-    female_voice_id = "com.apple.speech.synthesis.voice.victoria"  # Example ID for a female voice
-    file_path = "temp_audio.mp3"
-    for i, item in enumerate(dialogue):
-        gender = role1_gender if i % 2 == 0 else role2_gender
-        voice_id = male_voice_id if gender == "male" else female_voice_id
-        # Set the voice
-        engine.setProperty('voice', voice_id)
-        # Now, synthesize the speech
-        engine.save_to_file(item['content'], file_path)
-        engine.runAndWait()
-    return file_path

 import gradio as gr
 import json
 import os
 import openai
 import re
+import azure.cognitiveservices.speech as speechsdk
 PASSWORD = os.environ['PASSWORD']
 OPEN_AI_KEY = os.environ['OPEN_AI_KEY']
+AZURE_REGION = os.environ['AZURE_REGION']
+AZURE_API_KEY = os.environ['AZURE_API_KEY']
 def validate_and_correct_chat(data, roles=["A", "B"], rounds=2):
     return chatbot_dialogue, audio_path, file_name
 def dialogue_to_audio(dialogue, role1_gender, role2_gender):
+    # Configure Azure Speech Service
+    speech_config = speechsdk.SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)
+    filename="dialogue_output.wav"
+    audio_config = speechsdk.audio.AudioOutputConfig(filename=filename)
+    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
+    # Map genders to Azure TTS voices (This is for demonstration purposes; you may wish to have more sophisticated voice mapping.)
+    voice_map = {
+        "male": {
+            "中文": "zh-CN-HanHanNeural",
+            "英文": "en-US-GuyNeural"
+        },
+        "female": {
+            "中文": "zh-CN-XiaoxiaoNeural",
+            "英文": "en-US-JessaNeural"
+        }
+    }
+    # Convert dialogue list to text
+    dialogue_text = ""
+    for entry in dialogue:
+        role = entry["role"]
+        content = entry["content"]
+        # Set voice based on role and language
+        if role == role1_gender:
+            voice = voice_map[role1_gender][content[-2:]]
+        else:
+            voice = voice_map[role2_gender][content[-2:]]
+        # Append SSML-formatted content
+        dialogue_text += f"<voice name='{voice}'>{content[:-3]}</voice> "
+    ssml = f"""
+    <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'>
+        {dialogue_text}
+    </speak>
+    """
+    # Perform synthesis
+    result = speech_synthesizer.speak_ssml(ssml)
+    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+        print("Audio synthesized successfully!")
+    else:
+        print("Error synthesizing audio:", result.reason)
+    # Return the path to the audio file
+    return filename

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 gradio
 gtts
 openai
-pyttsx3

 gradio
 gtts
 openai
+azure-cognitiveservices-speech