FranklinWillemen committed
Commit d086a4b
1 Parent(s): 1b653d3

Custom voice + Gradio improvements

Files changed (1)
  1. TARS-discourse.py +41 -5
TARS-discourse.py CHANGED
@@ -3,9 +3,15 @@ import openai
 import config
 import os
 import pyttsx3
+import azure.cognitiveservices.speech as speechsdk
+
 openai.api_key = config.OPENAI_API_KEY
 
-messages = [{"role": "system", "content": 'You are a therapist. Respond to all input in 25 words or less.'}]
+speech_config = speechsdk.SpeechConfig(subscription=config.AZURE_VOICE_KEY, region=config.AZURE_VOICE_LOCATION)
+speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
+speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
+
+messages = [{"role": "system", "content": 'You are a clever and helpfull conversationalist. Respond to all input in 25 words or less. Do not mention that you are an AI or service of any kind.'}]
 
 def transcribe(audio: str):
     global messages
@@ -23,9 +29,22 @@ def transcribe(audio: str):
     system_message = response["choices"][0]["message"]
     messages.append(system_message)
 
-    engine = pyttsx3.init()
-    engine.say(system_message['content'])
-    engine.runAndWait()
+    # engine = pyttsx3.init()
+    # engine.say(system_message['content'])
+    # engine.runAndWait()
+
+    result = speech_synthesizer.speak_text_async(system_message['content']).get()
+
+    # Checks result.
+    # if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+    #     print("Speech synthesized to speaker for text [{}]".format(text))
+    # elif result.reason == speechsdk.ResultReason.Canceled:
+    #     cancellation_details = result.cancellation_details
+    #     print("Speech synthesis canceled: {}".format(cancellation_details.reason))
+    #     if cancellation_details.reason == speechsdk.CancellationReason.Error:
+    #         if cancellation_details.error_details:
+    #             print("Error details: {}".format(cancellation_details.error_details))
+    #             print("Did you update the subscription info?")
 
     chat_transcript = ""
     for message in messages:
@@ -34,4 +53,21 @@ def transcribe(audio: str):
 
     return chat_transcript
 
-ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text").launch()
+# set a custom theme
+theme = gr.themes.Default().set(
+    body_background_fill="#000000",
+)
+
+with gr.Blocks(theme=theme) as ui:
+    # advisor image input and microphone input
+    advisor = gr.Image(value=config.ADVISOR_IMAGE).style(width=config.ADVISOR_IMAGE_WIDTH, height=config.ADVISOR_IMAGE_HEIGHT)
+    audio_input = gr.Audio(source="microphone", type="filepath")
+
+    # text transcript output and audio
+    text_output = gr.Textbox(label="Conversation Transcript")
+    audio_output = gr.Audio()
+
+    btn = gr.Button("Run")
+    btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, audio_output])
+
+ui.launch(debug=True, share=True)
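
The updated script reads all of its credentials and UI settings from a local config module. A minimal sketch of what that module is assumed to provide, based only on the attributes referenced in the diff above (the names come from the code; every value below is an illustrative placeholder, not part of the commit):

# config.py -- placeholder values for illustration only
OPENAI_API_KEY = "sk-..."                # used for openai.api_key
AZURE_VOICE_KEY = "<azure-speech-key>"   # Azure Speech resource key
AZURE_VOICE_LOCATION = "<azure-region>"  # region of the Azure Speech resource
ADVISOR_IMAGE = "advisor.png"            # image shown at the top of the Gradio UI
ADVISOR_IMAGE_WIDTH = 400                # display width in pixels
ADVISOR_IMAGE_HEIGHT = 400               # display height in pixels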
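
The new Blocks layout routes transcribe to both a transcript Textbox and an Audio component, and gr.Audio can play back a filepath returned by the function. A hedged sketch of turning the Azure synthesis result into such a file, assuming the SDK's SpeechSynthesisResult.audio_data bytes (the helper name and temporary path are hypothetical, not part of the commit):

import tempfile

def save_synthesized_audio(result) -> str:
    # Write the synthesized audio bytes to a temporary WAV file and return its path,
    # which a gr.Audio output component can play back directly.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.write(result.audio_data)
    tmp.close()
    return tmp.name

# inside transcribe(), after speak_text_async(...).get():
#     return chat_transcript, save_synthesized_audio(result)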