FranklinWillemen committed
Commit d086a4b
1 Parent(s): 1b653d3

Custom voice + Gradio improvements

Files changed (1)
  1. TARS-discourse.py +41 -5
TARS-discourse.py CHANGED
@@ -3,9 +3,15 @@ import openai
 import config
 import os
 import pyttsx3
+import azure.cognitiveservices.speech as speechsdk
+
 openai.api_key = config.OPENAI_API_KEY
 
-messages = [{"role": "system", "content": 'You are a therapist. Respond to all input in 25 words or less.'}]
+speech_config = speechsdk.SpeechConfig(subscription=config.AZURE_VOICE_KEY, region=config.AZURE_VOICE_LOCATION)
+speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
+speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
+
+messages = [{"role": "system", "content": 'You are a clever and helpfull conversationalist. Respond to all input in 25 words or less. Do not mention that you are an AI or service of any kind.'}]
 
 def transcribe(audio: str):
     global messages
@@ -23,9 +29,22 @@ def transcribe(audio: str):
     system_message = response["choices"][0]["message"]
     messages.append(system_message)
 
-    engine = pyttsx3.init()
-    engine.say(system_message['content'])
-    engine.runAndWait()
+    # engine = pyttsx3.init()
+    # engine.say(system_message['content'])
+    # engine.runAndWait()
+
+    result = speech_synthesizer.speak_text_async(system_message['content']).get()
+
+    # Checks result.
+    # if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+    #     print("Speech synthesized to speaker for text [{}]".format(text))
+    # elif result.reason == speechsdk.ResultReason.Canceled:
+    #     cancellation_details = result.cancellation_details
+    #     print("Speech synthesis canceled: {}".format(cancellation_details.reason))
+    #     if cancellation_details.reason == speechsdk.CancellationReason.Error:
+    #         if cancellation_details.error_details:
+    #             print("Error details: {}".format(cancellation_details.error_details))
+    #             print("Did you update the subscription info?")
 
     chat_transcript = ""
     for message in messages:
@@ -34,4 +53,21 @@ def transcribe(audio: str):
 
     return chat_transcript
 
-ui = gr.Interface(fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text").launch()
+# set a custom theme
+theme = gr.themes.Default().set(
+    body_background_fill="#000000",
+)
+
+with gr.Blocks(theme=theme) as ui:
+    # advisor image input and microphone input
+    advisor = gr.Image(value=config.ADVISOR_IMAGE).style(width=config.ADVISOR_IMAGE_WIDTH, height=config.ADVISOR_IMAGE_HEIGHT)
+    audio_input = gr.Audio(source="microphone", type="filepath")
+
+    # text transcript output and audio
+    text_output = gr.Textbox(label="Conversation Transcript")
+    audio_output = gr.Audio()
+
+    btn = gr.Button("Run")
+    btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, audio_output])
+
+ui.launch(debug=True, share=True)
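
The updated script reads all of its credentials and UI settings from a local config module. A minimal sketch of what that module is assumed to provide, based only on the attributes referenced in the diff above (the names come from the code; every value below is an illustrative placeholder, not part of the commit):

# config.py -- placeholder values for illustration only
OPENAI_API_KEY = "sk-..."                # used for openai.api_key
AZURE_VOICE_KEY = "<azure-speech-key>"   # Azure Speech resource key
AZURE_VOICE_LOCATION = "<azure-region>"  # region of the Azure Speech resource
ADVISOR_IMAGE = "advisor.png"            # image shown at the top of the Gradio UI
ADVISOR_IMAGE_WIDTH = 400                # display width in pixels
ADVISOR_IMAGE_HEIGHT = 400               # display height in pixels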
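
The new Blocks layout routes transcribe to both a transcript Textbox and an Audio component, and gr.Audio can play back a filepath returned by the function. A hedged sketch of turning the Azure synthesis result into such a file, assuming the SDK's SpeechSynthesisResult.audio_data bytes (the helper name and temporary path are hypothetical, not part of the commit):

import tempfile

def save_synthesized_audio(result) -> str:
    # Write the synthesized audio bytes to a temporary WAV file and return its path,
    # which a gr.Audio output component can play back directly.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    tmp.write(result.audio_data)
    tmp.close()
    return tmp.name

# inside transcribe(), after speak_text_async(...).get():
#     return chat_transcript, save_synthesized_audio(result)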