Yusin committed on
Commit dda3d27 • 1 Parent(s): cd20d75

Update app.py

Files changed (1)
  1. app.py +24 -60
app.py CHANGED
@@ -1,40 +1,33 @@
+import os
+import json
+import openai
 import tempfile
 import gradio as gr
+import infer
+import config
 from neon_tts_plugin_coqui import CoquiTTS
+title = "Speech to ChatGPT to Speech"
+coquiTTS = CoquiTTS()
+
 LANGUAGES = list(CoquiTTS.langs.keys())
 LANGUAGES = LANGUAGES + ['cn', 'jp']
 default_lang = "en"
-#import whisper
-#whisper_model = whisper.load_model("small")
-#whisper = gr.Interface.load(name="spaces/abidlabs/whisper-large-v2")
 whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
-#cn_a_jp = gr.Blocks.load(name="spaces/Yusin/anime-tts_yusin")
-#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
-#chatgpt = gr.Blocks.load(name="spaces/seawolf2357/chatgptclone")
-import os
-import json
-import openai
-#session_token = os.environ.get('SessionToken')
 api_key = os.environ.get('api_key')
 #if you have OpenAI API key as a string, enable the below
 openai.api_key = api_key
 
-title = "Speech to ChatGPT to Speech"
-#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
-#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
-coquiTTS = CoquiTTS()
-
+pth_path = config.pth_path
+config_json = config.config_json
+net_g_ms, hps = infer.load_model(config_json, pth_path)
 
 # ChatGPT
-def chat_hf(audio, custom_token, language):
+def chat_hf(audio, language):
     try:
         whisper_text = translate(audio)
         if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
             gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
         else:
-            #gpt_response = chatgpt(whisper_text, [], fn_index=0)
-            #print(gpt_response)
-            #gpt_response = gpt_response[0]
             gpt_response = openai_create(whisper_text)
 
     except:
@@ -42,35 +35,16 @@ def chat_hf(audio, custom_token, language):
         gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""
 
     # to voice
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        coquiTTS.get_tts(gpt_response, fp, speaker = {"language" : language})
-
-    return whisper_text, gpt_response, fp.name
+    if language in ('cn', 'jp'):
+        text = infer.clean_text(gpt_response)
+        audio = infer.infer(text, net_g_ms, 2, "demo")
+        voice_out = (hps.data.sampling_rate, audio)
+    else:
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+            coquiTTS.get_tts(gpt_response, fp, speaker = {"language" : language})
+        voice_out = fp.name
+    return whisper_text, gpt_response, voice_out
 
-# whisper
-#def translate(audio):
-#    print("""
-#    —
-#    Sending audio to Whisper ...
-#    —
-#    """)
-#
-#    audio = whisper.load_audio(audio)
-#    audio = whisper.pad_or_trim(audio)
-#
-#    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
-#
-#    _, probs = whisper_model.detect_language(mel)
-#
-#    transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
-#
-#    transcription = whisper.decode(whisper_model, mel, transcript_options)
-#
-#    print("language spoken: " + transcription.language)
-#    print("transcript: " + transcription.text)
-#    print("——————————————————————————————")
-#
-#    return transcription.text
 
 def translate(audio):
     print("""
@@ -78,14 +52,13 @@ def translate(audio):
     Sending audio to Whisper ...
     —
     """)
-    #_, text_result = whisper(audio, "", fn_index=0)
+
     text_result = whisper(audio, None, "transcribe", fn_index=0)
     print(text_result)
     return text_result
 
 
 def openai_create(prompt):
-
     response = openai.Completion.create(
         model="text-davinci-003",
         prompt=prompt,
@@ -99,11 +72,9 @@ def openai_create(prompt):
     print(response.choices[0].text)
     return response.choices[0].text
 
+
 with gr.Blocks() as blocks:
-    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
-                + title
-                + "</h1>")
-    #gr.Markdown(description)
+    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
     radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
     with gr.Row(equal_height=True):# equal_height=False
         with gr.Column():# variant="panel"
@@ -115,18 +86,11 @@ with gr.Blocks() as blocks:
             text1 = gr.Textbox(label="Speech to Text")
             text2 = gr.Textbox(label="ChatGPT Response")
             audio = gr.Audio(label="Output", interactive=False)
-            #gr.Markdown(info)
-            #gr.Markdown("<center>"
-            #            +f'<img src={badge} alt="visitors badge"/>'
-            #            +"</center>")
-
     # actions
     submit.click(
         chat_hf,
-        [audio_file, custom_token, radio],
+        [audio_file, radio],
        [text1, text2, audio],
     )
-    #radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)
-
 
 blocks.launch(debug=True)
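The cn/jp branch added above depends on two modules, config.py and infer.py, that this commit imports but does not include. Below is a minimal sketch of the interface the new call sites assume; every path, name, and return shape here is inferred from app.py alone, not taken from the Space's actual files.

# config.py (assumed shape; both paths are placeholders)
pth_path = "model/G_latest.pth"      # hypothetical VITS-style checkpoint
config_json = "model/config.json"    # hypothetical hyperparameter file

# infer.py (assumed shape)
def load_model(config_json, pth_path):
    # Expected to return (net_g_ms, hps); app.py only reads
    # hps.data.sampling_rate and passes net_g_ms back into infer().
    ...

def clean_text(text):
    # Expected to normalize ChatGPT's reply into the model's input symbols.
    ...

def infer(text, net_g_ms, speaker_id, label):
    # Expected to synthesize speech for `speaker_id` and return a 1-D numpy
    # array of samples; app.py wraps it as (hps.data.sampling_rate, audio).
    ...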
 
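Note that chat_hf now returns two different audio shapes: the cn/jp path yields a (sample_rate, numpy_array) tuple, while the CoquiTTS path yields a .wav file path. Gradio's gr.Audio output component accepts either form. A self-contained check, using a hypothetical 440 Hz tone in place of real TTS output:

import tempfile
import numpy as np
import gradio as gr
from scipy.io import wavfile

def tone(as_file):
    sr = 22050  # assumed sample rate, for illustration only
    samples = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)
    if as_file:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            wavfile.write(fp.name, sr, samples)
        return fp.name      # file path, like the CoquiTTS branch
    return (sr, samples)    # tuple, like the infer branch

gr.Interface(tone, gr.Checkbox(label="return file path"), gr.Audio()).launch()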
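For reference, translate() performs no local inference: gr.Interface.load wraps the hosted whisper-large-v2 Space in a callable whose positional arguments mirror that Space's input components, and fn_index=0 selects its first registered endpoint. A standalone sketch of the same remote call; the (microphone, upload, task) argument order is an assumption read off the call in app.py:

import gradio as gr

whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")

def transcribe_file(path):
    # First slot carries the recorded audio, the second (upload) is left as
    # None, and "transcribe" selects the task, mirroring app.py's call.
    return whisper(path, None, "transcribe", fn_index=0)

print(transcribe_file("sample.wav"))  # hypothetical local file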