sanchit-gandhi HF staff committed on
Commit
dab2036
1 Parent(s): 84a239d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -40
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import torch
 
2
 
3
  import gradio as gr
4
  from transformers import pipeline
@@ -12,8 +13,6 @@ import json
12
  import soundfile as sf
13
 
14
 
15
- session_token = os.environ.get("SessionToken")
16
-
17
  device = 0 if torch.cuda.is_available() else "cpu"
18
 
19
  # Initialise STT (Whisper)
@@ -22,17 +21,27 @@ pipe = pipeline(
22
  model="openai/whisper-base.en",
23
  chunk_length_s=30,
24
  device=device,
25
- )
 
 
 
 
26
 
27
  # Initialise TTS (tacotron2) and Vocoder (HiFIGAN)
28
- tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts", overrides={"max_decoder_steps": 2000}, run_opts={"device":device})
 
 
 
 
 
29
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
30
 
 
31
  def get_response_from_chatbot(text, reset_conversation):
32
  try:
33
  if reset_conversation:
34
  api.refresh_auth()
35
- api.reset_conversation()
36
  resp = api.send_message(text)
37
  response = resp["message"]
38
  except:
@@ -43,7 +52,7 @@ def get_response_from_chatbot(text, reset_conversation):
43
  def chat(input_audio, chat_history, reset_conversation):
44
  # speech -> text (Whisper)
45
  message = pipe(input_audio)["text"]
46
-
47
  # text -> response (chatGPT)
48
  response = get_response_from_chatbot(message, reset_conversation)
49
 
@@ -63,7 +72,7 @@ def chat(input_audio, chat_history, reset_conversation):
63
  return out_chat, chat_history, "out.wav"
64
 
65
 
66
- start_work= """async() => {
67
  function isMobile() {
68
  try {
69
  document.createEvent("TouchEvent"); return true;
@@ -81,12 +90,12 @@ start_work= """async() => {
81
  }
82
  return clientHeight;
83
  }
84
-
85
  function setNativeValue(element, value) {
86
  const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
87
  const prototype = Object.getPrototypeOf(element);
88
  const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
89
-
90
  if (valueSetter && valueSetter !== prototypeValueSetter) {
91
  prototypeValueSetter.call(element, value);
92
  } else {
@@ -97,13 +106,13 @@ start_work= """async() => {
97
  if (!gradioEl) {
98
  gradioEl = document.querySelector('body > gradio-app');
99
  }
100
-
101
  if (typeof window['gradioEl'] === 'undefined') {
102
  window['gradioEl'] = gradioEl;
103
-
104
  const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
105
  const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
106
-
107
  page1.style.display = "none";
108
  page2.style.display = "block";
109
  window['div_count'] = 0;
@@ -112,7 +121,7 @@ start_work= """async() => {
112
  chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
113
  prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
114
  window['chat_bot1'].children[1].textContent = '';
115
-
116
  clientHeight = getClientHeight();
117
  new_height = (clientHeight-300) + 'px';
118
  chat_row.style.height = new_height;
@@ -122,7 +131,7 @@ start_work= """async() => {
122
  window['chat_bot1'].children[2].style.height = new_height;
123
  prompt_row.children[0].style.flex = 'auto';
124
  prompt_row.children[0].style.width = '100%';
125
-
126
  window['checkChange'] = function checkChange() {
127
  try {
128
  if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
@@ -138,54 +147,66 @@ start_work= """async() => {
138
  } else {
139
  window['chat_bot1'].children[1].textContent = '';
140
  }
141
-
142
  } catch(e) {
143
  }
144
  }
145
  window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
146
  }
147
-
148
  return false;
149
  }"""
150
 
151
-
152
  with gr.Blocks(title="Talk to chatGPT") as demo:
153
  gr.Markdown("## Talk to chatGPT ##")
154
- gr.HTML("<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en'>Whisper</a> to convert the input speech to transcribed text, <a href='https://chat.openai.com/chat'>chatGPT</a> to generate responses, and <a href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech'>tacotron2</a> to convert the response to output speech. </p>")
155
- gr.HTML("<p>You can duplicate this space and use your own session token: <a style='display:inline-block' href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10' alt='Duplicate Space'></a></p>")
156
- gr.HTML("<p> Instruction on how to get session token can be seen in video <a style='display:inline-block' href='https://www.youtube.com/watch?v=TdNSj_qgdFk'><font style='color:blue;weight:bold;'>here</font></a>. Add your session token by going to settings and add under secrets. </p>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  with gr.Group(elem_id="page_1", visible=True) as page_1:
158
- with gr.Box():
159
  with gr.Row():
160
- start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
161
  start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
162
-
163
- with gr.Group(elem_id="page_2", visible=False) as page_2:
164
  with gr.Row(elem_id="chat_row"):
165
  chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
166
- chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
167
  with gr.Row():
168
  prompt_input_audio = gr.Audio(
169
- source="microphone",
170
- type="filepath",
171
- label="Record Audio Input",
172
-
173
- )
174
  prompt_output_audio = gr.Audio()
175
 
176
  reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)
177
  with gr.Row(elem_id="prompt_row"):
178
  chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
179
  submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
180
- margin=True,
181
- rounded=(True, True, True, True),
182
- width=100,
183
- )
184
-
185
-
186
- submit_btn.click(fn=chat,
187
- inputs=[prompt_input_audio, chat_history, reset_conversation],
188
- outputs=[chatbot, chat_history, prompt_output_audio],
189
- )
190
 
191
  demo.launch(debug=True)
 
1
  import torch
2
+ import os
3
 
4
  import gradio as gr
5
  from transformers import pipeline
 
13
  import soundfile as sf
14
 
15
 
 
 
16
  device = 0 if torch.cuda.is_available() else "cpu"
17
 
18
  # Initialise STT (Whisper)
 
21
  model="openai/whisper-base.en",
22
  chunk_length_s=30,
23
  device=device,
24
+ )
25
+
26
+ # Initialise ChatGPT session
27
+ session_token = os.environ.get("SessionToken")
28
+ api = ChatGPT(session_token=session_token)
29
 
30
  # Initialise TTS (tacotron2) and Vocoder (HiFIGAN)
31
+ tacotron2 = Tacotron2.from_hparams(
32
+ source="speechbrain/tts-tacotron2-ljspeech",
33
+ savedir="tmpdir_tts",
34
+ overrides={"max_decoder_steps": 2000},
35
+ run_opts={"device": device},
36
+ )
37
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
38
 
39
+
40
  def get_response_from_chatbot(text, reset_conversation):
41
  try:
42
  if reset_conversation:
43
  api.refresh_auth()
44
+ api.reset_conversation()
45
  resp = api.send_message(text)
46
  response = resp["message"]
47
  except:
 
52
  def chat(input_audio, chat_history, reset_conversation):
53
  # speech -> text (Whisper)
54
  message = pipe(input_audio)["text"]
55
+
56
  # text -> response (chatGPT)
57
  response = get_response_from_chatbot(message, reset_conversation)
58
 
 
72
  return out_chat, chat_history, "out.wav"
73
 
74
 
75
+ start_work = """async() => {
76
  function isMobile() {
77
  try {
78
  document.createEvent("TouchEvent"); return true;
 
90
  }
91
  return clientHeight;
92
  }
93
+
94
  function setNativeValue(element, value) {
95
  const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
96
  const prototype = Object.getPrototypeOf(element);
97
  const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
98
+
99
  if (valueSetter && valueSetter !== prototypeValueSetter) {
100
  prototypeValueSetter.call(element, value);
101
  } else {
 
106
  if (!gradioEl) {
107
  gradioEl = document.querySelector('body > gradio-app');
108
  }
109
+
110
  if (typeof window['gradioEl'] === 'undefined') {
111
  window['gradioEl'] = gradioEl;
112
+
113
  const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
114
  const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
115
+
116
  page1.style.display = "none";
117
  page2.style.display = "block";
118
  window['div_count'] = 0;
 
121
  chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
122
  prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
123
  window['chat_bot1'].children[1].textContent = '';
124
+
125
  clientHeight = getClientHeight();
126
  new_height = (clientHeight-300) + 'px';
127
  chat_row.style.height = new_height;
 
131
  window['chat_bot1'].children[2].style.height = new_height;
132
  prompt_row.children[0].style.flex = 'auto';
133
  prompt_row.children[0].style.width = '100%';
134
+
135
  window['checkChange'] = function checkChange() {
136
  try {
137
  if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
 
147
  } else {
148
  window['chat_bot1'].children[1].textContent = '';
149
  }
150
+
151
  } catch(e) {
152
  }
153
  }
154
  window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
155
  }
156
+
157
  return false;
158
  }"""
159
 
 
160
  with gr.Blocks(title="Talk to chatGPT") as demo:
161
  gr.Markdown("## Talk to chatGPT ##")
162
+ gr.HTML(
163
+ "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en'>Whisper</a> to convert the input speech"
164
+ " to transcribed text, <a href='https://chat.openai.com/chat'>chatGPT</a> to generate responses, and <a"
165
+ " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech'>tacotron2</a> to convert the response to"
166
+ " output speech. </p>"
167
+ )
168
+ gr.HTML(
169
+ "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
170
+ " href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'><img"
171
+ " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
172
+ " alt='Duplicate Space'></a></p>"
173
+ )
174
+ gr.HTML(
175
+ "<p> Instruction on how to get session token can be seen in video <a style='display:inline-block'"
176
+ " href='https://www.youtube.com/watch?v=TdNSj_qgdFk'><font style='color:blue;weight:bold;'>here</font></a>."
177
+ " Add your session token by going to settings and add under secrets. </p>"
178
+ )
179
  with gr.Group(elem_id="page_1", visible=True) as page_1:
180
+ with gr.Box():
181
  with gr.Row():
182
+ start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
183
  start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
184
+
185
+ with gr.Group(elem_id="page_2", visible=False) as page_2:
186
  with gr.Row(elem_id="chat_row"):
187
  chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
188
+ chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
189
  with gr.Row():
190
  prompt_input_audio = gr.Audio(
191
+ source="microphone",
192
+ type="filepath",
193
+ label="Record Audio Input",
194
+ )
 
195
  prompt_output_audio = gr.Audio()
196
 
197
  reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)
198
  with gr.Row(elem_id="prompt_row"):
199
  chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
200
  submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
201
+ margin=True,
202
+ rounded=(True, True, True, True),
203
+ width=100,
204
+ )
205
+
206
+ submit_btn.click(
207
+ fn=chat,
208
+ inputs=[prompt_input_audio, chat_history, reset_conversation],
209
+ outputs=[chatbot, chat_history, prompt_output_audio],
210
+ )
211
 
212
  demo.launch(debug=True)