dlflannery committed on
Commit
f30621f
·
verified ·
1 Parent(s): 6519c62

Update app.py

Browse files

chunked audio output

Files changed (1) hide show
  1. app.py +47 -12
app.py CHANGED
@@ -28,6 +28,9 @@ speak_file = dataDir + "speek.wav"
28
 
29
  client = OpenAI(api_key = key)
30
 
 
 
 
31
  def genUsageStats(do_reset=False):
32
  result = []
33
  ttotal4o_in = 0
@@ -176,13 +179,14 @@ def transcribe(user, pwd, fpath):
176
  def pause_message():
177
  return "Audio input is paused. Resume or Stop as desired"
178
 
179
- def gen_output_audio(txt):
180
- if len(txt) < 10:
181
- txt = "This dialog is too short to mess with!"
182
- response = client.audio.speech.create(model="tts-1", voice="fable", input=txt)
183
- with open(speak_file, 'wb') as fp:
184
- fp.write(response.content)
185
- return speak_file
 
186
 
187
  def set_speak_button(txt):
188
  vis = False
@@ -190,11 +194,41 @@ def set_speak_button(txt):
190
  vis = True
191
  return gr.Button(visible=vis)
192
 
193
- def delete_speak_file():
 
194
  if os.path.exists(speak_file):
195
  os.remove(speak_file)
 
 
 
196
 
197
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  history = gr.State([])
199
  password = gr.State("")
200
  model = gr.State("gpt-4o-mini")
@@ -215,11 +249,11 @@ with gr.Blocks() as demo:
215
  clear_button = gr.Button(value="Restart Conversation")
216
  # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
217
  # value="gpt-3.5-turbo", label="GPT Model", interactive=True)
218
- submit_window = gr.Button(value="Submit Prompt/Question")
219
  speak_output = gr.Button(value="Speak Dialog", visible=False)
220
  prompt_window = gr.Textbox(label = "Prompt or Question")
221
  output_window = gr.Textbox(label = "Dialog")
222
- submit_window.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model],
223
  outputs=[history, output_window, prompt_window, model])
224
  clear_button.click(clear, inputs=[], outputs=[prompt_window, history, output_window])
225
  audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
@@ -227,7 +261,8 @@ with gr.Blocks() as demo:
227
  audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
228
  reset_button.add(audio_widget)
229
  audio_out = gr.Audio(autoplay=True, visible=False)
230
- speak_output.click(gen_output_audio, output_window, audio_out)
 
231
  output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
232
- demo.unload(delete_speak_file)
233
  demo.launch(share=True)
 
28
 
29
  client = OpenAI(api_key = key)
30
 
31
+ words_out = []
32
+ index = 0
33
+
34
  def genUsageStats(do_reset=False):
35
  result = []
36
  ttotal4o_in = 0
 
179
  def pause_message():
180
  return "Audio input is paused. Resume or Stop as desired"
181
 
182
+ # def gen_output_audio(txt):
183
+ # if len(txt) < 10:
184
+ # txt = "This dialog is too short to mess with!"
185
+ # response = client.audio.speech.create(model="tts-1", voice="fable", input=txt)
186
+ # with open(speak_file, 'wb') as fp:
187
+ # fp.write(response.content)
188
+ # return speak_file
189
+
190
 
191
  def set_speak_button(txt):
192
  vis = False
 
194
  vis = True
195
  return gr.Button(visible=vis)
196
 
197
+ def clean_up():
198
+ global words_out, index
199
  if os.path.exists(speak_file):
200
  os.remove(speak_file)
201
+ words_out=[]
202
+ index=0
203
+
204
 
205
  with gr.Blocks() as demo:
206
+ def initial_audio_output(txt):
207
+ global words_out, index
208
+ index = 0
209
+ words_out = txt.strip(' .').split('.')
210
+ chunk = words_out[0]
211
+ if chunk.strip() == '':
212
+ return gr.Audio(sources=None)
213
+ response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.8)
214
+ index += 1
215
+ with open(speak_file, 'wb') as fp:
216
+ fp.write(response.content)
217
+ return speak_file
218
+
219
+ def gen_output_audio():
220
+ global words_out, index
221
+ if index >= len(words_out):
222
+ return gr.Audio(sources=None)
223
+ chunk = words_out[index]
224
+ if chunk.strip() == '':
225
+ return gr.Audio(sources=None)
226
+ response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.8)
227
+ index += 1
228
+ with open(speak_file, 'wb') as fp:
229
+ fp.write(response.content)
230
+ return speak_file
231
+
232
  history = gr.State([])
233
  password = gr.State("")
234
  model = gr.State("gpt-4o-mini")
 
249
  clear_button = gr.Button(value="Restart Conversation")
250
  # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
251
  # value="gpt-3.5-turbo", label="GPT Model", interactive=True)
252
+ submit_button = gr.Button(value="Submit Prompt/Question")
253
  speak_output = gr.Button(value="Speak Dialog", visible=False)
254
  prompt_window = gr.Textbox(label = "Prompt or Question")
255
  output_window = gr.Textbox(label = "Dialog")
256
+ submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model],
257
  outputs=[history, output_window, prompt_window, model])
258
  clear_button.click(clear, inputs=[], outputs=[prompt_window, history, output_window])
259
  audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
 
261
  audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
262
  reset_button.add(audio_widget)
263
  audio_out = gr.Audio(autoplay=True, visible=False)
264
+ audio_out.stop(fn=gen_output_audio, inputs=None, outputs = audio_out)
265
+ speak_output.click(fn=initial_audio_output, inputs=output_window, outputs=audio_out)
266
  output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
267
+ demo.unload(clean_up)
268
  demo.launch(share=True)