muhtasham committed on
Commit
5c5bdc4
1 Parent(s): dba0bb7

Update app.py

Files changed (1): app.py (+6 -5)
app.py CHANGED
@@ -30,7 +30,7 @@ def text2image_latent(text, steps, width, height, images, diversity):
     return(image_paths)


-def speech_to_text(mic=None, file=None):
+def speech_to_text(mic=None, file=None, state=""):
     if mic is not None:
         audio = mic
     elif file is not None:
@@ -38,7 +38,8 @@ def speech_to_text(mic=None, file=None):
     else:
         return "You must either provide a mic recording or a file"
     transcription = asr(audio)["text"]
-    return transcription
+    state += text + " "
+    return state, state

 def zero_shot(image, text_input):
     PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
@@ -66,7 +67,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             audio_file =[
-                gr.Audio(source="microphone", type="filepath", optional=True),
+                gr.Audio(source="microphone", type="filepath", optional=True), "state",
                 gr.Audio(source="upload", type="filepath", optional=True)]
             text = gr.Textbox(label="Text", placeholder="If you dont want to record or upload your voice you can input text here")
     with gr.Row():
@@ -88,8 +89,8 @@ with gr.Blocks() as demo:
     zero_shot_clf = gr.Button("Classify Image go brr")


-    speech_to_text.click(speech_to_text, inputs=audio_file, outputs=text)
+    speech_to_text.click(speech_to_text, inputs=audio_file, outputs=[text,"state"])
     get_image_latent.click(text2image_latent, inputs=[text, steps, width, height, images, diversity], outputs=gallery)
     zero_shot_clf.click(zero_shot, inputs=[gallery, text_input], outputs=label)

-demo.launch()
+demo.launch(live=True)
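
For context, the pattern this commit introduces (accumulating each new transcription into a running transcript) is typically wired up in Gradio Blocks with a gr.State() component passed through inputs and outputs, rather than the "state" string shortcut from the gr.Interface API. The following is a minimal sketch under that assumption; the ASR pipeline, component variable names, and the "Transcribe" button are illustrative and not taken from this Space's full app.py.

import gradio as gr
from transformers import pipeline

# Assumption: the Space uses a transformers ASR pipeline for `asr`;
# the default model here is illustrative, not taken from the commit.
asr = pipeline("automatic-speech-recognition")

def speech_to_text(mic=None, file=None, state=""):
    # Prefer the microphone recording, fall back to an uploaded file.
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file", state
    transcription = asr(audio)["text"]
    state += transcription + " "   # append this result to the running transcript
    return state, state            # new textbox value, updated session state

with gr.Blocks() as demo:
    mic = gr.Audio(source="microphone", type="filepath", optional=True)
    file = gr.Audio(source="upload", type="filepath", optional=True)
    text = gr.Textbox(label="Text")
    state = gr.State("")                  # per-session transcript buffer
    transcribe = gr.Button("Transcribe")  # hypothetical button, for illustration
    transcribe.click(speech_to_text, inputs=[mic, file, state], outputs=[text, state])

demo.launch()

With gr.State in both inputs and outputs, each click receives the previous transcript and returns the extended one, which is the behavior the `state += ...` / `return state, state` lines in the diff are aiming for.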