fffiloni committed on
Commit
99cb9ec
1 Parent(s): 9f29da8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -20,7 +20,7 @@ def get_transcribe(audio):
20
 
21
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
22
 
23
- #_, probs = whisper_model.detect_language(mel)
24
 
25
  options = whisper.DecodingOptions(task="translate", fp16 = False)
26
  result = whisper.decode(whisper_model, mel, options)
@@ -41,10 +41,19 @@ def get_images(audio):
41
  else:
42
  images.append(image)
43
 
44
- return images
45
  #inputs
46
- audio = gr.Audio(label="Input Audio", show_label=False, source="microphone", type="filepath")
47
  #outputs
48
- gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery").style(grid=[2], height="auto")
49
-
50
- gr.Interface(fn=get_images, inputs=audio, outputs=gallery).queue(max_size=10).launch(enable_queue=True)
 
 
 
 
 
 
 
 
 
 
20
 
21
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
22
 
23
+ _, probs = whisper_model.detect_language(mel)
24
 
25
  options = whisper.DecodingOptions(task="translate", fp16 = False)
26
  result = whisper.decode(whisper_model, mel, options)
 
41
  else:
42
  images.append(image)
43
 
44
+ return prompt, images
45
  #inputs
46
+ audio = gr.Audio(label="Input Audio of an image description", show_label=True, source="microphone", type="filepath")
47
  #outputs
48
+ translated_prompt = gr.Textbox(label="Translated audio", lines=6)
49
+ gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery").style(grid=[1], height="auto")
50
+ title="Whisper to Stable Diffusion"
51
+ description="""
52
+ <p style='text-align: center;'>
53
+ This demo is running on CPU. Build by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.sd-img-variations' style='display: inline-block' />
54
+ Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.
55
+ Your audio will be translated to English, then sent as a prompt to stable diffusion.
56
+ Try it in French ! ;)
57
+ </p>
58
+ """
59
+ gr.Interface(fn=get_images, inputs=audio, outputs=[translated_prompt, gallery]).queue(max_size=1000).launch(enable_queue=True)