fffiloni committed on
Commit 6e00cc0 · 1 Parent(s): a3d88bf

Update app.py

Files changed (1)
app.py +4 -3
app.py CHANGED
@@ -1,4 +1,5 @@
 import gradio as gr
+import torch
 import whisper
 from PIL import Image
 
@@ -9,7 +10,7 @@ from diffusers import StableDiffusionPipeline
 
 whisper_model = whisper.load_model("small")
 
-device="cpu"
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
 pipe.to(device)
@@ -50,7 +51,7 @@ gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="galler
 title="Whisper to Stable Diffusion"
 description="""
 <p style='text-align: center;'>
-This demo is running on CPU. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
+This demo is running on CPU 🐢. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
 Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
 Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
 Try it in French ! ;)<br />
@@ -61,7 +62,7 @@ Try it in French ! ;)<br />
 article="""
 <p style='text-align: center;'>—<br />
 Whisper is a general-purpose speech recognition model. <br />
-It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.<br />
+It is trained on a large dataset of diverse audio and is also a multi-task model that can perform<br />multilingual speech recognition as well as speech translation and language identification.<br />
 Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
 </p>
 """