ybelkada committed on
Commit
9997754
1 Parent(s): ab99fe8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -5,13 +5,23 @@ import torch
5
  from spectro import wav_bytes_from_spectrogram_image
6
  from diffusers import StableDiffusionPipeline
7
 
 
 
8
  from share_btn import community_icon_html, loading_icon_html, share_js
9
 
10
  model_id = "riffusion/riffusion-model-v1"
 
11
  pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
12
- pipe = pipe.to("cuda")
 
 
 
13
 
14
- def predict(prompt):
 
 
 
 
15
  spec = pipe(prompt).images[0]
16
  print(spec)
17
  wav = wav_bytes_from_spectrogram_image(spec)
@@ -131,7 +141,8 @@ with gr.Blocks(css=css) as demo:
131
 
132
  gr.HTML(title)
133
 
134
- prompt_input = gr.Textbox(placeholder="a cat diva singing in a New York jazz club", label="Musical prompt", elem_id="prompt-in")
 
135
  send_btn = gr.Button(value="Get a new spectrogram ! ", elem_id="submit-btn")
136
 
137
  with gr.Column(elem_id="col-container-2"):
 
5
  from spectro import wav_bytes_from_spectrogram_image
6
  from diffusers import StableDiffusionPipeline
7
 
8
+ from transformers import BlipForConditionalGeneration, BlipProcessor
9
+
10
  from share_btn import community_icon_html, loading_icon_html, share_js
11
 
12
  model_id = "riffusion/riffusion-model-v1"
13
+ blip_model_id = "Salesforce/blip-image-captioning-base"
14
  pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
15
+ # pipe = pipe.to("cuda")
16
+
17
+ blip_model = BlipForConditionalGeneration.from_pretrained(blip_model_id)
18
+ processor = BlipProcessor.from_pretrained(blip_model_id)
19
 
20
+ def predict(image):
21
+ inputs = processor(image)
22
+ output_blip = blip_model.generate(**inputs)
23
+ prompt = processor.decode(output_blip[0], skip_special_tokens=True)
24
+
25
  spec = pipe(prompt).images[0]
26
  print(spec)
27
  wav = wav_bytes_from_spectrogram_image(spec)
 
141
 
142
  gr.HTML(title)
143
 
144
+ # prompt_input = gr.Textbox(placeholder="a cat diva singing in a New York jazz club", label="Musical prompt", elem_id="prompt-in")
145
+ image_input = gr.Image()
146
  send_btn = gr.Button(value="Get a new spectrogram ! ", elem_id="submit-btn")
147
 
148
  with gr.Column(elem_id="col-container-2"):