Ken Lin committed
Commit 02d48a9
1 Parent(s): 6d9ad56

Image Caption Model

Files changed (2)
  1. app.py +22 -6
  2. ram_swin_large_14m.pth +3 -0
app.py CHANGED
@@ -1,17 +1,33 @@
  import gradio as gr
+ from transformers import AutoProcessor, MusicgenForConditionalGeneration
+ import numpy as np

  title = "Musicalization System of Painting Demo"
  description = "Pui Ching Middle School: Musicalization System of Painting Demo"

- def greet(name):
-     return "Hello " + name + "!!"
+ def generate_music(text):
+     processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+     model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+
+     inputs = processor(
+         text=[text],
+         padding=True,
+         return_tensors="pt",
+     )
+
+     audio_values = model.generate(**inputs, max_new_tokens=256)
+     sampling_rate = model.audio_encoder.config.sampling_rate
+     target_dtype = np.int16
+     max_range = np.iinfo(target_dtype).max
+     audio_values = audio_values[0, 0].numpy()
+     return sampling_rate, (audio_values * max_range).astype(np.int16)
+

  iface = gr.Interface(
-     fn=greet,
+     fn=generate_music,
      title=title,
      description=description,
-     #fn=greet,
-     inputs=gr.Image(),
-     outputs=gr.Audio())
+     inputs=gr.Text(label="Content"),
+     outputs=gr.Audio(label='Generated Music'))

  iface.launch()
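
The updated app.py swaps the placeholder greet handler for a text-to-music handler built on MusicGen. As a minimal sketch (not part of the commit, assuming the same facebook/musicgen-small checkpoint), the same flow can load the processor and model once at import time instead of inside the handler, so the weights are not re-loaded on every Gradio request:

# Sketch: same text-to-music flow as app.py, with the MusicGen processor and
# model loaded once at startup instead of inside the request handler.
import numpy as np
from transformers import AutoProcessor, MusicgenForConditionalGeneration

processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

def generate_music(text):
    # Tokenize the text prompt for MusicGen.
    inputs = processor(text=[text], padding=True, return_tensors="pt")
    # 256 new tokens is roughly five seconds of audio at MusicGen's 50 Hz frame rate.
    audio_values = model.generate(**inputs, max_new_tokens=256)
    sampling_rate = model.audio_encoder.config.sampling_rate
    # Scale the float waveform in [-1, 1] to 16-bit PCM, which gr.Audio accepts
    # as a (sampling_rate, numpy array) tuple.
    audio = audio_values[0, 0].cpu().numpy()
    return sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16)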
ram_swin_large_14m.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15c729c793af28b9d107c69f85836a1356d76ea830d4714699fb62e55fcc08ed
+ size 5625634877
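
ram_swin_large_14m.pth is committed as a Git LFS pointer, so the repository stores only the object's sha256 and byte size (about 5.6 GB) rather than the weights themselves. A minimal sketch (not part of the commit) for checking that a locally fetched copy of the checkpoint matches the pointer above:

# Sketch: verify a downloaded ram_swin_large_14m.pth against the LFS pointer.
# The expected hash and size are copied from the pointer file in this commit.
import hashlib
import os

EXPECTED_SHA256 = "15c729c793af28b9d107c69f85836a1356d76ea830d4714699fb62e55fcc08ed"
EXPECTED_SIZE = 5625634877  # bytes, about 5.6 GB

def verify_checkpoint(path="ram_swin_large_14m.pth"):
    # A size mismatch catches truncated downloads without hashing 5.6 GB.
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    # Hash in 1 MiB chunks so the whole checkpoint never sits in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_SHA256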